From a30e7c950601c3de71655fe46fa93cb3381b83b1 Mon Sep 17 00:00:00 2001 From: Tyler McMullen Date: Wed, 11 Dec 2024 17:54:24 -0800 Subject: [PATCH 001/119] New parser. Nowhere near functional. Just prototyping. --- esi/Cargo.toml | 1 + esi/src/lib.rs | 1 + esi/src/new_parse.rs | 116 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 esi/src/new_parse.rs diff --git a/esi/Cargo.toml b/esi/Cargo.toml index d11dc26..dc50e40 100644 --- a/esi/Cargo.toml +++ b/esi/Cargo.toml @@ -15,6 +15,7 @@ fastly = "^0.11" log = "^0.4" regex = "1.11.1" html-escape = "0.2.13" +nom = "7.1.3" [dev-dependencies] env_logger = "^0.11" diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 31aaf29..9600ca8 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -5,6 +5,7 @@ mod document; mod error; mod expression; mod functions; +mod new_parse; mod parse; use document::{FetchState, Task}; diff --git a/esi/src/new_parse.rs b/esi/src/new_parse.rs new file mode 100644 index 0000000..b155496 --- /dev/null +++ b/esi/src/new_parse.rs @@ -0,0 +1,116 @@ +use nom::branch::alt; +use nom::bytes::streaming::*; +use nom::character::streaming::*; +use nom::combinator::{complete, map, recognize, success}; +use nom::error::Error; +use nom::multi::fold_many0; +use nom::sequence::{delimited, pair, preceded, separated_pair}; +use nom::IResult; + +#[derive(Debug)] +enum Chunk<'a> { + EsiStartTag(&'a str, Vec<(&'a str, &'a str)>), + EsiEndTag(&'a str), + Text(&'a str), +} + +fn parse(input: &str) -> IResult<&str, Vec, Error<&str>> { + fold_many0( + complete(chunk), + Vec::new, + |mut acc: Vec, mut item| { + acc.append(&mut item); + acc + }, + )(input) +} + +fn chunk(input: &str) -> IResult<&str, Vec, Error<&str>> { + alt((text, alt((esi_tag, html))))(input) +} + +fn esi_tag(input: &str) -> IResult<&str, Vec, Error<&str>> { + alt((esi_start_tag, esi_end_tag))(input) +} +fn esi_start_tag(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + delimited(tag("')), + |(tagname, 
attrs)| vec![Chunk::EsiStartTag(tagname, attrs)], + )(input) +} +fn esi_end_tag(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + delimited(tag(""), char('>')), + |s: &str| vec![Chunk::EsiEndTag(s)], + )(input) +} + +fn esi_tag_name(input: &str) -> IResult<&str, &str, Error<&str>> { + tag("vars")(input) +} + +fn attributes(input: &str) -> IResult<&str, Vec<(&str, &str)>, Error<&str>> { + map( + separated_pair(preceded(multispace1, alpha1), char('='), xmlstring), + |(name, value)| vec![(name, value)], + )(input) +} + +fn xmlstring(input: &str) -> IResult<&str, &str, Error<&str>> { + delimited(char('"'), is_not("\""), char('"'))(input) // TODO: obviously wrong +} + +fn html(input: &str) -> IResult<&str, Vec, Error<&str>> { + alt((script, end_tag, start_tag))(input) +} + +fn script(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + recognize(delimited( + delimited(tag(""), success(""))), char('>')), + take_until(""), success(""))), char('>')), + )), + |s: &str| vec![Chunk::Text(s)], + )(input) +} + +fn end_tag(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + recognize(delimited(tag(""), char('>'))), + |s: &str| vec![Chunk::Text(s)], + )(input) +} + +fn start_tag(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + recognize(delimited(char('<'), is_not(">"), char('>'))), + |s: &str| vec![Chunk::Text(s)], + )(input) +} +fn text(input: &str) -> IResult<&str, Vec, Error<&str>> { + map(take_until1("<"), |s: &str| vec![Chunk::Text(s)])(input) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_parse() { + let x = parse( + "foobaz", + ); + println!("{:?}", x); + } + #[test] + fn test_new_parse_script() { + let x = script(""); + println!("{:?}", x); + } + #[test] + fn test_new_parse_esi_tag() { + let x = esi_start_tag(""); + println!("{:?}", x); + } +} From 6eaca34e8728efec56342ba3de34c776e8b64992 Mon Sep 17 00:00:00 2001 From: Tyler McMullen Date: Thu, 12 Dec 2024 17:29:32 -0800 Subject: [PATCH 002/119] starting to 
wonder if this is a good idea --- esi/src/new_parse.rs | 53 ++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/esi/src/new_parse.rs b/esi/src/new_parse.rs index b155496..0e16344 100644 --- a/esi/src/new_parse.rs +++ b/esi/src/new_parse.rs @@ -1,11 +1,11 @@ use nom::branch::alt; use nom::bytes::streaming::*; use nom::character::streaming::*; -use nom::combinator::{complete, map, recognize, success}; -use nom::error::Error; -use nom::multi::fold_many0; -use nom::sequence::{delimited, pair, preceded, separated_pair}; -use nom::IResult; +use nom::combinator::{complete, map, not, peek, recognize, success, verify}; +use nom::error::{Error, ParseError}; +use nom::multi::{fold_many0, length_data, many0, many_till}; +use nom::sequence::{delimited, pair, preceded, separated_pair, tuple}; +use nom::{IResult, Parser}; #[derive(Debug)] enum Chunk<'a> { @@ -26,7 +26,7 @@ fn parse(input: &str) -> IResult<&str, Vec, Error<&str>> { } fn chunk(input: &str) -> IResult<&str, Vec, Error<&str>> { - alt((text, alt((esi_tag, html))))(input) + alt((text, esi_tag, html))(input) } fn esi_tag(input: &str) -> IResult<&str, Vec, Error<&str>> { @@ -50,10 +50,11 @@ fn esi_tag_name(input: &str) -> IResult<&str, &str, Error<&str>> { } fn attributes(input: &str) -> IResult<&str, Vec<(&str, &str)>, Error<&str>> { - map( - separated_pair(preceded(multispace1, alpha1), char('='), xmlstring), - |(name, value)| vec![(name, value)], - )(input) + many0(separated_pair( + preceded(multispace1, alpha1), + char('='), + xmlstring, + ))(input) } fn xmlstring(input: &str) -> IResult<&str, &str, Error<&str>> { @@ -66,12 +67,25 @@ fn html(input: &str) -> IResult<&str, Vec, Error<&str>> { fn script(input: &str) -> IResult<&str, Vec, Error<&str>> { map( - recognize(delimited( - delimited(tag(""), success(""))), char('>')), - take_until(""), success(""))), char('>')), + tuple(( + recognize(verify( + delimited(tag_no_case("')), + |attrs: &Vec<(&str, &str)>| 
!attrs.iter().any(|(k, _)| k == &"src"), + )), + length_data(map( + peek(many_till(anychar, tag_no_case(""), success(""))), + char('>'), + )), )), - |s: &str| vec![Chunk::Text(s)], + |(start, script, end)| { + println!("script parser succeeded"); + vec![Chunk::Text(start), Chunk::Text(script), Chunk::Text(end)] + }, )(input) } @@ -99,13 +113,18 @@ mod tests { #[test] fn test_new_parse() { let x = parse( - "foobaz", + "foobaz", ); println!("{:?}", x); } #[test] fn test_new_parse_script() { - let x = script(""); + let x = script(""); + println!("{:?}", x); + } + #[test] + fn test_new_parse_script_with_src() { + let x = parse("", - ); - println!("{:?}", x); + let input = r#" +foo + +baz + + +hello
+
+"#; + let output = parse(input); + println!("{input}"); + println!("{:?}", output); } #[test] fn test_new_parse_script() { @@ -128,8 +180,30 @@ mod tests { println!("{:?}", x); } #[test] - fn test_new_parse_esi_tag() { - let x = esi_start_tag(""); + fn test_new_parse_esi_vars_short() { + let x = esi_tag(r#""#); + println!("{:?}", x); + } + #[test] + fn test_new_parse_esi_vars_long() { + let x = esi_tag( + r#"hello
there"#, + ); + println!("{:?}", x); + } + #[test] + fn test_new_parse_plain_text() { + let x = parse("hello\nthere"); + println!("{:?}", x); + } + #[test] + fn test_new_parse_esi_end_tag() { + let x = parse("
"); + println!("{:?}", x); + } + #[test] + fn test_new_parse_interpolated() { + let x = parse("hello $(foo)goodbye $(foo)"); println!("{:?}", x); } } diff --git a/esi/src/parser_types.rs b/esi/src/parser_types.rs new file mode 100644 index 0000000..a5283fc --- /dev/null +++ b/esi/src/parser_types.rs @@ -0,0 +1,21 @@ +#[derive(Debug)] +pub enum Tag { + Vars, + Include, + Text, + Choose, + When, + Otherwise, + Try, + Attempt, + Except, +} + +#[derive(Debug)] +pub enum Chunk<'a> { + EsiStartTag(Tag, Vec<(&'a str, &'a str)>), + EsiEndTag(Tag), + Expr(&'a str), + Html(&'a str), + Text(&'a str), +} From 214080dc1056b09716b54ba0f57f38157bdc0065 Mon Sep 17 00:00:00 2001 From: Tyler McMullen Date: Thu, 19 Dec 2024 11:07:54 -0800 Subject: [PATCH 004/119] That's the easy tags out of the way --- esi/src/new_parse.rs | 63 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/esi/src/new_parse.rs b/esi/src/new_parse.rs index 72dfcb4..14367be 100644 --- a/esi/src/new_parse.rs +++ b/esi/src/new_parse.rs @@ -40,8 +40,22 @@ fn interpolated_chunk(input: &str) -> IResult<&str, Vec, Error<&str>> { alt((interpolated_text, interpolation, esi_tag, html))(input) } +fn parse_without_esi(input: &str) -> IResult<&str, Vec, Error<&str>> { + fold_many0( + complete(no_esi_chunk), + Vec::new, + |mut acc: Vec, mut item| { + acc.append(&mut item); + acc + }, + )(input) +} +fn no_esi_chunk(input: &str) -> IResult<&str, Vec, Error<&str>> { + alt((text, html))(input) +} + fn esi_tag(input: &str) -> IResult<&str, Vec, Error<&str>> { - alt((esi_vars,))(input) + alt((esi_vars, esi_comment, esi_remove, esi_text, esi_include))(input) } fn esi_vars(input: &str) -> IResult<&str, Vec, Error<&str>> { @@ -72,6 +86,47 @@ fn esi_vars_long(input: &str) -> IResult<&str, Vec, Error<&str>> { )(input) } +fn esi_comment(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + delimited( + tag(""), tag("/>")))), + ), + |_| vec![], + )(input) +} +fn esi_remove(input: 
&str) -> IResult<&str, Vec, Error<&str>> { + map( + delimited(tag(""), parse, tag("")), + |_| vec![], + )(input) +} + +fn esi_text(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + tuple(( + tag(""), + length_data(map( + peek(many_till(anychar, tag(""))), + |(v, _)| v.len(), + )), + tag(""), + )), + |(_, v, _)| vec![Chunk::Text(v)], + )(input) +} +fn esi_include(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + delimited( + tag(""), tag("/>")))), + ), + |attrs| vec![Chunk::EsiStartTag(Tag::Include, attrs)], + )(input) +} + fn attributes(input: &str) -> IResult<&str, Vec<(&str, &str)>, Error<&str>> { many0(separated_pair( preceded(multispace1, alpha1), @@ -164,7 +219,11 @@ hello
"#; + +should not appear + + this $(should) appear unchanged +"#; let output = parse(input); println!("{input}"); println!("{:?}", output); From 5e9b3f7c449553d14e55b75af4abf76e3726c512 Mon Sep 17 00:00:00 2001 From: Tyler McMullen Date: Thu, 19 Dec 2024 14:23:24 -0800 Subject: [PATCH 005/119] All of the rest of the esi tags we currently support. --- esi/src/new_parse.rs | 166 ++++++++++++++++++++++- esi/src/parser_types.rs | 26 ++-- examples/esi_vars_example/src/index.html | 26 ++-- 3 files changed, 184 insertions(+), 34 deletions(-) diff --git a/esi/src/new_parse.rs b/esi/src/new_parse.rs index 14367be..0fd650c 100644 --- a/esi/src/new_parse.rs +++ b/esi/src/new_parse.rs @@ -55,7 +55,132 @@ fn no_esi_chunk(input: &str) -> IResult<&str, Vec, Error<&str>> { } fn esi_tag(input: &str) -> IResult<&str, Vec, Error<&str>> { - alt((esi_vars, esi_comment, esi_remove, esi_text, esi_include))(input) + alt(( + esi_vars, + esi_comment, + esi_remove, + esi_text, + esi_include, + esi_choose, + esi_when, + esi_otherwise, + esi_attempt, + esi_except, + esi_try, + esi_assign, + ))(input) +} + +fn esi_assign(input: &str) -> IResult<&str, Vec, Error<&str>> { + alt((esi_assign_short, esi_assign_long))(input) +} + +fn esi_assign_short(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + delimited( + tag(""), tag("/>")))), + ), + |attrs| vec![Chunk::Esi(Tag::Assign(attrs, None))], + )(input) +} + +fn esi_assign_long(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + tuple(( + delimited( + tag(""), tag("/>")))), + ), + parse_interpolated, + tag(""), + )), + |(attrs, chunks, _)| vec![Chunk::Esi(Tag::Assign(attrs, Some(chunks)))], + )(input) +} +fn esi_except(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + delimited( + tag(""), + parse_interpolated, + tag(""), + ), + |v| vec![Chunk::Esi(Tag::Except(v))], + )(input) +} +fn esi_attempt(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + delimited( + tag(""), + parse_interpolated, + tag(""), + ), + |v| 
vec![Chunk::Esi(Tag::Attempt(v))], + )(input) +} +fn esi_try(input: &str) -> IResult<&str, Vec, Error<&str>> { + map(delimited(tag(""), parse, tag("")), |v| { + let mut attempts = vec![]; + let mut except = None; + for chunk in v { + match chunk { + Chunk::Esi(Tag::Attempt(cs)) => attempts.push(cs), + Chunk::Esi(Tag::Except(cs)) => { + except = Some(cs); + } + _ => {} + } + } + vec![Chunk::Esi(Tag::Try(attempts, except))] + })(input) +} + +fn esi_otherwise(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + delimited( + tag(""), + parse_interpolated, + tag(""), + ), + |v| vec![Chunk::Esi(Tag::Otherwise(v))], + )(input) +} + +fn esi_when(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + tuple(( + delimited( + tag(""), tag("/>")))), + ), + parse_interpolated, + tag(""), + )), + |(attrs, v, _)| vec![Chunk::Esi(Tag::When(attrs, v))], + )(input) +} + +fn esi_choose(input: &str) -> IResult<&str, Vec, Error<&str>> { + map( + delimited(tag(""), parse, tag("")), + |v| { + let mut whens = vec![]; + let mut otherwise = None; + for chunk in v { + match chunk { + Chunk::Esi(Tag::When(..)) => whens.push(chunk), + Chunk::Esi(Tag::Otherwise(cs)) => { + otherwise = Some(cs); + } + _ => {} + } + } + vec![Chunk::Esi(Tag::Choose(whens, otherwise))] + }, + )(input) } fn esi_vars(input: &str) -> IResult<&str, Vec, Error<&str>> { @@ -123,7 +248,7 @@ fn esi_include(input: &str) -> IResult<&str, Vec, Error<&str>> { attributes, preceded(multispace0, alt((tag(">"), tag("/>")))), ), - |attrs| vec![Chunk::EsiStartTag(Tag::Include, attrs)], + |attrs| vec![Chunk::Esi(Tag::Include(attrs))], )(input) } @@ -160,10 +285,7 @@ fn script(input: &str) -> IResult<&str, Vec, Error<&str>> { char('>'), )), )), - |(start, script, end)| { - println!("script parser succeeded"); - vec![Chunk::Html(start), Chunk::Text(script), Chunk::Html(end)] - }, + |(start, script, end)| vec![Chunk::Html(start), Chunk::Text(script), Chunk::Html(end)], )(input) } @@ -223,7 +345,30 @@ hello
should not appear this $(should) appear unchanged -"#; + + +should not appear + + +should not appear +hi +goodbye +should not appear + + +should not appear + +attempt 1 + +should not appear + +attempt 2 + +should not appear + +exception! + +"#; let output = parse(input); println!("{input}"); println!("{:?}", output); @@ -265,4 +410,11 @@ hello
let x = parse("hello $(foo)goodbye $(foo)"); println!("{:?}", x); } + #[test] + fn test_new_parse_examples() { + let x = parse(include_str!( + "../../examples/esi_vars_example/src/index.html" + )); + println!("{:?}", x); + } } diff --git a/esi/src/parser_types.rs b/esi/src/parser_types.rs index a5283fc..f64e3ce 100644 --- a/esi/src/parser_types.rs +++ b/esi/src/parser_types.rs @@ -1,20 +1,18 @@ -#[derive(Debug)] -pub enum Tag { - Vars, - Include, - Text, - Choose, - When, - Otherwise, - Try, - Attempt, - Except, +#[derive(Debug, Clone)] +pub enum Tag<'a> { + Include(Vec<(&'a str, &'a str)>), + Choose(Vec>, Option>>), + When(Vec<(&'a str, &'a str)>, Vec>), + Otherwise(Vec>), + Try(Vec>>, Option>>), + Attempt(Vec>), + Except(Vec>), + Assign(Vec<(&'a str, &'a str)>, Option>>), } -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum Chunk<'a> { - EsiStartTag(Tag, Vec<(&'a str, &'a str)>), - EsiEndTag(Tag), + Esi(Tag<'a>), Expr(&'a str), Html(&'a str), Text(&'a str), diff --git a/examples/esi_vars_example/src/index.html b/examples/esi_vars_example/src/index.html index 990db69..2a03105 100644 --- a/examples/esi_vars_example/src/index.html +++ b/examples/esi_vars_example/src/index.html @@ -52,7 +52,7 @@

Choose Tests

GOODBYE - +
@@ -64,7 +64,7 @@

Choose Tests

GOODBYE - +
@@ -79,7 +79,7 @@

Choose Tests

GOODBYE - +
@@ -94,7 +94,7 @@

Choose Tests

GOODBYE - +
@@ -109,7 +109,7 @@

Choose Tests

GOODBYE - +
@@ -121,7 +121,7 @@

Matches tests

UH OH - +
@@ -132,7 +132,7 @@

Matches tests

NO - +
@@ -143,7 +143,7 @@

Matches tests

NO - +
@@ -155,7 +155,7 @@

Matches tests

UH OH - +
@@ -167,7 +167,7 @@

Matches tests

NO - +
@@ -178,7 +178,7 @@

Matches tests

UH OH - +
@@ -189,7 +189,7 @@

Matches tests

UH OH - +
@@ -202,7 +202,7 @@

Matches tests

UH OH - +
From b23d0e3b963da5317ec3860f228872bde9cc50bb Mon Sep 17 00:00:00 2001 From: Tyler McMullen Date: Thu, 19 Dec 2024 16:10:32 -0800 Subject: [PATCH 006/119] Integrate nom expression parser from @vagetman (thank you!) and do a little cleanup. --- esi/src/lib.rs | 1 + esi/src/new_parse.rs | 256 ++++++++++++++++++++++++++++++++-------- esi/src/parser_types.rs | 19 ++- 3 files changed, 222 insertions(+), 54 deletions(-) diff --git a/esi/src/lib.rs b/esi/src/lib.rs index f73069f..701fb60 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -20,6 +20,7 @@ use std::io::{BufRead, Write}; pub use crate::document::{Element, Fragment}; pub use crate::error::Result; +pub use crate::new_parse::parse; pub use crate::parse::{parse_tags, Event, Include, Tag, Tag::Try}; pub use crate::config::Configuration; diff --git a/esi/src/new_parse.rs b/esi/src/new_parse.rs index 0fd650c..cbc7a97 100644 --- a/esi/src/new_parse.rs +++ b/esi/src/new_parse.rs @@ -2,15 +2,15 @@ use nom::branch::alt; use nom::bytes::complete::is_not as complete_is_not; use nom::bytes::streaming::*; use nom::character::streaming::*; -use nom::combinator::{complete, map, map_res, not, peek, recognize, success, verify}; -use nom::error::{Error, ParseError}; -use nom::multi::{fold_many0, length_data, length_value, many0, many1, many_till}; -use nom::sequence::{delimited, pair, preceded, separated_pair, tuple}; -use nom::{IResult, Parser}; +use nom::combinator::{complete, map, map_res, opt, peek, recognize, success, verify}; +use nom::error::Error; +use nom::multi::{fold_many0, length_data, many0, many1, many_till, separated_list0}; +use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; +use nom::{AsChar, IResult}; use crate::parser_types::*; -fn parse(input: &str) -> IResult<&str, Vec, Error<&str>> { +pub fn parse(input: &str) -> IResult<&str, Vec, Error<&str>> { fold_many0( complete(chunk), Vec::new, @@ -37,21 +37,7 @@ fn parse_interpolated(input: &str) -> IResult<&str, Vec, Error<&str>> { } fn 
interpolated_chunk(input: &str) -> IResult<&str, Vec, Error<&str>> { - alt((interpolated_text, interpolation, esi_tag, html))(input) -} - -fn parse_without_esi(input: &str) -> IResult<&str, Vec, Error<&str>> { - fold_many0( - complete(no_esi_chunk), - Vec::new, - |mut acc: Vec, mut item| { - acc.append(&mut item); - acc - }, - )(input) -} -fn no_esi_chunk(input: &str) -> IResult<&str, Vec, Error<&str>> { - alt((text, html))(input) + alt((interpolated_text, interpolated_expression, esi_tag, html))(input) } fn esi_tag(input: &str) -> IResult<&str, Vec, Error<&str>> { @@ -195,8 +181,12 @@ fn esi_vars_short(input: &str) -> IResult<&str, Vec, Error<&str>> { preceded(multispace0, alt((tag(">"), tag("/>")))), ), |attrs| { - if let Some((k, v)) = attrs.iter().find(|(k, v)| *k == "name") { - Ok(vec![Chunk::Expr(v)]) + if let Some((_k, v)) = attrs.iter().find(|(k, _v)| *k == "name") { + if let Ok((_, expr)) = expression(v) { + Ok(expr) + } else { + Err("failed to parse expression") + } } else { Err("no name field in short form vars") } @@ -318,10 +308,134 @@ fn interpolated_text(input: &str) -> IResult<&str, Vec, Error<&str>> { vec![Chunk::Text(s)] })(input) } -fn interpolation(input: &str) -> IResult<&str, Vec, Error<&str>> { + +fn is_alphanumeric_or_underscore(c: char) -> bool { + c.is_alphanum() || c == '_' +} + +fn is_lower_alphanumeric_or_underscore(c: char) -> bool { + c.is_ascii_lowercase() || c.is_numeric() || c == '_' +} + +fn fn_name(input: &str) -> IResult<&str, &str, Error<&str>> { + preceded(char('$'), take_while1(is_lower_alphanumeric_or_underscore))(input) +} + +fn var_name(input: &str) -> IResult<&str, (&str, Option<&str>, Option), Error<&str>> { + tuple(( + take_while1(is_alphanumeric_or_underscore), + opt(delimited(char('{'), var_key, char('}'))), + opt(preceded(char('|'), fn_nested_argument)), + ))(input) +} + +fn not_dollar_or_curlies(input: &str) -> IResult<&str, &str, Error<&str>> { + take_till(|c: char| "${},\"".contains(c))(input) +} + +// TODO: 
handle escaping +fn single_quoted_string(input: &str) -> IResult<&str, &str, Error<&str>> { + delimited( + char('\''), + take_till(|c: char| c == '\'' || !c.is_ascii()), + char('\''), + )(input) +} +fn triple_quoted_string(input: &str) -> IResult<&str, &str, Error<&str>> { + delimited( + tag("'''"), + length_data(map(peek(many_till(anychar, tag("'''"))), |(v, _)| v.len())), + tag("'''"), + )(input) +} + +fn string(input: &str) -> IResult<&str, &str, Error<&str>> { + alt((single_quoted_string, triple_quoted_string))(input) +} + +fn fn_string(input: &str) -> IResult<&str, &str, Error<&str>> { + alt((single_quoted_string, triple_quoted_string))(input) +} + +fn var_key(input: &str) -> IResult<&str, &str, Error<&str>> { + alt(( + single_quoted_string, + triple_quoted_string, + not_dollar_or_curlies, + ))(input) +} + +fn fn_argument(input: &str) -> IResult<&str, Vec, Error<&str>> { + let (input, mut parsed) = separated_list0( + tuple((multispace0, char(','), multispace0)), + fn_nested_argument, + )(input)?; + + // If the parsed list contains a single empty string element return an empty vec + if parsed.len() == 1 && parsed[0] == Symbol::String(None) { + parsed = vec![]; + } + Ok((input, parsed)) +} + +fn fn_nested_argument(input: &str) -> IResult<&str, Symbol, Error<&str>> { + alt(( + function, + variable, + map(fn_string, |string| { + if string.is_empty() { + Symbol::String(None) + } else { + Symbol::String(Some(string)) + } + }), + ))(input) +} + +fn function(input: &str) -> IResult<&str, Symbol, Error<&str>> { + let (input, parsed) = tuple(( + fn_name, + delimited( + terminated(char('('), multispace0), + fn_argument, + preceded(multispace0, char(')')), + ), + ))(input)?; + + let (name, args) = parsed; + + Ok((input, Symbol::Function { name, args })) +} + +fn variable(input: &str) -> IResult<&str, Symbol, Error<&str>> { + let (input, parsed) = delimited(tag("$("), var_name, char(')'))(input)?; + + let (name, key, default) = parsed; + let default = 
default.map(Box::new); + + Ok((input, Symbol::Variable { name, key, default })) +} + +fn interpolated_expression(input: &str) -> IResult<&str, Vec, Error<&str>> { + map(alt((function, variable)), |symbol| { + vec![Chunk::Expr(symbol)] + })(input) +} + +fn expression(input: &str) -> IResult<&str, Vec, Error<&str>> { map( - recognize(delimited(tag("$("), is_not(")"), tag(")"))), - |s: &str| vec![Chunk::Expr(s)], + alt(( + function, + variable, + map(string, |string| { + if string.is_empty() { + Symbol::String(None) + } else { + Symbol::String(Some(string)) + } + }), + )), + |symbol| vec![Chunk::Expr(symbol)], )(input) } @@ -335,7 +449,7 @@ mod tests { foo baz - + hello
@@ -369,52 +483,92 @@ should not appear exception! "#; - let output = parse(input); - println!("{input}"); - println!("{:?}", output); + let (rest, _) = parse(input).unwrap(); + // Just test to make sure it parsed the whole thing + assert_eq!(rest.len(), 0); } #[test] fn test_new_parse_script() { - let x = script(""); - println!("{:?}", x); + let (rest, x) = script("").unwrap(); + assert_eq!(rest.len(), 0); + assert_eq!( + x, + [ + Chunk::Html("") + ] + ); } #[test] fn test_new_parse_script_with_src() { - let x = parse("") + Element::Html("") ] ); } @@ -721,13 +789,13 @@ exception! fn test_new_parse_script_with_src() { let (rest, x) = parse("").unwrap(); + let (rest, x) = script(b"").unwrap(); assert_eq!(rest.len(), 0); assert_eq!( x, [ - Element::Html("") + Element::Html(Bytes::from_static(b"")) ] ); } #[test] fn test_new_parse_script_with_src() { - let (rest, x) = parse(" +should not appear + + this $(should) appear unchanged + + +should not appear + + +should not appear +hi +goodbye +should not appear + + +should not appear + +attempt 1 + +should not appear + +attempt 2 + +should not appear + +exception! + +"#; + let result = parse(input); + match result { + Ok((rest, _)) => { + // Just test to make sure it parsed the whole thing + if !rest.is_empty() { + panic!("Failed to parse completely. Remaining: '{}'", rest); + } + } + Err(e) => { + panic!("Parse failed with error: {:?}", e); + } + } + } + #[test] + fn test_new_parse_script() { + let (rest, x) = script("").unwrap(); + assert_eq!(rest.len(), 0); + assert_eq!( + x, + [ + Element::Html("") + ] + ); + } + #[test] + fn test_new_parse_script_with_src() { + let (rest, x) = parse(" tag + ("", "Script opening tag, REQUIRES closing"), + (" tag ("", "Script opening tag, REQUIRES closing"), - ("").unwrap(); + let input = b""; + let bytes = Bytes::from_static(input); + let (rest, x) = script(&bytes, input).unwrap(); assert_eq!(rest.len(), 0); assert_eq!( x, @@ -990,7 +1278,9 @@ exception! 
} #[test] fn test_new_parse_script_with_src() { - let (rest, x) = parse_complete(b" -should not appear - - this $(should) appear unchanged - - -should not appear - - -should not appear -hi -goodbye -should not appear - - -should not appear - -attempt 1 - -should not appear - -attempt 2 - -should not appear - -exception! - -"#; - let result = parse(input); - match result { - Ok((rest, _)) => { - // Just test to make sure it parsed the whole thing - if !rest.is_empty() { - panic!("Failed to parse completely. Remaining: '{}'", rest); - } - } - Err(e) => { - panic!("Parse failed with error: {:?}", e); - } - } - } - #[test] - fn test_new_parse_script() { - let (rest, x) = script("").unwrap(); - assert_eq!(rest.len(), 0); - assert_eq!( - x, - [ - Element::Html("") - ] - ); - } - #[test] - fn test_new_parse_script_with_src() { - let (rest, x) = parse(" IResult<&[u8], &[u8], Error<&[u8]>> { + recognize(many_till(take(1usize), peek(tag_no_case(b"( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { - // Reject ESI closing tags before trying to parse - let (_, _) = peek(not(tag(b""[..]), tag(b">"))), - |s: &[u8]| vec![Element::Html(slice_as_bytes(original, s))], - )(input) + // Inline script - find closing tag (case insensitive) + let (input, content) = script_content(input)?; + + // Parse closing tag + let closing_start = input; + let (input, _) = tag_no_case(b"( +// ============================================================================ +// ESI Tag Parsers (continue from where tag_dispatch left off) +// ============================================================================ + +fn closing_tag<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { - // Reject ESI tags and closing tags before trying to parse - let (_, _) = peek(not(alt((tag(b""[..]), tag(b">"))), + recognize(tuple((tag(b"(original: &Bytes, input: &'a [u8]) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { - 
map(recognize(many1(is_not(&b"<"[..]))), |s: &[u8]| { - vec![Element::Text(slice_as_bytes(original, s))] - })(input) + map( + recognize(take_while1(|c| !is_opening_bracket(c))), + |s: &[u8]| vec![Element::Text(slice_as_bytes(original, s))], + )(input) } +/// Check if byte is the opening bracket '<' +#[inline] +fn is_opening_bracket(b: u8) -> bool { + b == b'<' +} + +/// Check if byte is a dollar sign '$' +#[inline] +fn is_dollar(b: u8) -> bool { + b == b'$' +} +#[inline] fn is_alphanumeric_or_underscore(c: u8) -> bool { c.is_ascii_alphanumeric() || c == b'_' } +#[inline] fn is_lower_alphanumeric_or_underscore(c: u8) -> bool { c.is_ascii_lowercase() || c.is_ascii_digit() || c == b'_' } @@ -1055,7 +1140,7 @@ hello
+ should not appear this $(should) appear unchanged @@ -1104,7 +1189,7 @@ exception! fn test_new_parse_script() { let input = b""; let bytes = Bytes::from_static(input); - let (rest, x) = script(&bytes, input).unwrap(); + let (rest, x) = html_script_tag(&bytes, input).unwrap(); assert_eq!(rest.len(), 0); assert_eq!( x, @@ -1132,7 +1217,7 @@ exception! fn test_new_parse_esi_vars_short() { let input = br#""#; let bytes = Bytes::from_static(input); - let (rest, x) = esi_tag(&bytes, input).unwrap(); + let (rest, x) = esi_vars(&bytes, input).unwrap(); assert_eq!(rest.len(), 0); assert_eq!( x, @@ -1293,14 +1378,9 @@ exception! let input = br#" $(QUERY_STRING{param}) "#; - eprintln!( - "Testing esi_tag on input: {:?}", - String::from_utf8_lossy(input) - ); let bytes = Bytes::from_static(input); - let result = esi_tag(&bytes, input); - eprintln!("Result: {:?}", result); - assert!(result.is_ok(), "esi_tag should parse: {:?}", result.err()); + let (rest, _result) = esi_vars(&bytes, input).unwrap(); + assert_eq!(rest.len(), 0, "Parser should consume all input"); } #[test] From 23093217be564049e7909188661fa4f581402112 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Wed, 12 Nov 2025 20:38:24 -0600 Subject: [PATCH 050/119] Refactor parser: Enhance esi_vars_content and htmlstring functions for improved handling of quotes and nested ESI tags --- esi/src/parser.rs | 126 ++++++++++++++++++++++++++++------------------ 1 file changed, 78 insertions(+), 48 deletions(-) diff --git a/esi/src/parser.rs b/esi/src/parser.rs index 4030bd1..8f23c19 100644 --- a/esi/src/parser.rs +++ b/esi/src/parser.rs @@ -3,7 +3,7 @@ use nom::branch::alt; // Using STREAMING parsers - they return Incomplete when they need more data // This enables TRUE bounded-memory streaming use nom::bytes::streaming::{ - is_not, tag, tag_no_case, take, take_until, take_while, take_while1, take_while_m_n, + tag, tag_no_case, take, take_until, take_while, take_while1, take_while_m_n, }; use 
nom::character::streaming::{alpha1, multispace0, multispace1}; use nom::combinator::{map, map_res, not, opt, peek, recognize}; @@ -561,8 +561,8 @@ fn esi_vars_short(input: &[u8]) -> IResult<&[u8], Vec, Error<&[u8]>> { )(input) } -// Parser for content inside esi:vars - handles text, expressions, and most ESI tags (except nested vars) -// NOTE: Supports nested variable expressions like $(VAR{$(other)}) as of the nom migration +// Parser for content inside esi:vars - handles text, expressions, and ESI tags +// NOTE: Supports nested variable expressions like $(VAR{$(other)}) fn esi_vars_content<'a>( original: &Bytes, input: &'a [u8], @@ -673,7 +673,18 @@ fn attributes(input: &[u8]) -> IResult<&[u8], Vec<(String, String)>, Error<&[u8] } fn htmlstring(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { - delimited(tag(b"\""), is_not(&b"\""[..]), tag(b"\""))(input) + alt(( + delimited( + double_quote, + take_while(|c| !is_double_quote(c)), + double_quote, + ), + delimited( + single_quote, + take_while(|c| !is_single_quote(c)), + single_quote, + ), + ))(input) } // Used by parse_interpolated - zero-copy with original Bytes reference @@ -695,6 +706,8 @@ fn interpolated_text<'a>( fn closing_bracket(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { tag(b">")(input) } + +/// Helper to find and consume the closing self-closing tag characters '/>' #[inline] fn self_closing(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { tag(b"/>")(input) @@ -706,6 +719,28 @@ fn opening_bracket(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { tag(b"<")(input) } +/// Helper to find and consume the closing double quote character +#[inline] +fn double_quote(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { + tag(b"\"")(input) +} + +/// Helper to find and consume the closing single quote character +#[inline] +fn single_quote(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { + tag(b"\'")(input) +} + +#[inline] +fn is_double_quote(b: u8) -> bool { + b == b'\"' +} + 
+#[inline] +fn is_single_quote(b: u8) -> bool { + b == b'\'' +} + /// Check if byte can start an HTML/XML tag name (including special constructs like

Content

"#, + ), + ( + "vars_long", + r#"User agent: $(HTTP_USER_AGENT), Host: $(HTTP_HOST)"#, + ), + ("assign_short", r#""#), + ( + "assign_long", + r#"Some value with $(VAR)"#, + ), + ( + "choose_multiple_when", + r#" + + +

Premium content

+
+ +

Basic content

+
+ +

Regular content

+
+
+ "#, + ), + ( + "expression_comparison", + r#" + High + Medium + Low + "#, + ), + ( + "expression_logical", + r#" + Access granted + Access denied + "#, + ), + ( + "script_tag", + r#"Content"#, + ), + ( + "mixed_content", + r#" +
+ Text before + + Text after + $(VAR) + More text + + + Final text +
+ "#, + ), + ]; + + for (name, xml) in documents { + group.bench_with_input(BenchmarkId::from_parameter(name), &xml, |b, xml| { + b.iter(|| { + let bytes = Bytes::from(*xml); + let result = parse(black_box(&bytes)).unwrap(); + black_box(result); + }); + }); + } + + group.finish(); +} + +fn benchmark_parser_scaling(c: &mut Criterion) { + let mut group = c.benchmark_group("parser_scaling"); + + // Test how parser scales with document size + let sizes = vec![100, 500, 1000, 5000, 10000]; + + for size in sizes { + let mut doc = String::new(); + doc.push_str(""); + + for i in 0..size { + doc.push_str(&format!( + r#"
Item {}
$(VAR_{})"#, + i, i + )); + } + + doc.push_str(""); + + group.bench_with_input( + BenchmarkId::from_parameter(format!("elements_{}", size * 2)), + &doc, + |b, doc| { + b.iter(|| { + let bytes = Bytes::copy_from_slice(doc.as_bytes()); + let result = parse(black_box(&bytes)).unwrap(); + black_box(result); + }); + }, + ); + } + + group.finish(); +} + +fn benchmark_expression_parsing(c: &mut Criterion) { + let mut group = c.benchmark_group("expression_parsing"); + + let expressions = vec![ + ("simple_var", "$(VAR)"), + ("var_with_key", "$(HTTP_COOKIE{name})"), + ("var_with_default", "$(VAR|'default')"), + ("integer", "42"), + ("string", "'hello world'"), + ("comparison_eq", "$(count) == 10"), + ("comparison_ne", "$(status) != 'error'"), + ("comparison_gt", "$(value) > 100"), + ("comparison_lte", "$(score) <= 50"), + ("logical_and", "$(a) == 1 && $(b) == 2"), + ("logical_or", "$(x) == 'yes' || $(y) == 'no'"), + ("negation", "!($(flag))"), + ("grouped", "($(a) == 1) && ($(b) == 2)"), + ( + "complex", + "(($(role) == 'admin') || ($(role) == 'mod')) && $(active) != false", + ), + ("function_call", "$url_encode($(path))"), + ("nested_function", "$base64_encode($url_encode($(text)))"), + ]; + + for (name, expr) in expressions { + group.bench_with_input(BenchmarkId::from_parameter(name), &expr, |b, expr| { + b.iter(|| { + let result = esi::parse_expression(black_box(expr)).unwrap(); + black_box(result); + }); + }); + } + + group.finish(); +} + +fn benchmark_interpolated_strings(c: &mut Criterion) { + let mut group = c.benchmark_group("interpolated_strings"); + + let strings = vec![ + ("no_interpolation", "Just plain text"), + ("single_var", "Hello $(name)"), + ("multiple_vars", "$(first) $(middle) $(last)"), + ( + "mixed_content", + "User: $(user), Email: $(email), Role: $(role)", + ), + ( + "with_html", + "
Welcome $(user)!

Your score: $(score)

", + ), + ]; + + for (name, string) in strings { + group.bench_with_input(BenchmarkId::from_parameter(name), &string, |b, string| { + b.iter(|| { + let bytes = Bytes::from(*string); + let result = esi::parse_interpolated_string(black_box(&bytes)).unwrap(); + black_box(result); + }); + }); + } + + group.finish(); +} + +criterion_group!( + benches, + benchmark_various_esi_documents, + benchmark_nom_parser_features, + benchmark_parser_scaling, + benchmark_expression_parsing, + benchmark_interpolated_strings +); +criterion_main!(benches); diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 60f80de..8aeeb47 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -17,7 +17,7 @@ use std::collections::VecDeque; use std::io::{BufRead, Write}; pub use crate::error::{ExecutionError as ESIError, Result}; -pub use crate::parser::{parse, parse_complete}; +pub use crate::parser::{parse, parse_complete, parse_expression, parse_interpolated_string}; pub use crate::config::Configuration; pub use crate::error::ExecutionError; diff --git a/esi/src/parser.rs b/esi/src/parser.rs index 6f530a6..1632e28 100644 --- a/esi/src/parser.rs +++ b/esi/src/parser.rs @@ -455,10 +455,10 @@ fn esi_choose<'a>( ) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { let (input, _) = tag(b"")(input)?; let (input, v) = parse_interpolated(original, input)?; - eprintln!( - "esi_choose: parse_interpolated returned {} elements", - v.len() - ); + // eprintln!( + // "esi_choose: parse_interpolated returned {} elements", + // v.len() + // ); let (input, _) = tag(b"")(input)?; let mut when_branches = vec![]; From d0df9104084fc2c1ee71af3c99e2b16f5f4ee44e Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Sun, 14 Dec 2025 14:50:04 -0600 Subject: [PATCH 053/119] Refactor Value display logic: Simplify to_string method and enhance Display implementation --- esi/src/expression.rs | 23 ++++++----------------- esi/src/lib.rs | 2 +- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/esi/src/expression.rs 
b/esi/src/expression.rs index ed067fd..0b31e14 100644 --- a/esi/src/expression.rs +++ b/esi/src/expression.rs @@ -323,22 +323,6 @@ impl Value { Self::Null => Bytes::new(), } } - - /// Convert Value to string for display/processing - pub(crate) fn to_string(&self) -> String { - match self { - Self::Integer(i) => i.to_string(), - Self::Text(b) => String::from_utf8_lossy(b.as_ref()).into_owned(), - Self::Boolean(b) => { - if *b { - "true".to_string() - } else { - "false".to_string() - } - } - Self::Null => String::new(), // Empty string, not "null" - } - } } impl From for Value { @@ -363,7 +347,12 @@ impl From for Value { impl Display for Value { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.to_string()) + match self { + Self::Integer(i) => write!(f, "{}", i), + Self::Text(b) => write!(f, "{}", String::from_utf8_lossy(b.as_ref())), + Self::Boolean(b) => write!(f, "{}", if *b { "true" } else { "false" }), + Self::Null => Ok(()), // Empty string for Null + } } } diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 8aeeb47..bc369e0 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -696,7 +696,7 @@ impl Processor { Ok(pending) => { let fragment = Fragment { request: req, - alt: alt.map(|s| s.clone()), + alt: alt.cloned(), continue_on_error, pending_content: pending, }; From 0e719ad5e5e45fcca0b5ceef1f782117f998860c Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Sun, 14 Dec 2025 15:37:27 -0600 Subject: [PATCH 054/119] Refactor parser: Optimize attribute handling by switching from Vec to HashMap for improved performance and clarity --- esi/src/lib.rs | 11 +++--- esi/src/parser.rs | 89 ++++++++++++++++++----------------------------- 2 files changed, 39 insertions(+), 61 deletions(-) diff --git a/esi/src/lib.rs b/esi/src/lib.rs index bc369e0..4fc59aa 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -810,10 +810,10 @@ impl Processor { ) -> Result<()> { while !self.queue.is_empty() { // First, write out any content that's 
at the front - while let Some(QueuedElement::Content(bytes)) = self.queue.front() { - let bytes = bytes.clone(); - self.queue.pop_front(); - output_writer.write_all(&bytes)?; + while let Some(QueuedElement::Content(_)) = self.queue.front() { + if let Some(QueuedElement::Content(bytes)) = self.queue.pop_front() { + output_writer.write_all(&bytes)?; + } } if self.queue.is_empty() { @@ -1011,7 +1011,6 @@ impl Processor { dispatcher: &FragmentRequestDispatcher, processor: Option<&FragmentResponseProcessor>, ) -> Result<()> { - let alt = fragment.alt.clone(); let continue_on_error = fragment.continue_on_error; // Wait for response @@ -1031,7 +1030,7 @@ impl Processor { // Write Bytes directly - no UTF-8 conversion needed! output_writer.write_all(&body_bytes)?; Ok(()) - } else if let Some(alt_src) = alt { + } else if let Some(alt_src) = fragment.alt { // Try alt debug!("Main request failed, trying alt"); let alt_interpolated = try_evaluate_interpolated_string(&alt_src, &mut self.ctx)?; diff --git a/esi/src/parser.rs b/esi/src/parser.rs index 1632e28..9d6c948 100644 --- a/esi/src/parser.rs +++ b/esi/src/parser.rs @@ -11,6 +11,7 @@ use nom::error::Error; use nom::multi::{fold_many0, many0, many_till, separated_list0}; use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; use nom::IResult; +use std::collections::HashMap; use crate::parser_types::*; @@ -245,16 +246,9 @@ fn esi_assign<'a>( alt((esi_assign_short, |i| esi_assign_long(original, i)))(input) } -fn assign_attributes_short(attrs: Vec<(String, String)>) -> Vec { - let mut name = String::new(); - let mut value_str = String::new(); - for (key, val) in attrs { - match key.as_str() { - "name" => name = val, - "value" => value_str = val, - _ => {} - } - } +fn assign_attributes_short(attrs: HashMap) -> Vec { + let name = attrs.get("name").cloned().unwrap_or_default(); + let value_str = attrs.get("value").cloned().unwrap_or_default(); // Per ESI spec, short form value attribute contains an expression 
// Try to parse as ESI expression. If it fails, treat as string literal. @@ -262,20 +256,15 @@ fn assign_attributes_short(attrs: Vec<(String, String)>) -> Vec { Ok((_, expr)) => expr, Err(_) => { // If parsing fails (e.g., plain text), treat as a string literal - Expr::String(Some(value_str.clone())) + Expr::String(Some(value_str)) } }; vec![Element::Esi(Tag::Assign { name, value })] } -fn assign_long(attrs: Vec<(String, String)>, content: Vec) -> Vec { - let mut name = String::new(); - for (key, val) in attrs { - if key == "name" { - name = val; - } - } +fn assign_long(attrs: HashMap, mut content: Vec) -> Vec { + let name = attrs.get("name").cloned().unwrap_or_default(); // Per ESI spec, long form value comes from content between tags // Content is already parsed as Vec (can be text, expressions, etc.) @@ -284,19 +273,21 @@ fn assign_long(attrs: Vec<(String, String)>, content: Vec) -> Vec expr, - Err(_) => Expr::String(Some(text_str)), + // Single element - pop to take ownership + match content.pop().expect("checked len == 1") { + Element::Expr(expr) => expr, + Element::Text(text) => { + // Try to parse the text as an expression + let text_str = String::from_utf8_lossy(text.as_ref()).to_string(); + match parse_expression(&text_str) { + Ok((_, expr)) => expr, + Err(_) => Expr::String(Some(text_str)), + } + } + _ => { + // HTML or other - treat as empty string + Expr::String(Some(String::new())) } - } else { - // HTML or other - treat as empty string - Expr::String(Some(String::new())) } } else { // Multiple elements - this is a compound expression per ESI spec @@ -429,16 +420,8 @@ fn esi_when<'a>( tag(b""), )), |(attrs, content, _)| { - let test = attrs - .iter() - .find(|(key, _)| key == "test") - .map(|(_, val)| val.clone()) - .unwrap_or_default(); - - let match_name = attrs - .iter() - .find(|(key, _)| key == "matchname") - .map(|(_, val)| val.clone()); + let test = attrs.get("test").cloned().unwrap_or_default(); + let match_name = 
attrs.get("matchname").cloned(); // Return the When tag followed by its content elements as a marker let mut result = vec![Element::Esi(Tag::When { test, match_name })]; @@ -536,8 +519,8 @@ fn esi_vars<'a>( alt((esi_vars_short, |i| esi_vars_long(original, i)))(input) } -fn parse_vars_attributes(attrs: Vec<(String, String)>) -> Result, &'static str> { - if let Some((_k, v)) = attrs.iter().find(|(k, _v)| k == "name") { +fn parse_vars_attributes(attrs: HashMap) -> Result, &'static str> { + if let Some(v) = attrs.get("name") { if let Ok((_, expr)) = expression(v.as_bytes()) { Ok(expr) } else { @@ -633,18 +616,14 @@ fn esi_include(input: &[u8]) -> IResult<&[u8], Vec, Error<&[u8]>> { attributes, preceded(multispace0, alt((closing_bracket, self_closing))), ), - |attrs| { - let mut src = Bytes::new(); - let mut alt = None; - let mut continue_on_error = false; - for (key, val) in attrs { - match key.as_str() { - "src" => src = Bytes::from(val), - "alt" => alt = Some(Bytes::from(val)), - "onerror" => continue_on_error = &val == "continue", - _ => {} - } - } + |mut attrs| { + let src = attrs.remove("src").map(Bytes::from).unwrap_or_default(); + let alt = attrs.remove("alt").map(Bytes::from); + let continue_on_error = attrs + .get("onerror") + .map(|s| s == "continue") + .unwrap_or(false); + vec![Element::Esi(Tag::Include { src, alt, @@ -654,7 +633,7 @@ fn esi_include(input: &[u8]) -> IResult<&[u8], Vec, Error<&[u8]>> { )(input) } -fn attributes(input: &[u8]) -> IResult<&[u8], Vec<(String, String)>, Error<&[u8]>> { +fn attributes(input: &[u8]) -> IResult<&[u8], HashMap, Error<&[u8]>> { map( many0(separated_pair( preceded(multispace1, alpha1), From 87ad082ab147df2623c45608059719cabd7764d3 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Mon, 15 Dec 2025 20:01:23 -0600 Subject: [PATCH 055/119] Refactor parser: Replace Vec with ParseResult to optimize memory allocation and improve handling of parser outputs --- esi/src/parser.rs | 291 
+++++++++++++++++++++++++++++----------------- 1 file changed, 186 insertions(+), 105 deletions(-) diff --git a/esi/src/parser.rs b/esi/src/parser.rs index 9d6c948..0604851 100644 --- a/esi/src/parser.rs +++ b/esi/src/parser.rs @@ -3,12 +3,12 @@ use nom::branch::alt; // Using STREAMING parsers - they return Incomplete when they need more data // This enables TRUE bounded-memory streaming use nom::bytes::streaming::{ - tag, tag_no_case, take, take_till, take_until, take_while, take_while1, take_while_m_n, + tag, tag_no_case, take_till, take_until, take_while, take_while1, take_while_m_n, }; use nom::character::streaming::{alpha1, multispace0, multispace1}; use nom::combinator::{map, map_res, not, opt, peek, recognize}; use nom::error::Error; -use nom::multi::{fold_many0, many0, many_till, separated_list0}; +use nom::multi::{fold_many0, many0, separated_list0}; use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; use nom::IResult; use std::collections::HashMap; @@ -50,6 +50,30 @@ enum ParsingMode { Complete, } +/// Parser output that avoids Vec allocation for single elements +/// This is a key optimization: most parsers return exactly one element, +/// so we avoid the Vec allocation overhead in the common case. 
+enum ParseResult { + /// Single element (most common case - no Vec allocation) + Single(Element), + /// Multiple elements (for parsers that return variable number of elements) + Multiple(Vec), + /// No elements (for esi:comment, esi:remove that produce nothing) + Empty, +} + +impl ParseResult { + /// Append elements to an existing Vec + #[inline] + fn append_to(self, acc: &mut Vec) { + match self { + ParseResult::Single(e) => acc.push(e), + ParseResult::Multiple(mut v) => acc.append(&mut v), + ParseResult::Empty => {} + } + } +} + /// Zero-copy parse loop that threads Bytes through the parser chain fn parse_loop<'a, F>( original: &Bytes, @@ -58,15 +82,15 @@ fn parse_loop<'a, F>( incomplete_strategy: ParsingMode, ) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> where - F: FnMut(&Bytes, &'a [u8]) -> IResult<&'a [u8], Vec, Error<&'a [u8]>>, + F: FnMut(&Bytes, &'a [u8]) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>>, { let mut result = Vec::new(); let mut remaining = input; loop { match parser(original, remaining) { - Ok((rest, mut elements)) => { - result.append(&mut elements); + Ok((rest, parse_result)) => { + parse_result.append_to(&mut result); // If we consumed nothing, break to avoid infinite loop if rest.len() == remaining.len() { @@ -179,8 +203,8 @@ pub fn parse_interpolated_string(input: &Bytes) -> IResult<&[u8], Vec, loop { match alt((interpolated_expression, |i| interpolated_text(input, i)))(remaining) { - Ok((rest, mut elements)) => { - result.append(&mut elements); + Ok((rest, parse_result)) => { + parse_result.append_to(&mut result); if rest.is_empty() { // Parsed everything return Ok((b"", result)); @@ -209,7 +233,7 @@ pub fn parse_interpolated_string(input: &Bytes) -> IResult<&[u8], Vec, fn element<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { alt((|i| text(original, i), |i| tag_handler(original, i)))(input) } @@ -220,8 +244,8 @@ fn parse_interpolated<'a>( 
fold_many0( |i| interpolated_element(original, i), Vec::new, - |mut acc: Vec, mut item| { - acc.append(&mut item); + |mut acc: Vec, item: ParseResult| { + item.append_to(&mut acc); acc }, )(input) @@ -230,7 +254,7 @@ fn parse_interpolated<'a>( fn interpolated_element<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { alt(( |i| interpolated_text(original, i), interpolated_expression, @@ -242,11 +266,11 @@ fn interpolated_element<'a>( fn esi_assign<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { alt((esi_assign_short, |i| esi_assign_long(original, i)))(input) } -fn assign_attributes_short(attrs: HashMap) -> Vec { +fn assign_attributes_short(attrs: HashMap) -> ParseResult { let name = attrs.get("name").cloned().unwrap_or_default(); let value_str = attrs.get("value").cloned().unwrap_or_default(); @@ -260,10 +284,10 @@ fn assign_attributes_short(attrs: HashMap) -> Vec { } }; - vec![Element::Esi(Tag::Assign { name, value })] + ParseResult::Single(Element::Esi(Tag::Assign { name, value })) } -fn assign_long(attrs: HashMap, mut content: Vec) -> Vec { +fn assign_long(attrs: HashMap, mut content: Vec) -> ParseResult { let name = attrs.get("name").cloned().unwrap_or_default(); // Per ESI spec, long form value comes from content between tags @@ -297,10 +321,10 @@ fn assign_long(attrs: HashMap, mut content: Vec) -> Vec Expr::Interpolated(content) }; - vec![Element::Esi(Tag::Assign { name, value })] + ParseResult::Single(Element::Esi(Tag::Assign { name, value })) } -fn esi_assign_short(input: &[u8]) -> IResult<&[u8], Vec, Error<&[u8]>> { +fn esi_assign_short(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { map( delimited( tag(b" IResult<&[u8], Vec, Error<&[u8]>> fn esi_assign_long<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> 
IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { map( tuple(( delimited( @@ -332,28 +356,28 @@ fn esi_assign_long<'a>( fn esi_except<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { map( delimited( tag(b""), |i| parse_interpolated(original, i), tag(b""), ), - |v| vec![Element::Esi(Tag::Except(v))], + |v| ParseResult::Single(Element::Esi(Tag::Except(v))), )(input) } fn esi_attempt<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { map( delimited( tag(b""), |i| parse_interpolated(original, i), tag(b""), ), - |v| vec![Element::Esi(Tag::Attempt(v))], + |v| ParseResult::Single(Element::Esi(Tag::Attempt(v))), )(input) } @@ -361,7 +385,7 @@ fn esi_attempt<'a>( fn esi_try<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { let (input, _) = tag(b"")(input)?; let (input, v) = parse_interpolated(original, input)?; let (input, _) = tag(b"")(input)?; @@ -379,17 +403,17 @@ fn esi_try<'a>( } Ok(( input, - vec![Element::Esi(Tag::Try { + ParseResult::Single(Element::Esi(Tag::Try { attempt_events: attempts, except_events: except.unwrap_or_default(), - })], + })), )) } fn esi_otherwise<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { map( delimited( tag(b""), @@ -400,7 +424,7 @@ fn esi_otherwise<'a>( // Return the Otherwise tag followed by its content elements let mut result = vec![Element::Esi(Tag::Otherwise)]; result.extend(content); - result + ParseResult::Multiple(result) }, )(input) } @@ -408,7 +432,7 @@ fn esi_otherwise<'a>( fn esi_when<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { map( tuple(( delimited( @@ 
-426,7 +450,7 @@ fn esi_when<'a>( // Return the When tag followed by its content elements as a marker let mut result = vec![Element::Esi(Tag::When { test, match_name })]; result.extend(content); - result + ParseResult::Multiple(result) }, )(input) } @@ -435,13 +459,9 @@ fn esi_when<'a>( fn esi_choose<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { let (input, _) = tag(b"")(input)?; let (input, v) = parse_interpolated(original, input)?; - // eprintln!( - // "esi_choose: parse_interpolated returned {} elements", - // v.len() - // ); let (input, _) = tag(b"")(input)?; let mut when_branches = vec![]; @@ -501,10 +521,10 @@ fn esi_choose<'a>( Ok(( input, - vec![Element::Esi(Tag::Choose { + ParseResult::Single(Element::Esi(Tag::Choose { when_branches, otherwise_events, - })], + })), )) } @@ -515,14 +535,14 @@ fn esi_choose<'a>( fn esi_vars<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { alt((esi_vars_short, |i| esi_vars_long(original, i)))(input) } -fn parse_vars_attributes(attrs: HashMap) -> Result, &'static str> { +fn parse_vars_attributes(attrs: HashMap) -> Result { if let Some(v) = attrs.get("name") { if let Ok((_, expr)) = expression(v.as_bytes()) { - Ok(expr) + Ok(ParseResult::Multiple(expr)) } else { Err("failed to parse expression") } @@ -531,7 +551,7 @@ fn parse_vars_attributes(attrs: HashMap) -> Result, } } -fn esi_vars_short(input: &[u8]) -> IResult<&[u8], Vec, Error<&[u8]>> { +fn esi_vars_short(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { map_res( delimited( tag(b"( |i| tag_handler(original, i), )), Vec::new, - |mut acc: Vec, mut item| { - acc.append(&mut item); + |mut acc: Vec, item: ParseResult| { + item.append_to(&mut acc); acc }, )(input) @@ -565,23 +585,23 @@ fn esi_vars_content<'a>( fn esi_vars_long<'a>( original: &Bytes, input: &'a [u8], -) -> 
IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { // Use parse_vars_content instead of parse_interpolated to avoid infinite recursion let (input, _) = tag(b"")(input)?; let (input, elements) = esi_vars_content(original, input)?; let (input, _) = tag(b"")(input)?; - Ok((input, elements)) + Ok((input, ParseResult::Multiple(elements))) } -fn esi_comment(input: &[u8]) -> IResult<&[u8], Vec, Error<&[u8]>> { +fn esi_comment(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { map( delimited( tag(b" IResult<&[u8], Vec, Error<&[u8]>> { fn esi_remove<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { let (input, _) = tag(b"")(input)?; let (input, _) = parse_interpolated(original, input)?; let (input, _) = tag(b"")(input)?; - Ok((input, vec![])) + Ok((input, ParseResult::Empty)) } fn esi_text<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { map( delimited( tag(b""), take_until(b"".as_ref()), tag(b""), ), - |v| vec![Element::Text(slice_as_bytes(original, v))], + |v| ParseResult::Single(Element::Text(slice_as_bytes(original, v))), )(input) } -fn esi_include(input: &[u8]) -> IResult<&[u8], Vec, Error<&[u8]>> { +fn esi_include(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { map( delimited( tag(b" IResult<&[u8], Vec, Error<&[u8]>> { .map(|s| s == "continue") .unwrap_or(false); - vec![Element::Esi(Tag::Include { + ParseResult::Single(Element::Esi(Tag::Include { src, alt, continue_on_error, - })] + })) }, )(input) } fn attributes(input: &[u8]) -> IResult<&[u8], HashMap, Error<&[u8]>> { - map( - many0(separated_pair( - preceded(multispace1, alpha1), - tag(b"="), - htmlstring, - )), - |pairs| { - pairs - .into_iter() - .map(|(k, v)| (bytes_to_string(k), bytes_to_string(v))) - .collect() + // map( + // many0(separated_pair( + // 
preceded(multispace1, alpha1), + // tag(b"="), + // htmlstring, + // )), + // |pairs| { + // pairs + // .into_iter() + // .map(|(k, v)| (bytes_to_string(k), bytes_to_string(v))) + // .collect() + // }, + // )(input) + fold_many0( + separated_pair(preceded(multispace1, alpha1), tag(b"="), htmlstring), + HashMap::new, + |mut acc, (k, v)| { + acc.insert(bytes_to_string(k), bytes_to_string(v)); + acc }, )(input) } @@ -668,10 +696,10 @@ fn htmlstring(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { fn interpolated_text<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { map( recognize(take_while1(|c| !is_opening_bracket(c) && !is_dollar(c))), - |s: &[u8]| vec![Element::Text(slice_as_bytes(original, s))], + |s: &[u8]| ParseResult::Single(Element::Text(slice_as_bytes(original, s))), )(input) } @@ -754,7 +782,7 @@ fn tag_name(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { fn tag_handler<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { alt(( // Try HTML comment first (special syntax `".as_ref()), - tag(b"-->"), - )), - |s: &[u8]| vec![Element::Html(slice_as_bytes(original, s))], + delimited(tag(b"".as_ref()), tag(b"-->")), + |s: &[u8]| ParseResult::Single(Element::Html(slice_as_bytes(original, s))), )(input) } /// Helper to find closing script tag, handling any content including other closing tags /// Looks for IResult<&[u8], &[u8], Error<&[u8]>> { - recognize(many_till(take(1usize), peek(tag_no_case(b"( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { let start = input; // Parse opening tag @@ -859,7 +896,7 @@ fn html_script_tag<'a>( let full_script = &start[..start.len() - input.len()]; Ok(( input, - vec![Element::Html(slice_as_bytes(original, full_script))], + 
ParseResult::Single(Element::Html(slice_as_bytes(original, full_script))), )) } @@ -870,20 +907,20 @@ fn html_script_tag<'a>( fn closing_tag<'a>( original: &Bytes, input: &'a [u8], -) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { // Reject ESI closing tags before trying to parse let (_, _) = peek(not(tag(b"(original: &Bytes, input: &'a [u8]) -> IResult<&'a [u8], Vec, Error<&'a [u8]>> { +fn text<'a>(original: &Bytes, input: &'a [u8]) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { map( recognize(take_while1(|c| !is_opening_bracket(c))), - |s: &[u8]| vec![Element::Text(slice_as_bytes(original, s))], + |s: &[u8]| ParseResult::Single(Element::Text(slice_as_bytes(original, s))), )(input) } @@ -1057,9 +1094,9 @@ fn operator(input: &[u8]) -> IResult<&[u8], Operator, Error<&[u8]>> { ))(input) } -fn interpolated_expression(input: &[u8]) -> IResult<&[u8], Vec, Error<&[u8]>> { +fn interpolated_expression(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { map(alt((esi_function, esi_variable)), |expr| { - vec![Element::Expr(expr)] + ParseResult::Single(Element::Expr(expr)) })(input) } @@ -1210,12 +1247,10 @@ exception! let bytes = Bytes::from_static(input); let (rest, x) = html_script_tag(&bytes, input).unwrap(); assert_eq!(rest.len(), 0); - assert_eq!( + assert!(matches!( x, - [Element::Html(Bytes::from_static( - b"" - ))] - ); + ParseResult::Single(Element::Html(ref h)) if h.as_ref() == b"" + )); } #[test] fn test_parse_script_with_src() { @@ -1223,12 +1258,10 @@ exception! let bytes = Bytes::from_static(input); let (rest, x) = html_script_tag(&bytes, input).unwrap(); assert_eq!(rest.len(), 0); - assert_eq!( + assert!(matches!( x, - [Element::Html(Bytes::from_static( - b"" - ))] - ); + ParseResult::Single(Element::Html(ref h)) if h.as_ref() == b"" + )); } #[test] fn test_parse_esi_vars_short() { @@ -1236,14 +1269,16 @@ exception! 
let bytes = Bytes::from_static(input); let (rest, x) = esi_vars(&bytes, input).unwrap(); assert_eq!(rest.len(), 0); - assert_eq!( - x, - [Element::Expr(Expr::Variable( - "hello".to_string(), - None, - None - )),] - ); + // esi_vars returns Multiple when parsing short form with expression + if let ParseResult::Multiple(elements) = x { + assert_eq!(elements.len(), 1); + assert!(matches!( + &elements[0], + Element::Expr(Expr::Variable(name, None, None)) if name == "hello" + )); + } else { + panic!("Expected ParseResult::Multiple"); + } } #[test] fn test_parse_esi_vars_long() { @@ -1350,11 +1385,11 @@ exception! $(QUERY_STRING{param}) $(QUERY_STRING{$(keyVar)})
"#; - eprintln!("Testing esi_vars on input: {:?}", input); let bytes = Bytes::from_static(input); let result = esi_vars(&bytes, input); - eprintln!("Result: {:?}", result); assert!(result.is_ok(), "esi_vars should parse: {:?}", result.err()); + let (rest, _) = result.unwrap(); + assert_eq!(rest.len(), 0, "Should consume all input"); } #[test] @@ -1539,4 +1574,50 @@ exception! panic!("Expected Assign tag"); } } + #[test] + fn test_unclosed_script_tag() { + // Unclosed script tag - should handle gracefully + let input = b" IResult<&[u8], &[u8], Error<&[u8]>> { - // recognize(many_till(take(1usize), peek(tag_no_case(b"( // Parse opening tag let (input, _) = recognize(delimited( - streaming_bytes::tag_no_case(b"( let (input, _) = opt(tuple(( script_content, recognize(delimited( - streaming_bytes::tag_no_case(b"( input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { // Reject ESI closing tags before trying to parse - let (_, _) = peek(not(streaming_bytes::tag(b"( } // ============================================================================ -// Byte Predicate Constants and Helpers +// Byte Predicate Helpers // ============================================================================ -/// Common byte constants for parsing -const OPEN_BRACKET: u8 = b'<'; -const CLOSE_BRACKET: u8 = b'>'; -const DOLLAR: u8 = b'$'; -const DOUBLE_QUOTE: u8 = b'"'; -const SINGLE_QUOTE: u8 = b'\''; -const SLASH: u8 = b'/'; -const EQUALS: u8 = b'='; - /// Check if byte is the opening bracket '<' #[inline] const fn is_open_bracket(b: u8) -> bool { @@ -1418,17 +1410,20 @@ const fn is_dollar(b: u8) -> bool { } #[inline] const fn is_alphanumeric_or_underscore(c: u8) -> bool { - c.is_ascii_alphanumeric() || c == b'_' + c.is_ascii_alphanumeric() || c == UNDERSCORE } #[inline] const fn is_lower_alphanumeric_or_underscore(c: u8) -> bool { - c.is_ascii_lowercase() || c.is_ascii_digit() || c == b'_' + c.is_ascii_lowercase() || c.is_ascii_digit() || c == UNDERSCORE } fn 
esi_fn_name(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { map( - preceded(tag(b"$"), take_while1(is_lower_alphanumeric_or_underscore)), + preceded( + tag(&[DOLLAR]), + take_while1(is_lower_alphanumeric_or_underscore), + ), bytes_to_string, )(input) } @@ -1437,8 +1432,12 @@ fn esi_var_name(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { map( tuple(( take_while1(is_alphanumeric_or_underscore), - opt(delimited(tag(b"{"), esi_var_key_expr, tag(b"}"))), - opt(preceded(tag(b"|"), fn_nested_argument)), + opt(delimited( + tag(&[OPEN_BRACE]), + esi_var_key_expr, + tag(&[CLOSE_BRACE]), + )), + opt(preceded(tag(PIPE), fn_nested_argument)), )), |(name, key, default): (&[u8], _, _)| { Expr::Variable( @@ -1452,7 +1451,9 @@ fn esi_var_name(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { fn not_dollar_or_curlies(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { map( - take_while(|c| !is_dollar(c) && c != b'{' && c != b'}' && c != b',' && c != DOUBLE_QUOTE), + take_while(|c| { + !is_dollar(c) && c != OPEN_BRACE && c != CLOSE_BRACE && c != COMMA && c != DOUBLE_QUOTE + }), bytes_to_string, )(input) } @@ -1460,13 +1461,21 @@ fn not_dollar_or_curlies(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { // TODO: handle escaping fn single_quoted_string(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { map( - delimited(tag(b"'"), take_while(|c| !is_single_quote(c)), tag(b"'")), + delimited( + tag(&[SINGLE_QUOTE]), + take_while(|c| !is_single_quote(c)), + tag(&[SINGLE_QUOTE]), + ), bytes_to_string, )(input) } fn triple_quoted_string(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { map( - delimited(tag(b"'''"), take_until("'''"), tag(b"'''")), + delimited( + tag(QUOTE_TRIPLE), + take_until(QUOTE_TRIPLE), + tag(QUOTE_TRIPLE), + ), bytes_to_string, )(input) } @@ -1504,7 +1513,7 @@ fn esi_var_key_expr(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { fn fn_argument(input: &[u8]) -> IResult<&[u8], Vec, Error<&[u8]>> { let (input, mut parsed) = 
separated_list0( - tuple((multispace0, tag(b","), multispace0)), + tuple((multispace0, tag(&[COMMA]), multispace0)), fn_nested_argument, )(input)?; @@ -1522,7 +1531,7 @@ fn fn_nested_argument(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { fn integer(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { map_res( recognize(tuple(( - opt(tag(b"-")), + opt(tag(&[HYPHEN])), take_while1(|c: u8| c.is_ascii_digit()), ))), |s: &[u8]| String::from_utf8_lossy(s).parse::().map(Expr::Integer), @@ -1540,9 +1549,9 @@ fn esi_function(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { let (input, parsed) = tuple(( esi_fn_name, delimited( - terminated(tag(b"("), multispace0), + terminated(tag(OPEN_PAREN), multispace0), fn_argument, - preceded(multispace0, tag(b")")), + preceded(multispace0, tag(CLOSE_PAREN)), ), ))(input)?; @@ -1552,7 +1561,7 @@ fn esi_function(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { } fn esi_variable(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { - delimited(tag(b"$("), esi_var_name, tag(b")"))(input) + delimited(tag(VAR_OPEN), esi_var_name, tag(CLOSE_PAREN))(input) } /// Parse all binary operators @@ -1560,24 +1569,24 @@ fn esi_variable(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { fn operator(input: &[u8]) -> IResult<&[u8], Operator, Error<&[u8]>> { alt(( // Longer operators first to avoid partial matches - map(tag(b"matches_i"), |_| Operator::MatchesInsensitive), - map(tag(b"matches"), |_| Operator::Matches), - map(tag(b"has_i"), |_| Operator::HasInsensitive), - map(tag(b"has"), |_| Operator::Has), - map(tag(b"=="), |_| Operator::Equals), - map(tag(b"!="), |_| Operator::NotEquals), - map(tag(b"<="), |_| Operator::LessThanOrEqual), - map(tag(b">="), |_| Operator::GreaterThanOrEqual), - map(tag(b"<"), |_| Operator::LessThan), - map(tag(b">"), |_| Operator::GreaterThan), - map(tag(b"&&"), |_| Operator::And), - map(tag(b"||"), |_| Operator::Or), + map(tag(OP_MATCHES_I), |_| Operator::MatchesInsensitive), + map(tag(OP_MATCHES), 
|_| Operator::Matches), + map(tag(OP_HAS_I), |_| Operator::HasInsensitive), + map(tag(OP_HAS), |_| Operator::Has), + map(tag(OP_EQUALS_COMP), |_| Operator::Equals), + map(tag(OP_NOT_EQUALS), |_| Operator::NotEquals), + map(tag(OP_LESS_EQUAL), |_| Operator::LessThanOrEqual), + map(tag(OP_GREATER_EQUAL), |_| Operator::GreaterThanOrEqual), + map(tag(&[OPEN_BRACKET]), |_| Operator::LessThan), + map(tag(&[CLOSE_BRACKET]), |_| Operator::GreaterThan), + map(tag(OP_AND), |_| Operator::And), + map(tag(OP_OR), |_| Operator::Or), // Arithmetic operators (after comparison to avoid conflicts with <=, >=) - map(tag(b"+"), |_| Operator::Add), - map(tag(b"-"), |_| Operator::Subtract), - map(tag(b"*"), |_| Operator::Multiply), - map(tag(b"/"), |_| Operator::Divide), - map(tag(b"%"), |_| Operator::Modulo), + map(tag(PLUS), |_| Operator::Add), + map(tag(&[HYPHEN]), |_| Operator::Subtract), + map(tag(ASTERISK), |_| Operator::Multiply), + map(tag(SLASH), |_| Operator::Divide), + map(tag(PERCENT), |_| Operator::Modulo), ))(input) } @@ -1598,15 +1607,18 @@ fn interpolated_expression(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[ fn dict_literal(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { map( delimited( - tag(b"{"), + tag(&[OPEN_BRACE]), separated_list0( - tuple((multispace0, tag(b","), multispace0)), + tuple((multispace0, tag(&[COMMA]), multispace0)), tuple(( delimited(multispace0, primary_expr, multispace0), - preceded(tag(b":"), delimited(multispace0, primary_expr, multispace0)), + preceded( + tag(&[COLON]), + delimited(multispace0, primary_expr, multispace0), + ), )), ), - preceded(multispace0, tag(b"}")), + preceded(multispace0, tag(&[CLOSE_BRACE])), ), Expr::DictLiteral, )(input) @@ -1615,12 +1627,12 @@ fn dict_literal(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { fn list_literal(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { map( delimited( - tag(b"["), + tag(&[OPEN_SQ_BRACKET]), separated_list0( - tuple((multispace0, tag(b","), multispace0)), + 
tuple((multispace0, tag(&[COMMA]), multispace0)), delimited(multispace0, primary_expr, multispace0), ), - preceded(multispace0, tag(b"]")), + preceded(multispace0, tag(CLOSE_SQ_BRACKET)), ), Expr::ListLiteral, )(input) @@ -1632,9 +1644,9 @@ fn primary_expr(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { alt(( // Parse grouped expression: (expr) delimited( - tag(b"("), + tag(OPEN_PAREN), delimited(multispace0, expr, multispace0), - tag(b")"), + tag(CLOSE_PAREN), ), // Parse dictionary literal: {key:value, key:value} dict_literal, @@ -1676,7 +1688,7 @@ fn unary_expr(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { alt(( // Parse negation: !expr (recursively handles multiple !) map( - preceded(tag(b"!"), preceded(multispace0, unary_expr)), + preceded(tag(&[EXCLAMATION]), preceded(multispace0, unary_expr)), |expr| Expr::Not(Box::new(expr)), ), // Otherwise parse primary expression From c81c3d754d08572a8a119b847729de796b581d49 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Thu, 26 Feb 2026 11:17:18 -0600 Subject: [PATCH 078/119] refactor(include): streamline handling of include parameters and attributes --- esi/src/lib.rs | 21 ++++++++------------- esi/src/parser.rs | 17 +++++++++-------- esi/src/parser_types.rs | 6 +++--- esi/tests/parser.rs | 38 +++++++++++++++++++------------------- 4 files changed, 39 insertions(+), 43 deletions(-) diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 97d7f8a..845e92f 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -563,11 +563,10 @@ impl Processor { /// Handle esi:include tag fn handle_include( &mut self, - params: &[(String, Expr)], attrs: &parser_types::IncludeAttributes, dispatcher: &FragmentRequestDispatcher, ) -> Result { - let queued_element = self.process_include_tag(params, attrs, dispatcher)?; + let queued_element = self.process_include_tag(attrs, dispatcher)?; self.queue.push_back(queued_element); Ok(false) } @@ -675,9 +674,9 @@ impl Processor { } } } - 
parser_types::Element::Esi(parser_types::Tag::Include { params, attrs }) => { + parser_types::Element::Esi(parser_types::Tag::Include { attrs }) => { // Dispatch the include and add to queue - let queued_element = self.process_include_tag(¶ms, &attrs, dispatcher)?; + let queued_element = self.process_include_tag(&attrs, dispatcher)?; queue.push(queued_element); } parser_types::Element::Esi(parser_types::Tag::Choose { @@ -792,9 +791,7 @@ impl Processor { Element::Esi(Tag::Vars { name }) => self.handle_vars(name), - Element::Esi(Tag::Include { params, attrs }) => { - self.handle_include(¶ms, &attrs, dispatcher) - } + Element::Esi(Tag::Include { attrs }) => self.handle_include(&attrs, dispatcher), Element::Esi(Tag::Choose { when_branches, @@ -834,18 +831,16 @@ impl Processor { /// Returns a `QueuedElement` ready to be added to any queue (main/attempt/except) fn process_include_tag( &mut self, - params: &[(String, Expr)], attrs: &parser_types::IncludeAttributes, dispatcher: &FragmentRequestDispatcher, ) -> Result { - self.dispatch_include_to_element(params, attrs, dispatcher) + self.dispatch_include_to_element(attrs, dispatcher) } /// Dispatch an include and return a `QueuedElement` (for flexible queue insertion) /// This is the single source of truth for include dispatching logic fn dispatch_include_to_element( &mut self, - params: &[(String, Expr)], attrs: &parser_types::IncludeAttributes, dispatcher: &FragmentRequestDispatcher, ) -> Result { @@ -862,15 +857,15 @@ impl Processor { // Evaluate params and append to URL // Use Cow to avoid allocation when params are empty and bytes are valid UTF-8 - let final_src = if params.is_empty() { + let final_src = if attrs.params.is_empty() { src_bytes } else { let url_cow = String::from_utf8_lossy(&src_bytes); - let mut url = String::with_capacity(url_cow.len() + params.len() * 20); + let mut url = String::with_capacity(url_cow.len() + attrs.params.len() * 20); url.push_str(&url_cow); let mut separator = if url.contains('?') { 
'&' } else { '?' }; - for (name, value_expr) in params { + for (name, value_expr) in &attrs.params { let value = self.evaluate_expr_to_bytes(value_expr)?; let value_str = String::from_utf8_lossy(&value); // Direct string building is more efficient than format! diff --git a/esi/src/parser.rs b/esi/src/parser.rs index 1fddf01..c2292cd 100644 --- a/esi/src/parser.rs +++ b/esi/src/parser.rs @@ -986,7 +986,10 @@ fn esi_include(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { } /// Helper to extract include attributes from the attributes `HashMap` -fn extract_include_attrs(mut attrs: HashMap) -> IncludeAttributes { +fn extract_include_attrs( + mut attrs: HashMap, + params: Vec<(String, Expr)>, +) -> IncludeAttributes { let src = parse_attr_as_expr(take_attr(&mut attrs, "src")); let alt = take_attr_opt(&mut attrs, "alt").map(parse_attr_as_expr); let continue_on_error = attrs.get("onerror").is_some_and(|s| s == "continue"); @@ -1044,6 +1047,7 @@ fn extract_include_attrs(mut attrs: HashMap) -> IncludeAttribute appendheaders, removeheaders, setheaders, + params, } } @@ -1055,12 +1059,9 @@ fn esi_include_self_closing(input: &[u8]) -> IResult<&[u8], ParseResult, Error<& preceded(streaming_char::multispace0, streaming_self_closing), ), |attrs| { - let attrs = extract_include_attrs(attrs); + let attrs = extract_include_attrs(attrs, Vec::new()); - ParseResult::Single(Element::Esi(Tag::Include { - params: Vec::new(), - attrs, - })) + ParseResult::Single(Element::Esi(Tag::Include { attrs })) }, )(input) } @@ -1080,9 +1081,9 @@ fn esi_include_with_params(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[ ), )), |(attrs, params, _)| { - let attrs = extract_include_attrs(attrs); + let attrs = extract_include_attrs(attrs, params); - ParseResult::Single(Element::Esi(Tag::Include { params, attrs })) + ParseResult::Single(Element::Esi(Tag::Include { attrs })) }, )(input) } diff --git a/esi/src/parser_types.rs b/esi/src/parser_types.rs index 5c81438..36c1da9 100644 --- 
a/esi/src/parser_types.rs +++ b/esi/src/parser_types.rs @@ -25,6 +25,8 @@ pub struct IncludeAttributes { pub removeheaders: Vec, /// Headers to set on the request (replaces existing) pub setheaders: Vec<(String, Expr)>, + /// Child elements for query parameters + pub params: Vec<(String, Expr)>, } /// Represents a single when branch in a choose block @@ -38,9 +40,7 @@ pub struct WhenBranch { #[derive(Debug, PartialEq, Clone)] pub enum Tag { Include { - /// Child elements (not attributes) - params: Vec<(String, Expr)>, - /// All include tag attributes + /// All include tag attributes (including params) attrs: IncludeAttributes, }, Try { diff --git a/esi/tests/parser.rs b/esi/tests/parser.rs index 7d00143..df122b6 100644 --- a/esi/tests/parser.rs +++ b/esi/tests/parser.rs @@ -15,11 +15,11 @@ fn test_parse_basic_include() { // Find the Include tag let include_found = elements.iter().any(|element| { matches!(element, esi::parser_types::Element::Esi( - esi::parser_types::Tag::Include { attrs, params, .. } + esi::parser_types::Tag::Include { attrs, .. } ) if matches!(&attrs.src, esi::parser_types::Expr::String(Some(s)) if s == "https://example.com/hello") && attrs.alt.is_none() && !attrs.continue_on_error - && params.is_empty()) + && attrs.params.is_empty()) }); assert!( @@ -38,11 +38,11 @@ fn test_parse_include_with_alt_and_onerror() { let include_found = elements.iter().any(|element| { matches!(element, esi::parser_types::Element::Esi( - esi::parser_types::Tag::Include { attrs, params, .. } + esi::parser_types::Tag::Include { attrs, .. 
} ) if matches!(&attrs.src, esi::parser_types::Expr::String(Some(s)) if s == "abc") && matches!(&attrs.alt, Some(esi::parser_types::Expr::String(Some(a))) if a == "def") && attrs.continue_on_error - && params.is_empty()) + && attrs.params.is_empty()) }); assert!( @@ -82,11 +82,11 @@ fn test_parse_include_with_onerror() { let include_found = elements.iter().any(|element| { matches!(element, esi::parser_types::Element::Esi( - esi::parser_types::Tag::Include { attrs, params, .. } + esi::parser_types::Tag::Include { attrs, .. } ) if matches!(&attrs.src, esi::parser_types::Expr::String(Some(s)) if s == "/_fragments/content.html") && attrs.alt.is_none() && attrs.continue_on_error - && params.is_empty()) + && attrs.params.is_empty()) }); assert!(include_found, "Should find Include with onerror=continue"); @@ -104,13 +104,13 @@ fn test_parse_include_with_single_param() { let include_found = elements.iter().any(|element| { matches!(element, esi::parser_types::Element::Esi( - esi::parser_types::Tag::Include { attrs, params, .. } + esi::parser_types::Tag::Include { attrs, .. } ) if matches!(&attrs.src, esi::parser_types::Expr::String(Some(s)) if s == "/fragment") && attrs.alt.is_none() && !attrs.continue_on_error - && params.len() == 1 - && params[0].0 == "foo" - && matches!(¶ms[0].1, esi::parser_types::Expr::String(Some(v)) if v == "bar")) + && attrs.params.len() == 1 + && attrs.params[0].0 == "foo" + && matches!(&attrs.params[0].1, esi::parser_types::Expr::String(Some(v)) if v == "bar")) }); assert!(include_found, "Should find Include with one param"); @@ -130,14 +130,14 @@ fn test_parse_include_with_multiple_params() { let include_found = elements.iter().any(|element| { matches!(element, esi::parser_types::Element::Esi( - esi::parser_types::Tag::Include { attrs, params, .. } + esi::parser_types::Tag::Include { attrs, .. 
} ) if matches!(&attrs.src, esi::parser_types::Expr::String(Some(s)) if s == "/fragment") && matches!(&attrs.alt, Some(esi::parser_types::Expr::String(Some(a))) if a == "/fallback") && attrs.continue_on_error - && params.len() == 3 - && params[0].0 == "user" && matches!(¶ms[0].1, esi::parser_types::Expr::String(Some(v)) if v == "alice") - && params[1].0 == "role" && matches!(¶ms[1].1, esi::parser_types::Expr::String(Some(v)) if v == "admin") - && params[2].0 == "id" && matches!(¶ms[2].1, esi::parser_types::Expr::Integer(123))) + && attrs.params.len() == 3 + && attrs.params[0].0 == "user" && matches!(&attrs.params[0].1, esi::parser_types::Expr::String(Some(v)) if v == "alice") + && attrs.params[1].0 == "role" && matches!(&attrs.params[1].1, esi::parser_types::Expr::String(Some(v)) if v == "admin") + && attrs.params[2].0 == "id" && matches!(&attrs.params[2].1, esi::parser_types::Expr::Integer(123))) }); assert!(include_found, "Should find Include with multiple params"); @@ -153,8 +153,8 @@ fn test_parse_include_self_closing_has_no_params() { let include_found = elements.iter().any(|element| { matches!(element, esi::parser_types::Element::Esi( - esi::parser_types::Tag::Include { attrs, params, .. } - ) if matches!(&attrs.src, esi::parser_types::Expr::String(Some(s)) if s == "/test") && params.is_empty()) + esi::parser_types::Tag::Include { attrs, .. } + ) if matches!(&attrs.src, esi::parser_types::Expr::String(Some(s)) if s == "/test") && attrs.params.is_empty()) }); assert!(include_found, "Self-closing include should have no params"); @@ -201,10 +201,10 @@ fn test_parse_param_value_with_variable_expression() { // Check what the param value looks like let include_found = elements.iter().find_map(|element| { - if let esi::parser_types::Element::Esi(esi::parser_types::Tag::Include { params, .. }) = + if let esi::parser_types::Element::Esi(esi::parser_types::Tag::Include { attrs, .. 
}) = element { - Some(params) + Some(&attrs.params) } else { None } From 80e4927595f710be22ad15f3d89eb3690351ee62 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Thu, 26 Feb 2026 11:55:20 -0600 Subject: [PATCH 079/119] feat(cache): add uncacheable flag to EvalContext and update cache control logic --- esi/src/expression.rs | 66 +++++++++++++++++++++++++++++++++++++++++-- esi/src/functions.rs | 2 +- esi/src/lib.rs | 3 +- 3 files changed, 66 insertions(+), 5 deletions(-) diff --git a/esi/src/expression.rs b/esi/src/expression.rs index b6a99de..fe2ad6a 100644 --- a/esi/src/expression.rs +++ b/esi/src/expression.rs @@ -304,6 +304,8 @@ pub struct EvalContext { query_params_cache: std::cell::RefCell>>>, /// Minimum TTL seen across all cached includes (in seconds) for rendered document cacheability min_ttl: Option, + /// Flag indicating if the rendered document should not be cached (due to `private`/`no-cache`/`Set-Cookie` in any include) + is_uncacheable: bool, } impl Default for EvalContext { fn default() -> Self { @@ -317,6 +319,7 @@ impl Default for EvalContext { response_body_override: None, query_params_cache: std::cell::RefCell::new(None), min_ttl: None, + is_uncacheable: false, } } } @@ -335,6 +338,7 @@ impl EvalContext { response_body_override: None, query_params_cache: std::cell::RefCell::new(None), min_ttl: None, + is_uncacheable: false, } } @@ -421,7 +425,7 @@ impl EvalContext { Value::Null } else { let mut dict = HashMap::new(); - for (key, values) in params.iter() { + for (key, values) in params { let value = match values.len() { 0 => Value::Null, 1 => Value::Text(values[0].clone()), @@ -516,8 +520,17 @@ impl EvalContext { self.min_ttl = Some(self.min_ttl.map_or(ttl, |current_min| current_min.min(ttl))); } + /// Mark the rendered document as uncacheable (e.g., when an include has Set-Cookie or Cache-Control: private) + pub fn mark_document_uncacheable(&mut self) { + self.is_uncacheable = true; + } + /// Get the cache control header value for the 
rendered document pub fn cache_control_header(&self, rendered_ttl: Option) -> Option { + // If any include was uncacheable (private, no-cache, set-cookie), mark document as uncacheable + if self.is_uncacheable { + return Some("private, no-cache".to_string()); + } let ttl = rendered_ttl.or(self.min_ttl)?; Some(format!("public, max-age={ttl}")) } @@ -668,7 +681,7 @@ impl From for Value { impl Display for Value { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Integer(i) => write!(f, "{}", i), + Self::Integer(i) => write!(f, "{i}"), Self::Text(b) => write!(f, "{}", String::from_utf8_lossy(b.as_ref())), Self::Boolean(b) => write!(f, "{}", if *b { "true" } else { "false" }), Self::List(items) => write!(f, "{}", items_to_string(items)), @@ -690,7 +703,7 @@ fn items_to_string(items: &[Value]) -> String { } fn dict_to_string(map: &HashMap) -> String { - let mut parts: Vec<_> = map.iter().map(|(k, v)| format!("{k}={}", v)).collect(); + let mut parts: Vec<_> = map.iter().map(|(k, v)| format!("{k}={v}")).collect(); parts.sort(); parts.join("&") } @@ -1356,4 +1369,51 @@ mod tests { let result = ctx.get_variable("QUERY_STRING", Some("nonexistent")); assert_eq!(result, Value::Null); } + + #[test] + fn test_cache_control_header_uncacheable() { + let mut ctx = EvalContext::new(); + + // Test that marking document uncacheable returns private, no-cache + ctx.mark_document_uncacheable(); + assert_eq!( + ctx.cache_control_header(None), + Some("private, no-cache".to_string()) + ); + + // Even with rendered_ttl set, uncacheable should take precedence + assert_eq!( + ctx.cache_control_header(Some(600)), + Some("private, no-cache".to_string()) + ); + } + + #[test] + fn test_cache_control_header_with_min_ttl() { + let mut ctx = EvalContext::new(); + + // Test with no TTL set + assert_eq!(ctx.cache_control_header(None), None); + + // Test with min_ttl set + ctx.update_cache_min_ttl(300); + assert_eq!( + ctx.cache_control_header(None), + 
Some("public, max-age=300".to_string()) + ); + + // Test with rendered_ttl override + assert_eq!( + ctx.cache_control_header(Some(600)), + Some("public, max-age=600".to_string()) + ); + + // Test that min_ttl tracks minimum across updates + ctx.update_cache_min_ttl(600); + ctx.update_cache_min_ttl(200); + assert_eq!( + ctx.cache_control_header(None), + Some("public, max-age=200".to_string()) + ); + } } diff --git a/esi/src/functions.rs b/esi/src/functions.rs index 9626ab0..e346805 100644 --- a/esi/src/functions.rs +++ b/esi/src/functions.rs @@ -583,7 +583,7 @@ pub fn time(args: &[Value]) -> Result { .map_err(|_| ExecutionError::FunctionError("system time before UNIX_EPOCH".to_string()))? .as_secs(); - let clamped = secs.min(i32::MAX as u64) as i32; + let clamped = i32::try_from(secs).unwrap_or(i32::MAX); Ok(Value::Integer(clamped)) } diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 845e92f..316ddf7 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -1174,7 +1174,8 @@ impl Processor { Some(ttl) } Ok(None) => { - debug!("Response not cacheable"); + debug!("Response not cacheable (private/no-cache/set-cookie)"); + self.ctx.mark_document_uncacheable(); None } Err(e) => { From 8bd36de40145aa0efd588c776063e4d96d686322 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Thu, 26 Feb 2026 12:12:01 -0600 Subject: [PATCH 080/119] feat(range): implement range operator for list creation and add corresponding tests, cleaned up literals --- esi/src/expression.rs | 129 ++++++++++++++++++++++++++++++++++++++++ esi/src/literals.rs | 11 ++-- esi/src/parser.rs | 46 ++++++++++---- esi/src/parser_types.rs | 2 + esi/tests/esi-tests.rs | 40 +++++++++++++ 5 files changed, 211 insertions(+), 17 deletions(-) diff --git a/esi/src/expression.rs b/esi/src/expression.rs index fe2ad6a..7102ec5 100644 --- a/esi/src/expression.rs +++ b/esi/src/expression.rs @@ -132,6 +132,25 @@ fn eval_comparison( ctx: &mut EvalContext, ) -> Result { match operator { + Operator::Range => { + // Range 
operator creates a list: [start..end] + // Both operands must be integers + match (left_val, right_val) { + (Value::Integer(start), Value::Integer(end)) => { + let values: Vec = if start <= end { + // Ascending range: [1..5] -> [1, 2, 3, 4, 5] + (*start..=*end).map(Value::Integer).collect() + } else { + // Descending range: [5..1] -> [5, 4, 3, 2, 1] + (*end..=*start).rev().map(Value::Integer).collect() + }; + Ok(Value::List(values)) + } + _ => Err(ExecutionError::ExpressionError( + "Range operator (..) requires integer operands".to_string(), + )), + } + } Operator::Matches | Operator::MatchesInsensitive => { let test = left_val.to_string(); let pattern = right_val.to_string(); @@ -1416,4 +1435,114 @@ mod tests { Some("public, max-age=200".to_string()) ); } + + #[test] + fn test_range_operator_ascending() -> Result<()> { + let result = evaluate_expression("[1..5]", &mut EvalContext::new())?; + assert_eq!( + result, + Value::List(vec![ + Value::Integer(1), + Value::Integer(2), + Value::Integer(3), + Value::Integer(4), + Value::Integer(5), + ]) + ); + Ok(()) + } + + #[test] + fn test_range_operator_descending() -> Result<()> { + let result = evaluate_expression("[5..1]", &mut EvalContext::new())?; + assert_eq!( + result, + Value::List(vec![ + Value::Integer(5), + Value::Integer(4), + Value::Integer(3), + Value::Integer(2), + Value::Integer(1), + ]) + ); + Ok(()) + } + + #[test] + fn test_range_operator_single_element() -> Result<()> { + let result = evaluate_expression("[3..3]", &mut EvalContext::new())?; + assert_eq!(result, Value::List(vec![Value::Integer(3)])); + Ok(()) + } + + #[test] + fn test_range_operator_with_variables() -> Result<()> { + let result = evaluate_expression( + "[$(start)..$(end)]", + &mut EvalContext::from([ + ("start".to_string(), Value::Integer(1)), + ("end".to_string(), Value::Integer(10)), + ]), + )?; + assert_eq!( + result, + Value::List(vec![ + Value::Integer(1), + Value::Integer(2), + Value::Integer(3), + Value::Integer(4), + 
Value::Integer(5), + Value::Integer(6), + Value::Integer(7), + Value::Integer(8), + Value::Integer(9), + Value::Integer(10), + ]) + ); + Ok(()) + } + + #[test] + fn test_range_operator_in_expression() -> Result<()> { + // Test that range can be part of a list literal expression + let result = evaluate_expression("[1..3]", &mut EvalContext::new())?; + if let Value::List(items) = result { + assert_eq!(items.len(), 3); + assert_eq!(items[0], Value::Integer(1)); + assert_eq!(items[1], Value::Integer(2)); + assert_eq!(items[2], Value::Integer(3)); + } else { + panic!("Expected a list"); + } + Ok(()) + } + + #[test] + fn test_range_operator_negative_numbers() -> Result<()> { + let result = evaluate_expression("[-2..2]", &mut EvalContext::new())?; + assert_eq!( + result, + Value::List(vec![ + Value::Integer(-2), + Value::Integer(-1), + Value::Integer(0), + Value::Integer(1), + Value::Integer(2), + ]) + ); + Ok(()) + } + + #[test] + fn test_range_operator_requires_integers() { + let result = evaluate_expression( + "['a'..'z']", + &mut EvalContext::new(), + ); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("requires integer operands")); + } } diff --git a/esi/src/literals.rs b/esi/src/literals.rs index 98b263a..6fc6d35 100644 --- a/esi/src/literals.rs +++ b/esi/src/literals.rs @@ -31,7 +31,6 @@ pub const OPEN_BRACKET: u8 = b'<'; pub const CLOSE_BRACKET: u8 = b'>'; // Multi-byte tag sequences -pub const SLASH: &[u8] = b"/"; pub const TAG_SELF_CLOSE: &[u8] = b"/>"; pub const TAG_OPEN_CLOSE: &[u8] = b" IResult<&[u8], Operator, Error<&[u8]>> { map(tag(OP_AND), |_| Operator::And), map(tag(OP_OR), |_| Operator::Or), // Arithmetic operators (after comparison to avoid conflicts with <=, >=) - map(tag(PLUS), |_| Operator::Add), + map(tag(OP_ADD), |_| Operator::Add), map(tag(&[HYPHEN]), |_| Operator::Subtract), - map(tag(ASTERISK), |_| Operator::Multiply), - map(tag(SLASH), |_| Operator::Divide), - map(tag(PERCENT), |_| Operator::Modulo), + 
map(tag(OP_MULTIPLY), |_| Operator::Multiply), + map(tag(OP_DIVIDE), |_| Operator::Divide), + map(tag(OP_MODULO), |_| Operator::Modulo), + // Note: Range (..) is NOT in the general operator list - it's only parsed in list literals ))(input) } @@ -1626,16 +1627,35 @@ fn dict_literal(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { } fn list_literal(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { - map( - delimited( - tag(&[OPEN_SQ_BRACKET]), - separated_list0( - tuple((multispace0, tag(&[COMMA]), multispace0)), - delimited(multispace0, primary_expr, multispace0), + delimited( + tag(&[OPEN_SQ_BRACKET]), + alt(( + // Try range first: [start..end] + map( + tuple(( + delimited(multispace0, primary_expr, multispace0), + tag(OP_RANGE), + delimited(multispace0, primary_expr, multispace0), + )), + |(start, _, end)| { + // Create a Comparison expression with Range operator + Expr::Comparison { + left: Box::new(start), + operator: Operator::Range, + right: Box::new(end), + } + }, ), - preceded(multispace0, tag(CLOSE_SQ_BRACKET)), - ), - Expr::ListLiteral, + // Otherwise parse as regular list: [item, item, ...] 
+ map( + separated_list0( + tuple((multispace0, tag(&[COMMA]), multispace0)), + delimited(multispace0, primary_expr, multispace0), + ), + Expr::ListLiteral, + ), + )), + preceded(multispace0, tag(CLOSE_SQ_BRACKET)), )(input) } diff --git a/esi/src/parser_types.rs b/esi/src/parser_types.rs index 36c1da9..32049ad 100644 --- a/esi/src/parser_types.rs +++ b/esi/src/parser_types.rs @@ -125,4 +125,6 @@ pub enum Operator { Multiply, Divide, Modulo, + // Range operator (for list creation) + Range, } diff --git a/esi/tests/esi-tests.rs b/esi/tests/esi-tests.rs index b8acee5..33634b9 100644 --- a/esi/tests/esi-tests.rs +++ b/esi/tests/esi-tests.rs @@ -1009,6 +1009,46 @@ fn test_foreach_dict_literal() { ); } +// Test foreach with range operator +#[test] +fn test_foreach_with_range() { + init_logs(); + let input = r#"$(n) "#; + let req = Request::get("http://example.com/test"); + let result = process_esi_document(input, req).expect("Processing should succeed"); + assert_eq!( + result, "1 2 3 4 5 6 7 8 9 10 ", + "Should iterate from 1 to 10" + ); +} + +// Test foreach with descending range +#[test] +fn test_foreach_with_range_descending() { + init_logs(); + let input = r#"$(n),"#; + let req = Request::get("http://example.com/test"); + let result = process_esi_document(input, req).expect("Processing should succeed"); + assert_eq!(result, "5,4,3,2,1,", "Should iterate from 5 down to 1"); +} + +// Test foreach with range and variables +#[test] +fn test_foreach_with_range_variables() { + init_logs(); + let input = r#" + + + $(i) + "#; + let req = Request::get("http://example.com/test"); + let result = process_esi_document(input, req).expect("Processing should succeed"); + assert!( + result.contains("1 2 3 4 5"), + "Should use variable-based range" + ); +} + // Test nested foreach with break - ensure break only affects inner loop #[test] fn test_nested_foreach_with_break() { From 24cf69d5dea5f97951cbd199026e6d45e321b202 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Thu, 26 
Feb 2026 21:44:01 -0600 Subject: [PATCH 081/119] feat(eval): introduce <esi:eval> tag with dynamic content assembly support and corresponding tests --- README.md | 66 +++++++- esi/src/expression.rs | 19 +-- esi/src/lib.rs | 152 ++++++++++++++++++- esi/src/literals.rs | 3 + esi/src/parser.rs | 58 ++++++- esi/src/parser_types.rs | 16 ++ esi/tests/eval_tests.rs | 324 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 622 insertions(+), 16 deletions(-) create mode 100644 esi/tests/eval_tests.rs diff --git a/README.md b/README.md index 7f773eb..46e6d6e 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ This crate provides a streaming Edge Side Includes parser and executor designed The implementation is a subset of Akamai ESI 5.0 supporting the following tags: -- `<esi:include>` (+ `alt`, `onerror="continue"`) +- `<esi:include>` +- `<esi:eval>` - evaluates included content as ESI - `` | `` | `` - `` | `` (with subscript support for dict/list assignment) - `` | `` | `` @@ -14,6 +15,69 @@ The implementation is a subset of Akamai ESI 5.0 supporting the following tags: **Note:** The following tags support nested ESI tags: ``, ``, ``, ``, ``, ``, ``, and `` (long form only). +**Dynamic Content Assembly (DCA)**: Both `<esi:include>` and `<esi:eval>` support the `dca` attribute: + +- `dca="none"` (default): For `include`, inserts raw content without ESI processing. For `eval`, fragment executes in parent's context (variables shared). +- `dca="esi"`: For `include`, parses and evaluates the content as ESI before insertion. For `eval`, two-phase processing: fragment is first processed in an isolated context, then the output is processed in parent's context (variables from phase 1 don't leak, but output can contain ESI tags). 
+ +**Include vs Eval**: + +- `<esi:include>`: Fetches content from origin + - `dca="none"`: Inserts content verbatim (no ESI processing) + - `dca="esi"`: Parses and evaluates content as ESI before insertion +- `<esi:eval>`: Fetches content and **always** parses it as ESI (blocking operation) + - `dca="none"`: Evaluates in parent's namespace (variables from fragment affect parent) + - `dca="esi"`: **Two-phase**: Phase 1 processes fragment in isolated context (variables set here stay isolated), then Phase 2 processes the output in parent's context (output can contain ESI that accesses parent variables) + +### Include/Eval Attributes + +Both `<esi:include>` and `<esi:eval>` support the following attributes: + +**Required:** + +- `src="url"` - Source URL to fetch (supports ESI expressions) + +**Fallback & Error Handling:** + +- `alt="url"` - Fallback URL if primary request fails (include only, eval uses try/except) +- `onerror="continue"` - On error, delete the tag with no output (continue processing without failing) + +**Content Processing:** + +- `dca="none|esi"` - Dynamic Content Assembly mode (default: `none`) + - `none`: For include, insert content as-is. For eval, process in parent's context (single-phase). + - `esi`: For include, parse and evaluate as ESI. For eval, two-phase processing: first in isolated context, then output processed in parent context. 
+ +**Caching:** + +- `ttl="duration"` - Cache time-to-live (e.g., `"120m"`, `"1h"`, `"2d"`, `"0s"` to disable) +- `no-store="true"` - Bypass cache entirely + +**Request Configuration:** + +- `maxwait="milliseconds"` - Request timeout in milliseconds +- `method="GET|POST"` - HTTP method (default: `GET`) +- `entity="body"` - Request body for POST requests + +**Headers:** + +- `appendheaders="header:value"` - Append headers to the request +- `removeheaders="header1,header2"` - Remove headers from the request +- `setheaders="header:value"` - Set/replace headers on the request + +**Parameters:** + +- Nested `` elements append query parameters to the URL + +**Example:** + +```html + + + + +``` + Other tags will be ignored and served to the client as-is. ### Expression Features diff --git a/esi/src/expression.rs b/esi/src/expression.rs index 7102ec5..6a2c1ce 100644 --- a/esi/src/expression.rs +++ b/esi/src/expression.rs @@ -540,7 +540,7 @@ impl EvalContext { } /// Mark the rendered document as uncacheable (e.g., when an include has Set-Cookie or Cache-Control: private) - pub fn mark_document_uncacheable(&mut self) { + pub const fn mark_document_uncacheable(&mut self) { self.is_uncacheable = true; } @@ -1392,14 +1392,14 @@ mod tests { #[test] fn test_cache_control_header_uncacheable() { let mut ctx = EvalContext::new(); - + // Test that marking document uncacheable returns private, no-cache ctx.mark_document_uncacheable(); assert_eq!( ctx.cache_control_header(None), Some("private, no-cache".to_string()) ); - + // Even with rendered_ttl set, uncacheable should take precedence assert_eq!( ctx.cache_control_header(Some(600)), @@ -1410,23 +1410,23 @@ mod tests { #[test] fn test_cache_control_header_with_min_ttl() { let mut ctx = EvalContext::new(); - + // Test with no TTL set assert_eq!(ctx.cache_control_header(None), None); - + // Test with min_ttl set ctx.update_cache_min_ttl(300); assert_eq!( ctx.cache_control_header(None), Some("public, max-age=300".to_string()) ); - + // 
Test with rendered_ttl override assert_eq!( ctx.cache_control_header(Some(600)), Some("public, max-age=600".to_string()) ); - + // Test that min_ttl tracks minimum across updates ctx.update_cache_min_ttl(600); ctx.update_cache_min_ttl(200); @@ -1535,10 +1535,7 @@ mod tests { #[test] fn test_range_operator_requires_integers() { - let result = evaluate_expression( - "['a'..'z']", - &mut EvalContext::new(), - ); + let result = evaluate_expression("['a'..'z']", &mut EvalContext::new()); assert!(result.is_err()); assert!(result .unwrap_err() diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 316ddf7..63665fa 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -73,6 +73,8 @@ struct FragmentMetadata { removeheaders: Vec, /// Headers to set on the request setheaders: Vec<(String, Bytes)>, + /// Dynamic content assembly mode - whether to process included content as ESI + dca: parser_types::DcaMode, } /// Representation of an ESI fragment request with its metadata and pending response @@ -571,6 +573,117 @@ impl Processor { Ok(false) } + /// Handle esi:eval tag - BLOCKING operation that fetches and evaluates content as ESI + /// The dca attribute determines how eval processes the fragment: + /// - dca="none" (default): Fragment executed in parent's context (shared variables) + /// - dca="esi": Fragment executed in isolated context (output only) + fn handle_eval( + &mut self, + attrs: &parser_types::IncludeAttributes, + dispatcher: &FragmentRequestDispatcher, + output_writer: &mut impl Write, + ) -> Result { + // Build and dispatch the request (similar to include) + let queued_element = self.dispatch_include_to_element(attrs, dispatcher)?; + + // Eval is BLOCKING - wait for the response immediately + match queued_element { + QueuedElement::Include(fragment) => { + // Wait for the fragment to complete + let response = fragment.pending_fragment.wait()?; + + // Check if successful + if !response.get_status().is_success() { + if fragment.metadata.continue_on_error { + // Per ESI 
spec: onerror="continue" deletes the tag with no output + return Ok(false); + } else { + return Err(ExecutionError::ExpressionError(format!( + "Eval request failed with status: {}", + response.get_status() + ))); + } + } + + // Get the response body + let body_bytes = response.into_body_bytes(); + let body_as_bytes = Bytes::from(body_bytes); + + // ALWAYS parse as ESI (this is the key difference from include) + let (rest, elements) = parser::parse_remainder(&body_as_bytes).map_err(|e| { + ExecutionError::ExpressionError(format!("Failed to parse eval fragment: {}", e)) + })?; + + if !rest.is_empty() { + return Err(ExecutionError::ExpressionError( + "Incomplete parse of eval fragment".to_string(), + )); + } + + // Check dca mode to determine processing context + if fragment.metadata.dca == parser_types::DcaMode::Esi { + // dca="esi": TWO-PHASE processing + // Phase 1: Process fragment in ISOLATED context + let mut isolated_processor = Self::new( + Some(self.ctx.get_request().clone_without_body()), + self.configuration.clone(), + ); + let mut isolated_output = Vec::new(); + + for element in elements { + isolated_processor.process_element_streaming( + element, + &mut isolated_output, + dispatcher, + )?; + } + + // Phase 2: Parse the isolated output as ESI and process in PARENT's context + // This is why variables don't leak: they only exist in phase 1 + let isolated_bytes = Bytes::from(isolated_output); + let (rest, output_elements) = parser::parse_remainder(&isolated_bytes) + .map_err(|e| { + ExecutionError::ExpressionError(format!( + "Failed to parse eval isolated output: {}", + e + )) + })?; + + if !rest.is_empty() { + return Err(ExecutionError::ExpressionError( + "Incomplete parse of eval isolated output".to_string(), + )); + } + + for element in output_elements { + let break_encountered = + self.process_element_streaming(element, output_writer, dispatcher)?; + if break_encountered { + return Ok(true); + } + } + } else { + // dca="none": SINGLE-PHASE processing in 
PARENT's context + // Fragment included first, then executed in parent (variables affect parent) + for element in elements { + let break_encountered = + self.process_element_streaming(element, output_writer, dispatcher)?; + if break_encountered { + return Ok(true); // Propagate break from eval'd content + } + } + } + + Ok(false) + } + QueuedElement::Content(_content) => { + // Error with continue_on_error - insert nothing per spec + Ok(false) + } + _ => unreachable!("dispatch_include_to_element should only return Include or Content"), + } + } + /// Handle esi:choose tag fn handle_choose( &mut self, @@ -793,6 +906,10 @@ impl Processor { Element::Esi(Tag::Include { attrs }) => self.handle_include(&attrs, dispatcher), + Element::Esi(Tag::Eval { attrs }) => { + self.handle_eval(&attrs, dispatcher, output_writer) + } + Element::Esi(Tag::Choose { when_branches, otherwise_events, @@ -987,6 +1104,7 @@ impl Processor { appendheaders, removeheaders: attrs.removeheaders.clone(), setheaders, + dca: attrs.dca, }) } @@ -1291,7 +1409,7 @@ impl Processor { /// Process an include from the queue (wait and write, handle alt) fn process_include_from_queue( - &self, + &mut self, fragment: Fragment, output_writer: &mut impl Write, dispatcher: &FragmentRequestDispatcher, @@ -1313,8 +1431,36 @@ impl Processor { // Check if successful if final_response.get_status().is_success() { let body_bytes = final_response.into_body_bytes(); - // Write Bytes directly - no UTF-8 conversion needed! 
- output_writer.write_all(&body_bytes)?; + + // Check if we need to process as ESI (dca="esi") + if fragment.metadata.dca == parser_types::DcaMode::Esi { + // Parse and process the content as ESI + let body_as_bytes = Bytes::from(body_bytes); + let (rest, elements) = parser::parse_remainder(&body_as_bytes).map_err(|e| { + ExecutionError::ExpressionError(format!( + "Failed to parse include fragment with dca=esi: {}", + e + )) + })?; + + if !rest.is_empty() { + return Err(ExecutionError::ExpressionError( + "Incomplete parse of include fragment with dca=esi".to_string(), + )); + } + + // Process each element in the current namespace + for element in elements { + let break_encountered = + self.process_element_streaming(element, output_writer, dispatcher)?; + if break_encountered { + return Ok(()); // Break from foreach, stop processing + } + } + } else { + // Write Bytes directly - no UTF-8 conversion needed! + output_writer.write_all(&body_bytes)?; + } Ok(()) } else if let Some(alt_src) = fragment.alt_bytes { // Try alt - reuse metadata from original request diff --git a/esi/src/literals.rs b/esi/src/literals.rs index 6fc6d35..f1d678f 100644 --- a/esi/src/literals.rs +++ b/esi/src/literals.rs @@ -49,6 +49,7 @@ pub const HTML_COMMENT_CLOSE: &[u8] = b"-->"; // ESI opening tags pub const TAG_ESI_ASSIGN_OPEN: &[u8] = b""; pub const TAG_ESI_COMMENT_OPEN: &[u8] = b""; pub const TAG_ESI_INCLUDE_CLOSE: &[u8] = b""; +pub const TAG_ESI_EVAL_CLOSE: &[u8] = b""; pub const TAG_ESI_VARS_CLOSE: &[u8] = b""; pub const TAG_ESI_TEXT_CLOSE: &[u8] = b""; pub const TAG_ESI_CHOOSE_CLOSE: &[u8] = b""; @@ -85,6 +87,7 @@ pub const ESI_CLOSE_PREFIX: &[u8] = b" DcaMode::Esi, + _ => DcaMode::None, // Default or unrecognized values + }; + let ttl = take_attr_opt(&mut attrs, "ttl"); let maxwait = take_attr_opt(&mut attrs, "maxwait").and_then(|s| s.parse::().ok()); let no_store = attrs @@ -1039,6 +1046,7 @@ fn extract_include_attrs( src, alt, continue_on_error, + dca, ttl, maxwait, no_store, @@ 
-1088,6 +1096,53 @@ fn esi_include_with_params(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[ )(input) } +/// Parse tag - similar to include but always evaluates as ESI +/// Note: eval does NOT support alt attribute - use try/except instead +fn esi_eval(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { + alt((esi_eval_self_closing, esi_eval_with_params))(input) +} + +fn esi_eval_self_closing(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { + map( + delimited( + streaming_bytes::tag(TAG_ESI_EVAL_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_self_closing), + ), + |attrs| { + let mut attrs = extract_include_attrs(attrs, Vec::new()); + // Eval does not support alt - clear it if somehow present + attrs.alt = None; + + ParseResult::Single(Element::Esi(Tag::Eval { attrs })) + }, + )(input) +} + +fn esi_eval_with_params(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { + map( + tuple(( + delimited( + streaming_bytes::tag(TAG_ESI_EVAL_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_close_bracket), + ), + many0(preceded(streaming_char::multispace0, esi_param)), + preceded( + streaming_char::multispace0, + streaming_bytes::tag(TAG_ESI_EVAL_CLOSE), + ), + )), + |(attrs, params, _)| { + let mut attrs = extract_include_attrs(attrs, params); + // Eval does not support alt - clear it if somehow present + attrs.alt = None; + + ParseResult::Single(Element::Esi(Tag::Eval { attrs })) + }, + )(input) +} + fn esi_param(input: &[u8]) -> IResult<&[u8], (String, Expr), Error<&[u8]>> { map( delimited( @@ -1272,6 +1327,7 @@ fn tag_handler<'a>( // ESI tags - pass start position to parse from esi_assign(original, start), TAG_NAME_ESI_INCLUDE => esi_include(start), + TAG_NAME_ESI_EVAL => esi_eval(start), TAG_NAME_ESI_VARS => esi_vars(original, start), TAG_NAME_ESI_COMMENT => esi_comment(start), TAG_NAME_ESI_REMOVE => esi_remove(start), diff --git a/esi/src/parser_types.rs b/esi/src/parser_types.rs index 
32049ad..7099bc5 100644 --- a/esi/src/parser_types.rs +++ b/esi/src/parser_types.rs @@ -1,5 +1,15 @@ use bytes::Bytes; +/// Dynamic Content Assembly mode for esi:include and esi:eval +#[derive(Default, Debug, PartialEq, Clone, Copy)] +pub enum DcaMode { + #[default] + /// No pre-processing (default) - fragment returned as-is + None, + /// Fragment is processed as ESI by origin before returning + Esi, +} + /// All attributes for esi:include tags #[derive(Debug, PartialEq, Clone)] pub struct IncludeAttributes { @@ -9,6 +19,8 @@ pub struct IncludeAttributes { pub alt: Option, /// Whether to continue on error (from onerror="continue") pub continue_on_error: bool, + /// Dynamic Content Assembly mode - controls pre-processing + pub dca: DcaMode, /// Time-To-Live for caching (e.g., "120m", "1h", "2d", "0s") pub ttl: Option, /// Timeout in milliseconds for the request @@ -43,6 +55,10 @@ pub enum Tag { /// All include tag attributes (including params) attrs: IncludeAttributes, }, + Eval { + /// All eval tag attributes (same as include but no alt) + attrs: IncludeAttributes, + }, Try { attempt_events: Vec>, except_events: Vec, diff --git a/esi/tests/eval_tests.rs b/esi/tests/eval_tests.rs new file mode 100644 index 0000000..c89e776 --- /dev/null +++ b/esi/tests/eval_tests.rs @@ -0,0 +1,324 @@ +use esi::{Configuration, Processor}; +use fastly::{Request, Response}; +use std::collections::HashMap; +use std::sync::{Arc, Mutex}; + +/// Test that esi:eval with dca="none" processes in parent's context (spec Example 1) +/// Variables from fragment ARE accessible in parent +#[test] +fn test_eval_dca_none_parent_context() -> esi::Result<()> { + // Parent sets pvar1=7 and pvar2=8, then evals fragment with dca="none" + let input = r#" + + + +pvar1 = $(pvar1) + pvar2 = $(pvar2) + fvar = $(fvar) +"#; + + let dispatcher = + |_req: Request, _maxwait: Option| -> esi::Result { + // Fragment sets fvar=9 and pvar2=0 + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + 
Response::from_body( + r#" + +"#, + ), + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + // With dca="none", fragment executes in parent context + // So parent's pvar1=7 stays, fragment's pvar2=0 overrides parent's pvar2=8, fragment's fvar=9 is set + assert_eq!( + result.trim(), + r#"pvar1 = 7 + pvar2 = 0 + fvar = 9"#, + "Fragment should execute in parent context, variables should be shared/overridden" + ); + Ok(()) +} + +/// Test that esi:eval with dca="esi" processes in isolated context (spec Example 2) +/// Variables from fragment are NOT accessible in parent +#[test] +fn test_eval_dca_esi_isolated_context() -> esi::Result<()> { + // Same setup as Example 1, but with dca="esi" + let input = r#" + + + +pvar1 = $(pvar1) + pvar2 = $(pvar2) + fvar = $(fvar) +"#; + + let dispatcher = + |_req: Request, _maxwait: Option| -> esi::Result { + // Fragment sets fvar=9 and pvar2=0 (same as Example 1) + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + Response::from_body( + r#" + +"#, + ), + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + // With dca="esi", fragment executes in ISOLATED context first + // Fragment's variables DON'T affect parent, only the output (which is empty) is inserted + assert_eq!( + result.trim(), + r#"pvar1 = 7 + pvar2 = 8 + fvar ="#, + "Parent variables should remain unchanged, fragment variables should not leak" + ); + Ok(()) +} + +/// Test that esi:eval with dca="esi" inserts the output from 
isolated processing +#[test] +fn test_eval_dca_esi_with_output() -> esi::Result<()> { + let input = r#" + + +After: $(fragment_var)"#; + + let dispatcher = + |_req: Request, _maxwait: Option| -> esi::Result { + // Fragment sets a variable and outputs text + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + Response::from_body( + r#" + +Output from fragment"#, + ), + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + // With dca="esi", phase 1 processes fragment in isolation (output produced, vars stay isolated) + // Phase 2 processes that output in parent context (fragment_var not accessible) + assert_eq!( + result.trim(), + "Output from fragment\nAfter:", + "Should output text from fragment, but fragment variables should not leak to parent" + ); + Ok(()) +} + +/// Test that include with dca="none" inserts content verbatim (no ESI processing) +#[test] +fn test_include_dca_none_no_processing() -> esi::Result<()> { + let input = r#""#; + + let dispatcher = + |_req: Request, _maxwait: Option| -> esi::Result { + // Return content with ESI tags - should NOT be processed + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + Response::from_body( + r#"X is $(x)"#, + ), + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + assert_eq!( + result, r#"X is $(x)"#, + "dca='none' should insert content verbatim without ESI processing" + ); + Ok(()) +} + +/// Test that include with dca="esi" processes content as ESI +#[test] +fn 
test_include_dca_esi_processes_content() -> esi::Result<()> { + let input = r#""#; + + let dispatcher = + |_req: Request, _maxwait: Option| -> esi::Result { + // Return ESI content - should be processed + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + Response::from_body( + r#"Y is $(y)"#, + ), + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + assert_eq!(result, "Y is 99", "dca='esi' should process content as ESI"); + Ok(()) +} + +/// Test that include with dca="esi" processes in parent namespace (like eval) +#[test] +fn test_include_dca_esi_parent_namespace() -> esi::Result<()> { + let input = r#"After include: $(shared_var)"#; + + let dispatcher = + |_req: Request, _maxwait: Option| -> esi::Result { + // Set a variable in the included ESI + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + Response::from_body(r#""#), + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + assert_eq!( + result, "After include: shared", + "Include with dca='esi' should process in parent namespace" + ); + Ok(()) +} + +/// Test complex scenario: include respects dca, eval always processes as ESI +#[test] +fn test_eval_vs_include_dca_difference() -> esi::Result<()> { + let input = r#""#; + + // Track which URLs were called + let calls = Arc::new(Mutex::new(HashMap::new())); + let calls_clone = calls.clone(); + + let dispatcher = + move |req: Request, _maxwait: Option| -> esi::Result { + let url = req.get_url().to_string(); + 
calls_clone.lock().unwrap().insert(url.clone(), true); + + let content = match url.as_str() { + "http://example.com/raw" => r#"RAW"#, + "http://example.com/processed" => r#"PROCESSED"#, + _ => "UNKNOWN", + }; + + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + Response::from_body(content), + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + // Include without dca should insert verbatim (ESI not processed) + // Eval without dca defaults to "none" which processes in parent context + assert_eq!( + result, r#"RAWPROCESSED"#, + "Include without dca should insert verbatim, eval should process as ESI" + ); + + // Verify both URLs were called + let call_map = calls.lock().unwrap(); + assert!(call_map.contains_key("http://example.com/raw")); + assert!(call_map.contains_key("http://example.com/processed")); + Ok(()) +} + +/// Test that eval with onerror="continue" inserts nothing on failure (per ESI spec) +#[test] +fn test_eval_onerror_continue() -> esi::Result<()> { + let input = r#"BeforeAfter"#; + + let dispatcher = + |_req: Request, _maxwait: Option| -> esi::Result { + // Return a failed response + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + Response::from_status(500), + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + // Per ESI spec: onerror="continue" deletes the tag with no output (not even a comment) + assert_eq!( + result, "BeforeAfter", + "onerror='continue' should insert nothing on failure" + ); + 
Ok(()) +} + +/// Test nested ESI in eval +#[test] +fn test_eval_with_nested_esi() -> esi::Result<()> { + let input = r#""#; + + let call_count = Arc::new(Mutex::new(0)); + let call_count_clone = call_count.clone(); + + let dispatcher = move |req: Request, + _maxwait: Option| + -> esi::Result { + let url = req.get_url().to_string(); + *call_count_clone.lock().unwrap() += 1; + + let content = match url.as_str() { + "http://example.com/nested" => { + // Return ESI with a choose block + r#"ChosenNot"# + } + _ => "UNKNOWN", + }; + + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + Response::from_body(content), + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + assert_eq!( + result, "Chosen", + "eval should process nested ESI constructs" + ); + assert_eq!( + *call_count.lock().unwrap(), + 1, + "Should only call dispatcher once" + ); + Ok(()) +} From 72847d1453048600483a36d50758f2e94a539d13 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Fri, 27 Feb 2026 11:31:02 -0600 Subject: [PATCH 082/119] feat(eval): add HTTP headers caching and parsing support in EvalContext --- esi/src/expression.rs | 128 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 110 insertions(+), 18 deletions(-) diff --git a/esi/src/expression.rs b/esi/src/expression.rs index 6a2c1ce..434de8a 100644 --- a/esi/src/expression.rs +++ b/esi/src/expression.rs @@ -321,6 +321,8 @@ pub struct EvalContext { response_body_override: Option, /// Cached parsed query string parameters (lazy-loaded for performance) query_params_cache: std::cell::RefCell>>>, + /// Cached parsed HTTP headers (lazy-loaded for performance) + http_headers_cache: std::cell::RefCell>>>, /// Minimum TTL seen across all cached includes (in seconds) 
for rendered document cacheability min_ttl: Option, /// Flag indicating if the rendered document should not be cached (due to `private`/`no-cache`/`Set-Cookie` in any include) @@ -337,6 +339,7 @@ impl Default for EvalContext { response_status: None, response_body_override: None, query_params_cache: std::cell::RefCell::new(None), + http_headers_cache: std::cell::RefCell::new(HashMap::new()), min_ttl: None, is_uncacheable: false, } @@ -356,6 +359,7 @@ impl EvalContext { response_status: None, response_body_override: None, query_params_cache: std::cell::RefCell::new(None), + http_headers_cache: std::cell::RefCell::new(HashMap::new()), min_ttl: None, is_uncacheable: false, } @@ -423,6 +427,45 @@ impl EvalContext { self.query_params_cache.borrow() } + fn parse_http_header(&self, header: &str) -> Option> { + let value = self.request.get_header(header)?.to_str().ok()?; + + // Try to parse as semicolon-separated key=value pairs + let mut dict = HashMap::new(); + let mut has_pairs = false; + + for pair in value.split(';') { + let trimmed = pair.trim(); + if let Some((k, v)) = trimmed.split_once('=') { + dict.insert( + k.trim().to_string(), + Value::Text(v.trim().to_owned().into()), + ); + has_pairs = true; + } + } + + if has_pairs { + Some(dict) + } else { + None // Plain text header, not key=value format + } + } + + fn get_http_header_dict( + &self, + header: &str, + ) -> std::cell::Ref<'_, HashMap>>> { + // Check if we've already parsed this header + if !self.http_headers_cache.borrow().contains_key(header) { + let parsed = self.parse_http_header(header); + self.http_headers_cache + .borrow_mut() + .insert(header.to_string(), parsed); + } + self.http_headers_cache.borrow() + } + pub fn get_variable(&self, key: &str, subkey: Option<&str>) -> Value { match key { VAR_REQUEST_METHOD => Value::Text(self.request.get_method_str().to_string().into()), @@ -470,23 +513,38 @@ impl EvalContext { } _ if key.starts_with(VAR_HTTP_PREFIX) => { let header = 
key.strip_prefix(VAR_HTTP_PREFIX).unwrap_or_default(); - self.request.get_header(header).map_or(Value::Null, |h| { - let value = h.to_str().unwrap_or_default().to_owned(); - subkey.map_or_else( - || Value::Text(value.clone().into()), - |field| { - value - .split(';') - .find_map(|s| { - s.trim() - .split_once('=') - .filter(|(key, _)| *key == field) - .map(|(_, val)| Value::Text(val.to_owned().into())) - }) - .unwrap_or(Value::Null) - }, - ) - }) + + // Get raw header value + let raw_value = self + .request + .get_header(header) + .and_then(|h| h.to_str().ok()) + .unwrap_or(""); + + if raw_value.is_empty() { + return Value::Null; + } + + subkey.map_or_else( + || { + // Without subkey: try to return as Dict if parseable, else Text + let cache = self.get_http_header_dict(header); + if let Some(Some(dict)) = cache.get(header) { + Value::Dict(dict.clone()) + } else { + Value::Text(raw_value.to_owned().into()) + } + }, + |field| { + // With subkey: look up in parsed dict + let cache = self.get_http_header_dict(header); + if let Some(Some(dict)) = cache.get(header) { + dict.get(field).cloned().unwrap_or(Value::Null) + } else { + Value::Null + } + }, + ) } _ => { let stored = self.vars.get(key).cloned().unwrap_or(Value::Null); @@ -526,8 +584,9 @@ impl EvalContext { pub fn set_request(&mut self, request: Request) { self.request = request; - // Clear cached query params when request changes + // Clear cached query params and headers when request changes *self.query_params_cache.borrow_mut() = None; + self.http_headers_cache.borrow_mut().clear(); } pub const fn get_request(&self) -> &Request { @@ -1176,6 +1235,39 @@ mod tests { assert_eq!(result, Value::Null); Ok(()) } + + #[test] + fn test_eval_get_header_as_dict() -> Result<()> { + let mut ctx = EvalContext::new(); + let mut req = Request::new(Method::GET, URL_LOCALHOST); + req.set_header("Cookie", "id=571; visits=42"); + ctx.set_request(req); + + // Without subkey, should return Dict + let result = 
evaluate_expression("$(HTTP_COOKIE)", &mut ctx)?; + match result { + Value::Dict(map) => { + assert_eq!(map.get("id"), Some(&Value::Text("571".into()))); + assert_eq!(map.get("visits"), Some(&Value::Text("42".into()))); + assert_eq!(map.len(), 2); + } + _ => panic!("Expected Dict, got {:?}", result), + } + + // Verify cache works - access field after accessing full dict + let result = evaluate_expression("$(HTTP_COOKIE{'visits'})", &mut ctx)?; + assert_eq!(result, Value::Text("42".into())); + + // Plain text headers without key=value pairs should still return Text + let mut req2 = Request::new(Method::GET, URL_LOCALHOST); + req2.set_header("host", "example.com"); + ctx.set_request(req2); + let result = evaluate_expression("$(HTTP_HOST)", &mut ctx)?; + assert_eq!(result, Value::Text("example.com".into())); + + Ok(()) + } + #[test] fn test_logical_operators_with_parentheses() { let mut ctx = EvalContext::new(); From fa1c390daff9d4f39c3f82240711cfeebe3060a3 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Fri, 27 Feb 2026 16:18:45 -0600 Subject: [PATCH 083/119] feat(eval): enhance HTTP header parsing for cookies and add string indexing support --- esi/src/expression.rs | 106 ++++++++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 31 deletions(-) diff --git a/esi/src/expression.rs b/esi/src/expression.rs index 434de8a..8d84de1 100644 --- a/esi/src/expression.rs +++ b/esi/src/expression.rs @@ -429,26 +429,41 @@ impl EvalContext { fn parse_http_header(&self, header: &str) -> Option> { let value = self.request.get_header(header)?.to_str().ok()?; + let header_lower = header.to_lowercase(); + + // Cookie: semicolon-separated key=value pairs + if header_lower == "cookie" { + let mut dict = HashMap::new(); + for pair in value.split(';') { + let trimmed = pair.trim(); + if let Some((k, v)) = trimmed.split_once('=') { + dict.insert( + k.trim().to_string(), + Value::Text(v.trim().to_owned().into()), + ); + } + } + return if dict.is_empty() { None } 
else { Some(dict) }; + } - // Try to parse as semicolon-separated key=value pairs + // All other headers: comma-separated values (strip quality params like ;q=0.9) + // Creates Dict where key=value for membership testing: {"gzip": "gzip", "br": "br"} let mut dict = HashMap::new(); - let mut has_pairs = false; - - for pair in value.split(';') { - let trimmed = pair.trim(); - if let Some((k, v)) = trimmed.split_once('=') { + for item in value.split(',') { + // Strip quality value: "gzip;q=0.9" → "gzip" + let item_value = item.split(';').next().unwrap_or("").trim(); + if !item_value.is_empty() { dict.insert( - k.trim().to_string(), - Value::Text(v.trim().to_owned().into()), + item_value.to_string(), + Value::Text(item_value.to_owned().into()), ); - has_pairs = true; } } - if has_pairs { - Some(dict) + if dict.is_empty() { + None // Plain text header } else { - None // Plain text header, not key=value format + Some(dict) } } @@ -527,16 +542,11 @@ impl EvalContext { subkey.map_or_else( || { - // Without subkey: try to return as Dict if parseable, else Text - let cache = self.get_http_header_dict(header); - if let Some(Some(dict)) = cache.get(header) { - Value::Dict(dict.clone()) - } else { - Value::Text(raw_value.to_owned().into()) - } + // Without subkey: return raw header value as Text + Value::Text(raw_value.to_owned().into()) }, |field| { - // With subkey: look up in parsed dict + // With subkey: parse and look up specific field let cache = self.get_http_header_dict(header); if let Some(Some(dict)) = cache.get(header) { dict.get(field).cloned().unwrap_or(Value::Null) @@ -622,11 +632,23 @@ impl From<[(String, Value); N]> for EvalContext { fn get_subvalue(parent: &Value, subkey: &str) -> Value { if let Ok(idx) = subkey.parse::() { + // Try list index first if let Value::List(items) = parent { return items.get(idx).cloned().unwrap_or(Value::Null); } + + // String-as-list: character access by index + if let Value::Text(s) = parent { + let text = 
std::str::from_utf8(s.as_ref()).unwrap_or(""); + return text + .chars() + .nth(idx) + .map(|c| Value::Text(c.to_string().into())) + .unwrap_or(Value::Null); + } } + // Dict string-key lookup if let Value::Dict(map) = parent { return map.get(subkey).cloned().unwrap_or(Value::Null); } @@ -1243,22 +1265,22 @@ mod tests { req.set_header("Cookie", "id=571; visits=42"); ctx.set_request(req); - // Without subkey, should return Dict + // Without subkey, should return raw Text let result = evaluate_expression("$(HTTP_COOKIE)", &mut ctx)?; - match result { - Value::Dict(map) => { - assert_eq!(map.get("id"), Some(&Value::Text("571".into()))); - assert_eq!(map.get("visits"), Some(&Value::Text("42".into()))); - assert_eq!(map.len(), 2); - } - _ => panic!("Expected Dict, got {:?}", result), - } + assert_eq!(result, Value::Text("id=571; visits=42".into())); - // Verify cache works - access field after accessing full dict + // With subkey, should parse and return the field value let result = evaluate_expression("$(HTTP_COOKIE{'visits'})", &mut ctx)?; assert_eq!(result, Value::Text("42".into())); - // Plain text headers without key=value pairs should still return Text + let result = evaluate_expression("$(HTTP_COOKIE{'id'})", &mut ctx)?; + assert_eq!(result, Value::Text("571".into())); + + // Non-existent field returns Null + let result = evaluate_expression("$(HTTP_COOKIE{'nonexistent'})", &mut ctx)?; + assert_eq!(result, Value::Null); + + // Plain text headers still work let mut req2 = Request::new(Method::GET, URL_LOCALHOST); req2.set_header("host", "example.com"); ctx.set_request(req2); @@ -1268,6 +1290,28 @@ mod tests { Ok(()) } + #[test] + fn test_string_as_list_character_access() -> Result<()> { + let mut ctx = EvalContext::new(); + ctx.set_variable("a_string", None, Value::Text("abcde".into()))?; + + // Access individual characters by index + let result = evaluate_expression("$(a_string{0})", &mut ctx)?; + assert_eq!(result, Value::Text("a".into())); + + let result = 
evaluate_expression("$(a_string{3})", &mut ctx)?; + assert_eq!(result, Value::Text("d".into())); + + let result = evaluate_expression("$(a_string{4})", &mut ctx)?; + assert_eq!(result, Value::Text("e".into())); + + // Out of bounds returns Null + let result = evaluate_expression("$(a_string{10})", &mut ctx)?; + assert_eq!(result, Value::Null); + + Ok(()) + } + #[test] fn test_logical_operators_with_parentheses() { let mut ctx = EvalContext::new(); From 59ad8bd95f53bebfa2cd0654dea252b09f02a53d Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Sat, 28 Feb 2026 10:29:30 -0600 Subject: [PATCH 084/119] feat(fragment): refactor `FragmentMetadata` and enhance request parameter evaluation Also a fix for `dsa="esi"` for alt for esi:includes --- esi/src/lib.rs | 256 +++++++++++++++++++++++++++---------------------- 1 file changed, 139 insertions(+), 117 deletions(-) diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 63665fa..3f4b0d8 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -10,7 +10,7 @@ mod parser; pub mod parser_types; use crate::expression::EvalContext; -use crate::parser_types::Expr; +use crate::parser_types::{DcaMode, Expr}; use bytes::{Buf, Bytes, BytesMut}; use fastly::http::request::{PendingRequest, PollResult}; use fastly::http::{header, Method, StatusCode, Url}; @@ -50,31 +50,28 @@ impl From for PendingFragmentContent { } } -/// Common metadata shared between main and alt fragment requests -#[derive(Clone)] +/// Evaluated fragment request metadata +/// Store evaluated values once to avoid re-evaluation on alt fallback struct FragmentMetadata { - /// Whether to continue on error - continue_on_error: bool, - /// Optional TTL override from the include tag (in seconds) - ttl_override: Option, - /// Optional timeout in milliseconds for this specific request - maxwait: Option, - // Request building parameters - is_escaped: bool, - /// Whether the request should be cached or not - cacheable: bool, /// HTTP method to use for the request (default GET) 
method: Option, /// Optional body for POST requests entity: Option, + /// Headers to set on the request + setheaders: Vec<(String, Bytes)>, /// Headers to append to the request appendheaders: Vec<(String, Bytes)>, /// Headers to remove from the request removeheaders: Vec, - /// Headers to set on the request - setheaders: Vec<(String, Bytes)>, - /// Dynamic content assembly mode - whether to process included content as ESI - dca: parser_types::DcaMode, + /// Whether the request should be cached or not + cacheable: bool, + /// Optional TTL override from the include tag (in seconds) + ttl_override: Option, + // Flags needed for fragment processing + continue_on_error: bool, + /// Optional timeout in milliseconds for this specific request + maxwait: Option, + dca: DcaMode, } /// Representation of an ESI fragment request with its metadata and pending response @@ -85,7 +82,7 @@ pub struct Fragment { pub(crate) alt_bytes: Option, /// The pending fragment response, which can be polled to retrieve the content pub(crate) pending_fragment: PendingFragmentContent, - /// Common fragment metadata (shared with alt) + /// Evaluated parameters (reusable for alt fallback) pub(crate) metadata: FragmentMetadata, } @@ -621,7 +618,7 @@ impl Processor { } // Check dca mode to determine processing context - if fragment.metadata.dca == parser_types::DcaMode::Esi { + if fragment.metadata.dca == DcaMode::Esi { // dca="esi": TWO-PHASE processing // Phase 1: Process fragment in ISOLATED context let mut isolated_processor = Self::new( @@ -954,6 +951,63 @@ impl Processor { self.dispatch_include_to_element(attrs, dispatcher) } + /// Evaluate request parameters from `IncludeAttributes` and return a `FragmentMetadata` struct + /// + /// Evaluate original tag attributes and compute all values needed for dispatching a fragment request + fn evaluate_request_params( + &mut self, + attrs: &parser_types::IncludeAttributes, + ) -> Result { + // Parse TTL if provided (it's a literal string like "120m", not 
an expression) + let ttl_override = attrs + .ttl + .as_ref() + .and_then(|ttl_str| cache::parse_ttl(ttl_str)); + + // Evaluate method if provided + let method = attrs + .method + .as_ref() + .map(|e| self.evaluate_expr_to_bytes(e)) + .transpose()?; + + // Evaluate entity if provided + let entity = attrs + .entity + .as_ref() + .map(|e| self.evaluate_expr_to_bytes(e)) + .transpose()?; + + // Evaluate header values + let mut setheaders = Vec::with_capacity(attrs.setheaders.len()); + for (name, value_expr) in &attrs.setheaders { + let value_bytes = self.evaluate_expr_to_bytes(value_expr)?; + setheaders.push((name.clone(), value_bytes)); + } + + let mut appendheaders = Vec::with_capacity(attrs.appendheaders.len()); + for (name, value_expr) in &attrs.appendheaders { + let value_bytes = self.evaluate_expr_to_bytes(value_expr)?; + appendheaders.push((name.clone(), value_bytes)); + } + + // Determine if the fragment should be cached + let cacheable = !attrs.no_store && self.configuration.cache.is_includes_cacheable; + + Ok(FragmentMetadata { + method, + entity, + setheaders, + appendheaders, + removeheaders: attrs.removeheaders.clone(), + cacheable, + ttl_override, + continue_on_error: attrs.continue_on_error, + maxwait: attrs.maxwait, + dca: attrs.dca, + }) + } + /// Dispatch an include and return a `QueuedElement` (for flexible queue insertion) /// This is the single source of truth for include dispatching logic fn dispatch_include_to_element( @@ -999,10 +1053,11 @@ impl Processor { self.ctx.get_request().clone_without_body(), &final_src, &metadata, + &self.configuration, )?; let req_clone = req.clone_without_body(); - match dispatcher(req_clone, attrs.maxwait) { + match dispatcher(req_clone, metadata.maxwait) { Ok(pending_fragment) => { let fragment = Fragment { req, @@ -1012,17 +1067,18 @@ impl Processor { }; Ok(QueuedElement::Include(Box::new(fragment))) } - Err(_) if attrs.continue_on_error => { + Err(_) if metadata.continue_on_error => { // Try alt or add error 
placeholder if let Some(alt_src) = &alt_bytes { let alt_req = build_fragment_request( self.ctx.get_request().clone_without_body(), alt_src, &metadata, + &self.configuration, )?; let alt_req_without_body = alt_req.clone_without_body(); - dispatcher(alt_req_without_body, attrs.maxwait).map_or_else( + dispatcher(alt_req_without_body, metadata.maxwait).map_or_else( |_| { Ok(QueuedElement::Content(Bytes::from_static( b"", @@ -1051,63 +1107,6 @@ impl Processor { } } - /// Evaluate request parameters from `IncludeAttributes` - fn evaluate_request_params( - &mut self, - attrs: &parser_types::IncludeAttributes, - ) -> Result { - // Parse TTL if provided (it's a literal string like "120m", not an expression) - let ttl_override = attrs - .ttl - .as_ref() - .and_then(|ttl_str| cache::parse_ttl(ttl_str)); - - // Evaluate method if provided - let method = attrs - .method - .as_ref() - .map(|e| self.evaluate_expr_to_bytes(e)) - .transpose()?; - - // Evaluate entity if provided - let entity = attrs - .entity - .as_ref() - .map(|e| self.evaluate_expr_to_bytes(e)) - .transpose()?; - - // Evaluate header values - let mut appendheaders = Vec::with_capacity(attrs.appendheaders.len()); - for (name, value_expr) in &attrs.appendheaders { - let value_bytes = self.evaluate_expr_to_bytes(value_expr)?; - appendheaders.push((name.clone(), value_bytes)); - } - - let mut setheaders = Vec::with_capacity(attrs.setheaders.len()); - for (name, value_expr) in &attrs.setheaders { - let value_bytes = self.evaluate_expr_to_bytes(value_expr)?; - setheaders.push((name.clone(), value_bytes)); - } - - // Determine if the fragment should be cached - // cacheable=true means cache it, cacheable=false means bypass cache (set_pass) - let cacheable = !attrs.no_store && self.configuration.cache.is_includes_cacheable; - - Ok(FragmentMetadata { - continue_on_error: attrs.continue_on_error, - ttl_override, - maxwait: attrs.maxwait, - is_escaped: self.configuration.is_escaped_content, - cacheable, - method, - entity, 
- appendheaders, - removeheaders: attrs.removeheaders.clone(), - setheaders, - dca: attrs.dca, - }) - } - /// Check ready queue items - non-blocking poll /// Process any fragments that are already completed without blocking fn process_ready_queue_items( @@ -1431,44 +1430,21 @@ impl Processor { // Check if successful if final_response.get_status().is_success() { let body_bytes = final_response.into_body_bytes(); - - // Check if we need to process as ESI (dca="esi") - if fragment.metadata.dca == parser_types::DcaMode::Esi { - // Parse and process the content as ESI - let body_as_bytes = Bytes::from(body_bytes); - let (rest, elements) = parser::parse_remainder(&body_as_bytes).map_err(|e| { - ExecutionError::ExpressionError(format!( - "Failed to parse include fragment with dca=esi: {}", - e - )) - })?; - - if !rest.is_empty() { - return Err(ExecutionError::ExpressionError( - "Incomplete parse of include fragment with dca=esi".to_string(), - )); - } - - // Process each element in the current namespace - for element in elements { - let break_encountered = - self.process_element_streaming(element, output_writer, dispatcher)?; - if break_encountered { - return Ok(()); // Break from foreach, stop processing - } - } - } else { - // Write Bytes directly - no UTF-8 conversion needed! - output_writer.write_all(&body_bytes)?; - } + self.process_fragment_body( + body_bytes, + &fragment.metadata.dca, + output_writer, + dispatcher, + )?; Ok(()) } else if let Some(alt_src) = fragment.alt_bytes { - // Try alt - reuse metadata from original request + // Try alt - reuse pre-evaluated params debug!("Main request failed, trying alt"); let alt_req = build_fragment_request( self.ctx.get_request().clone_without_body(), &alt_src, &fragment.metadata, + &self.configuration, )?; let alt_req_without_body = alt_req.clone_without_body(); @@ -1483,8 +1459,12 @@ impl Processor { }; let body_bytes = final_alt.into_body_bytes(); - // Write Bytes directly - no UTF-8 conversion needed! 
- output_writer.write_all(&body_bytes)?; + self.process_fragment_body( + body_bytes, + &fragment.metadata.dca, + output_writer, + dispatcher, + )?; Ok(()) } Err(_) if continue_on_error => { @@ -1505,6 +1485,47 @@ impl Processor { ))) } } + + /// Process fragment body based on dca mode + /// - dca="esi": Parse and process content as ESI + /// - dca="none": Write raw content + fn process_fragment_body( + &mut self, + body_bytes: Vec, + dca_mode: &DcaMode, + output_writer: &mut impl Write, + dispatcher: &FragmentRequestDispatcher, + ) -> Result<()> { + if *dca_mode == DcaMode::Esi { + // Parse and process the content as ESI + let body_as_bytes = Bytes::from(body_bytes); + let (rest, elements) = parser::parse_remainder(&body_as_bytes).map_err(|e| { + ExecutionError::ExpressionError(format!( + "Failed to parse fragment with dca=esi: {}", + e + )) + })?; + + if !rest.is_empty() { + return Err(ExecutionError::ExpressionError( + "Incomplete parse of fragment with dca=esi".to_string(), + )); + } + + // Process each element in the current namespace + for element in elements { + let break_encountered = + self.process_element_streaming(element, output_writer, dispatcher)?; + if break_encountered { + return Ok(()); // Break from foreach, stop processing + } + } + } else { + // dca="none" (default): Write raw content + output_writer.write_all(&body_bytes)?; + } + Ok(()) + } } // Default fragment request dispatcher that uses the request's hostname as backend @@ -1548,12 +1569,13 @@ fn build_fragment_request( mut request: Request, url: &Bytes, metadata: &FragmentMetadata, + config: &Configuration, ) -> Result { // Convert Bytes to str for URL parsing let url_str = std::str::from_utf8(url) .map_err(|_| ExecutionError::ExpressionError("Invalid UTF-8 in URL".to_string()))?; - let escaped_url = if metadata.is_escaped { + let escaped_url = if config.is_escaped_content { Cow::Owned(html_escape::decode_html_entities(url_str).into_owned()) } else { Cow::Borrowed(url_str) @@ -1584,7 
+1606,7 @@ fn build_fragment_request( request.set_header(header::HOST, &hostname); - // Set HTTP method (default is GET) + // Set HTTP method (default is GET) - use pre-evaluated value if let Some(method_bytes) = &metadata.method { let method_str = std::str::from_utf8(method_bytes) .map_err(|_| ExecutionError::ExpressionError("Invalid UTF-8 in method".to_string()))? @@ -1601,7 +1623,7 @@ fn build_fragment_request( } } - // Set POST body if provided + // Set POST body if provided - use pre-evaluated value if let Some(entity_bytes) = &metadata.entity { if request.get_method() == Method::POST { request.set_body(entity_bytes.as_ref()); @@ -1614,12 +1636,12 @@ fn build_fragment_request( request.remove_header(header_name); } - // 2. Set headers (replace existing) + // 2. Set headers (replace existing) - use pre-evaluated values for (name, value) in &metadata.setheaders { request.set_header(name, value.as_ref()); } - // 3. Append headers (add to existing) + // 3. Append headers (add to existing) - use pre-evaluated values for (name, value) in &metadata.appendheaders { request.append_header(name, value.as_ref()); } From ed11f3aaca1bc07b68eeb12a35382815af42abfb Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Sat, 28 Feb 2026 13:58:43 -0600 Subject: [PATCH 085/119] feat(functions): implement user-defined functions with recursion support --- esi/src/config.rs | 9 + esi/src/expression.rs | 406 ++++++++++++++++++++++++++++++++++++++++ esi/src/lib.rs | 14 ++ esi/src/literals.rs | 6 + esi/src/parser.rs | 105 ++++++++++- esi/src/parser_types.rs | 7 + esi/tests/esi-tests.rs | 140 ++++++++++++++ 7 files changed, 686 insertions(+), 1 deletion(-) diff --git a/esi/src/config.rs b/esi/src/config.rs index 84a3984..2c263e6 100644 --- a/esi/src/config.rs +++ b/esi/src/config.rs @@ -24,6 +24,8 @@ pub struct Configuration { pub is_escaped_content: bool, /// Cache configuration for ESI includes pub cache: CacheConfig, + /// Maximum recursion depth for user-defined function calls (per 
ESI spec, default: 5) + pub function_recursion_depth: usize, } impl Default for Configuration { @@ -32,6 +34,7 @@ impl Default for Configuration { namespace: String::from("esi"), is_escaped_content: true, cache: CacheConfig::default(), + function_recursion_depth: 5, } } } @@ -55,4 +58,10 @@ impl Configuration { self.cache = cache; self } + + /// Configure maximum recursion depth for user-defined function calls + pub const fn with_max_function_recursion_depth(mut self, depth: usize) -> Self { + self.function_recursion_depth = depth; + self + } } diff --git a/esi/src/expression.rs b/esi/src/expression.rs index 8d84de1..0026d2e 100644 --- a/esi/src/expression.rs +++ b/esi/src/expression.rs @@ -11,6 +11,30 @@ use crate::{ ExecutionError, Result, }; +/// Registry for user-defined ESI functions +/// Functions are defined using tags and can be called within expressions +#[derive(Debug, Clone, Default)] +pub struct FunctionRegistry { + /// Map from function name to function body (Vec) + functions: HashMap>, +} + +impl FunctionRegistry { + pub fn new() -> Self { + Self { + functions: HashMap::new(), + } + } + + pub fn register(&mut self, name: String, body: Vec) { + self.functions.insert(name, body); + } + + pub fn get(&self, name: &str) -> Option<&Vec> { + self.functions.get(name) + } +} + /// Evaluates a nom-parsed expression directly without re-lexing/parsing /// /// This function takes an expression that was already parsed by the nom parser @@ -327,6 +351,12 @@ pub struct EvalContext { min_ttl: Option, /// Flag indicating if the rendered document should not be cached (due to `private`/`no-cache`/`Set-Cookie` in any include) is_uncacheable: bool, + /// Stack of function call arguments for user-defined functions (supports nested calls) + args_stack: Vec>, + /// Registry for user-defined ESI functions + function_registry: FunctionRegistry, + /// Maximum recursion depth for user-defined function calls (per ESI spec, default: 5) + max_function_recursion_depth: usize, } impl 
Default for EvalContext { fn default() -> Self { @@ -342,6 +372,9 @@ impl Default for EvalContext { http_headers_cache: std::cell::RefCell::new(HashMap::new()), min_ttl: None, is_uncacheable: false, + args_stack: Vec::new(), + function_registry: FunctionRegistry::new(), + max_function_recursion_depth: 5, } } } @@ -362,6 +395,9 @@ impl EvalContext { http_headers_cache: std::cell::RefCell::new(HashMap::new()), min_ttl: None, is_uncacheable: false, + args_stack: Vec::new(), + function_registry: FunctionRegistry::new(), + max_function_recursion_depth: 5, } } @@ -483,6 +519,26 @@ impl EvalContext { pub fn get_variable(&self, key: &str, subkey: Option<&str>) -> Value { match key { + VAR_ARGS => { + // Handle $(ARGS) and $(ARGS{n}) + match self.current_args() { + None => Value::Null, + Some(args) => subkey.map_or_else( + || { + // $(ARGS) without subscript - return list of all arguments + Value::List(args.clone()) + }, + |sub| { + // $(ARGS{n}) - return nth argument (0-indexed per ESI spec) + if let Ok(index) = sub.parse::() { + args.get(index).cloned().unwrap_or(Value::Null) + } else { + Value::Null + } + }, + ), + } + } VAR_REQUEST_METHOD => Value::Text(self.request.get_method_str().to_string().into()), VAR_REQUEST_PATH => Value::Text(self.request.get_path().to_string().into()), VAR_REMOTE_ADDR => Value::Text( @@ -622,6 +678,36 @@ impl EvalContext { let ttl = rendered_ttl.or(self.min_ttl)?; Some(format!("public, max-age={ttl}")) } + + /// Push a new set of function arguments onto the stack (for user-defined function calls) + pub fn push_args(&mut self, args: Vec) { + self.args_stack.push(args); + } + + /// Pop the current function arguments from the stack + pub fn pop_args(&mut self) { + self.args_stack.pop(); + } + + /// Get the current function arguments (if any) + pub fn current_args(&self) -> Option<&Vec> { + self.args_stack.last() + } + + /// Register a user-defined function + pub fn register_function(&mut self, name: String, body: Vec) { + 
self.function_registry.register(name, body); + } + + /// Get a user-defined function body + pub fn get_function(&self, name: &str) -> Option<&Vec> { + self.function_registry.get(name) + } + + /// Set maximum recursion depth for user-defined function calls + pub const fn set_max_function_recursion_depth(&mut self, depth: usize) { + self.max_function_recursion_depth = depth; + } } impl From<[(String, Value); N]> for EvalContext { @@ -808,7 +894,234 @@ fn dict_to_string(map: &HashMap) -> String { parts.join("&") } +/// Process a single element within a function body, accumulating output and checking for returns +/// This is a helper for call_user_function that can be called recursively +/// +/// Returns: +/// - `Ok(Some(value))` - Explicit return with value +/// - `Ok(None)` - Continue processing +fn process_function_element( + element: &Element, + output: &mut Vec, + ctx: &mut EvalContext, + should_break: &mut bool, +) -> Result> { + match element { + Element::Esi(crate::parser_types::Tag::Return { value }) => { + // Evaluate the return expression and signal early return + Ok(Some(eval_expr(value, ctx)?)) + } + Element::Text(text) | Element::Html(text) => { + output.extend_from_slice(text); + Ok(None) + } + Element::Expr(expr) => { + let value = eval_expr(expr, ctx)?; + output.extend_from_slice(value.to_bytes().as_ref()); + Ok(None) + } + Element::Esi(crate::parser_types::Tag::Assign { + name, + subscript, + value, + }) => { + let val = eval_expr(value, ctx)?; + let subscript_str = if let Some(sub_expr) = subscript { + Some(eval_expr(sub_expr, ctx)?.to_string()) + } else { + None + }; + ctx.set_variable(name, subscript_str.as_deref(), val)?; + Ok(None) + } + Element::Esi(crate::parser_types::Tag::Vars { name }) => { + if let Some(match_name) = name { + ctx.set_match_name(match_name); + } + Ok(None) + } + Element::Esi(crate::parser_types::Tag::Choose { + when_branches, + otherwise_events, + }) => { + let mut chose_branch = false; + + for when_branch in when_branches { 
+ if let Some(ref match_name) = when_branch.match_name { + ctx.set_match_name(match_name); + } + + match eval_expr(&when_branch.test, ctx) { + Ok(test_result) if test_result.to_bool() => { + // This branch matches - recursively process it + for elem in &when_branch.content { + if let Some(return_val) = process_function_element(elem, output, ctx, should_break)? { + return Ok(Some(return_val)); + } + if *should_break { + return Ok(None); + } + } + chose_branch = true; + break; + } + _ => continue, + } + } + + // No when matched - process otherwise + if !chose_branch { + for elem in otherwise_events { + if let Some(return_val) = process_function_element(elem, output, ctx, should_break)? { + return Ok(Some(return_val)); + } + if *should_break { + return Ok(None); + } + } + } + Ok(None) + } + Element::Esi(crate::parser_types::Tag::Foreach { + collection, + item, + content, + }) => { + // Evaluate the collection expression + let collection_value = eval_expr(collection, ctx)?; + + // Convert to a list if needed + let items = match &collection_value { + Value::List(items) => items.clone(), + Value::Dict(map) => { + // Convert dict to list of [key, value] pairs + map.iter() + .map(|(k, v)| { + Value::List(vec![Value::Text(k.clone().into()), v.clone()]) + }) + .collect() + } + Value::Null => Vec::new(), + other => vec![other.clone()], // Treat single values as a list of one + }; + + // Default item variable name if not specified + let item_var = item.as_ref().map(|s| s.as_str()).unwrap_or("item"); + + // Iterate through items + for item_value in items { + // Set the item variable + ctx.set_variable(item_var, None, item_value)?; + + // Process content for this iteration + for elem in content { + if let Some(return_val) = process_function_element(elem, output, ctx, should_break)? 
{ + return Ok(Some(return_val)); + } + if *should_break { + break; // Break out of foreach loop + } + } + if *should_break { + *should_break = false; // Reset break flag after exiting loop + break; + } + } + Ok(None) + } + Element::Esi(crate::parser_types::Tag::Break) => { + // Signal break to exit foreach loop + *should_break = true; + Ok(None) + } + // Per ESI spec: functions cannot contain include, eval, or nested function definitions + Element::Esi(crate::parser_types::Tag::Include { .. }) => { + Err(ExecutionError::FunctionError( + "esi:include is not allowed in function bodies".to_string(), + )) + } + Element::Esi(crate::parser_types::Tag::Eval { .. }) => { + Err(ExecutionError::FunctionError( + "esi:eval is not allowed in function bodies".to_string(), + )) + } + Element::Esi(crate::parser_types::Tag::Function { .. }) => { + Err(ExecutionError::FunctionError( + "esi:function is not allowed in function bodies (nested function definitions are not supported)".to_string(), + )) + } + // Try/Except would require dispatcher context which isn't available in expression evaluation + // Silently ignore for now (could also error) + Element::Esi(crate::parser_types::Tag::Try { .. }) => Ok(None), + // Other tags that shouldn't appear - silently ignore + _ => Ok(None), + } +} + +/// Execute a user-defined ESI function +/// +/// Processes the function body elements, handling variable assignments and return statements. +/// Functions can access arguments via $(ARGS) variable. +/// Enforces maximum recursion depth per ESI specification. 
+/// +/// # Arguments +/// * `name` - Function name (for error messages) +/// * `body` - Function body elements to execute +/// * `args` - Function call arguments +/// * `ctx` - Evaluation context +/// +/// # Returns +/// * `Result` - The return value (from ) or accumulated text output +fn call_user_function( + name: &str, + body: &[Element], + args: &[Value], + ctx: &mut EvalContext, +) -> Result { + // Check recursion depth before proceeding + if ctx.args_stack.len() >= ctx.max_function_recursion_depth { + return Err(ExecutionError::FunctionError(format!( + "Maximum recursion depth ({}) exceeded for function '{}'", + ctx.max_function_recursion_depth, name + ))); + } + + // Push arguments onto the stack for $(ARGS) access + ctx.push_args(args.to_vec()); + + // Process function body, catching any errors to ensure cleanup + let result = (|| { + let mut output = Vec::new(); + let mut should_break = false; + + for element in body { + if let Some(return_value) = + process_function_element(element, &mut output, ctx, &mut should_break)? 
+ { + return Ok(return_value); + } + // Break at function level doesn't make sense - ignore the flag + should_break = false; + } + + // No explicit return - return accumulated output as text + Ok(Value::Text(Bytes::from(output))) + })(); + + // Always pop arguments, even if there was an error + ctx.pop_args(); + + result +} + fn call_dispatch(identifier: &str, args: &[Value], ctx: &mut EvalContext) -> Result { + // First check if this is a user-defined function + // Clone the function body to avoid borrowing issues + if let Some(function_body) = ctx.get_function(identifier).cloned() { + return call_user_function(identifier, &function_body, args, ctx); + } + + // Fall back to built-in functions match identifier { FN_PING => Ok(Value::Text(FN_PONG.into())), FN_LOWER => functions::lower(args), @@ -1678,4 +1991,97 @@ mod tests { .to_string() .contains("requires integer operands")); } + + #[test] + fn test_args_variable_no_args() -> Result<()> { + // Without any args pushed, ARGS should be null + let ctx = &mut EvalContext::new(); + let result = ctx.get_variable("ARGS", None); + assert_eq!(result, Value::Null); + Ok(()) + } + + #[test] + fn test_args_variable_with_args() -> Result<()> { + // Push some arguments and test ARGS access + let mut ctx = EvalContext::new(); + ctx.push_args(vec![ + Value::Text("hello".into()), + Value::Integer(42), + Value::Text("world".into()), + ]); + + // Test $(ARGS) - should return list of all arguments + let result = ctx.get_variable("ARGS", None); + if let Value::List(items) = result { + assert_eq!(items.len(), 3); + assert_eq!(items[0], Value::Text("hello".into())); + assert_eq!(items[1], Value::Integer(42)); + assert_eq!(items[2], Value::Text("world".into())); + } else { + panic!("Expected a list"); + } + + // Test $(ARGS{0}) - should return first argument (0-indexed per ESI spec) + let result = ctx.get_variable("ARGS", Some("0")); + assert_eq!(result, Value::Text("hello".into())); + + // Test $(ARGS{1}) - should return second argument 
+ let result = ctx.get_variable("ARGS", Some("1")); + assert_eq!(result, Value::Integer(42)); + + // Test $(ARGS{2}) - should return third argument + let result = ctx.get_variable("ARGS", Some("2")); + assert_eq!(result, Value::Text("world".into())); + + // Test $(ARGS{3}) - out of bounds, should be null + let result = ctx.get_variable("ARGS", Some("3")); + assert_eq!(result, Value::Null); + + // Test $(ARGS{4}) - out of bounds, should be null + let result = ctx.get_variable("ARGS", Some("4")); + assert_eq!(result, Value::Null); + + // Pop arguments + ctx.pop_args(); + + // After popping, ARGS should be null again + let result = ctx.get_variable("ARGS", None); + assert_eq!(result, Value::Null); + + Ok(()) + } + + #[test] + fn test_args_variable_nested_calls() -> Result<()> { + // Test nested function calls with different args + let mut ctx = EvalContext::new(); + + // First call with args [10, 20] + ctx.push_args(vec![Value::Integer(10), Value::Integer(20)]); + let result = ctx.get_variable("ARGS", Some("1")); + assert_eq!(result, Value::Integer(20)); + + // Nested call with args [30, 40, 50] + ctx.push_args(vec![ + Value::Integer(30), + Value::Integer(40), + Value::Integer(50), + ]); + let result = ctx.get_variable("ARGS", Some("0")); + assert_eq!(result, Value::Integer(30)); + let result = ctx.get_variable("ARGS", Some("2")); + assert_eq!(result, Value::Integer(50)); + + // Pop nested call + ctx.pop_args(); + + // Should be back to first call's args + let result = ctx.get_variable("ARGS", Some("0")); + assert_eq!(result, Value::Integer(10)); + let result = ctx.get_variable("ARGS", Some("1")); + assert_eq!(result, Value::Integer(20)); + + Ok(()) + } } diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 3f4b0d8..0933a7d 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -179,6 +179,8 @@ impl Processor { } else { ctx.set_request(Request::new(Method::GET, "http://localhost")); } + // Apply configuration settings to context + 
ctx.set_max_function_recursion_depth(configuration.function_recursion_depth); Self { ctx, configuration, @@ -925,6 +927,18 @@ impl Processor { Element::Esi(Tag::Break) => Ok(true), + Element::Esi(Tag::Function { name, body }) => { + // Register user-defined function in the evaluation context + self.ctx.register_function(name, body); + Ok(false) + } + + Element::Esi(Tag::Return { .. }) => { + // Return tags should only appear inside function bodies, not at top level + // Ignore at top level + Ok(false) + } + Element::Esi(_) => Ok(false), // Other standalone tags shouldn't appear } } diff --git a/esi/src/literals.rs b/esi/src/literals.rs index f1d678f..cb479ff 100644 --- a/esi/src/literals.rs +++ b/esi/src/literals.rs @@ -64,6 +64,8 @@ pub const TAG_ESI_EXCEPT_OPEN: &[u8] = b""; pub const TAG_ESI_FOREACH_OPEN: &[u8] = b""; @@ -79,6 +81,7 @@ pub const TAG_ESI_ATTEMPT_CLOSE: &[u8] = b""; pub const TAG_ESI_EXCEPT_CLOSE: &[u8] = b""; pub const TAG_ESI_FOREACH_CLOSE: &[u8] = b""; pub const TAG_ESI_REMOVE_CLOSE: &[u8] = b""; +pub const TAG_ESI_FUNCTION_CLOSE: &[u8] = b""; // ESI prefix for detection pub const ESI_PREFIX: &[u8] = b"esi:"; @@ -100,6 +103,8 @@ pub const TAG_NAME_ESI_ATTEMPT: &[u8] = b"esi:attempt"; pub const TAG_NAME_ESI_EXCEPT: &[u8] = b"esi:except"; pub const TAG_NAME_ESI_FOREACH: &[u8] = b"esi:foreach"; pub const TAG_NAME_ESI_BREAK: &[u8] = b"esi:break"; +pub const TAG_NAME_ESI_FUNCTION: &[u8] = b"esi:function"; +pub const TAG_NAME_ESI_RETURN: &[u8] = b"esi:return"; pub const TAG_NAME_SCRIPT: &[u8] = b"script"; // ============================================================================ @@ -162,6 +167,7 @@ pub const VAR_REMOTE_ADDR: &str = "REMOTE_ADDR"; pub const VAR_QUERY_STRING: &str = "QUERY_STRING"; pub const VAR_HTTP_PREFIX: &str = "HTTP_"; pub const VAR_MATCHES: &str = "MATCHES"; +pub const VAR_ARGS: &str = "ARGS"; // Boolean Value Literals pub const BOOL_TRUE: &[u8] = b"true"; diff --git a/esi/src/parser.rs b/esi/src/parser.rs index 
0a42cf6..3ebc821 100644 --- a/esi/src/parser.rs +++ b/esi/src/parser.rs @@ -756,6 +756,46 @@ fn esi_break(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { )(input) } +/// Parse ... +fn esi_function_tag<'a>( + original: &Bytes, + input: &'a [u8], +) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { + map( + tuple(( + delimited( + streaming_bytes::tag(TAG_ESI_FUNCTION_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_close_bracket), + ), + |i| tag_content(original, i), + streaming_bytes::tag(TAG_ESI_FUNCTION_CLOSE), + )), + |(attrs, body, _)| { + let name = attrs.get("name").cloned().unwrap_or_default(); + + ParseResult::Single(Element::Esi(Tag::Function { name, body })) + }, + )(input) +} + +/// Parse +fn esi_return(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { + map( + delimited( + streaming_bytes::tag(TAG_ESI_RETURN_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_self_closing), + ), + |attrs| { + let value_str = attrs.get("value").cloned().unwrap_or_default(); + let value = parse_attr_as_expr_with_context(value_str, false); + + ParseResult::Single(Element::Esi(Tag::Return { value })) + }, + )(input) +} + /// Zero-copy parser for ... 
fn esi_choose<'a>( original: &Bytes, @@ -1340,6 +1380,8 @@ fn tag_handler<'a>( TAG_NAME_ESI_EXCEPT => esi_except(original, start), TAG_NAME_ESI_FOREACH => esi_foreach(original, start), TAG_NAME_ESI_BREAK => esi_break(start), + TAG_NAME_ESI_FUNCTION => esi_function_tag(original, start), + TAG_NAME_ESI_RETURN => esi_return(start), // Special HTML tags - pass start to re-parse from beginning // (script needs to check attributes, so easier to re-parse than continue) @@ -1582,7 +1624,10 @@ fn fn_argument(input: &[u8]) -> IResult<&[u8], Vec, Error<&[u8]>> { } fn fn_nested_argument(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { - alt((esi_function, esi_variable, string, integer, bareword))(input) + // Try full expression parsing first (supports $(ARGS{0}) - 1) + // expr() will naturally stop at commas and closing parens + // If expr fails, fall back to bareword for backward compatibility + alt((expr, bareword))(input) } fn integer(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { @@ -2629,6 +2674,64 @@ exception! 
} } + #[test] + fn test_parse_function() { + let input = b"Hello $(name)"; + let bytes = Bytes::from_static(input); + let (rest, elements) = parse_remainder(&bytes).unwrap(); + assert_eq!(rest.len(), 0); + assert_eq!(elements.len(), 1); + + match &elements[0] { + Element::Esi(Tag::Function { name, body }) => { + assert_eq!(name, "greet"); + assert!(!body.is_empty()); + } + other => panic!("Expected Function tag, got {:?}", other), + } + } + + #[test] + fn test_parse_function_with_return() { + let input = + b""; + let bytes = Bytes::from_static(input); + let (rest, elements) = parse_remainder(&bytes).unwrap(); + assert_eq!(rest.len(), 0); + assert_eq!(elements.len(), 1); + + match &elements[0] { + Element::Esi(Tag::Function { name, body }) => { + assert_eq!(name, "add"); + assert_eq!(body.len(), 1); + match &body[0] { + Element::Esi(Tag::Return { value }) => { + // Return should have a valid expression (Comparison for + operator) + assert!(matches!(value, Expr::Comparison { .. })); + } + other => panic!("Expected Return tag in function body, got {:?}", other), + } + } + other => panic!("Expected Function tag, got {:?}", other), + } + } + + #[test] + fn test_parse_return() { + let input = b""; + let bytes = Bytes::from_static(input); + let (rest, elements) = parse_remainder(&bytes).unwrap(); + assert_eq!(rest.len(), 0); + assert_eq!(elements.len(), 1); + + match &elements[0] { + Element::Esi(Tag::Return { value }) => { + assert!(matches!(value, Expr::Integer(42))); + } + other => panic!("Expected Return tag, got {:?}", other), + } + } + #[test] fn test_parse_dict_literal() { let input = b"{1:'apple',2:'orange'}"; diff --git a/esi/src/parser_types.rs b/esi/src/parser_types.rs index 7099bc5..9803b86 100644 --- a/esi/src/parser_types.rs +++ b/esi/src/parser_types.rs @@ -88,6 +88,13 @@ pub enum Tag { content: Vec, }, Break, + Function { + name: String, + body: Vec, + }, + Return { + value: Expr, + }, } #[derive(Debug, PartialEq, Clone)] diff --git 
a/esi/tests/esi-tests.rs b/esi/tests/esi-tests.rs index 33634b9..cf61adf 100644 --- a/esi/tests/esi-tests.rs +++ b/esi/tests/esi-tests.rs @@ -1778,3 +1778,143 @@ fn test_arithmetic_operators_in_esi() { "100 / 5 - 2 * 3 with left-to-right evaluation should be 54" ); } + +#[test] +fn test_user_defined_function_basic() { + init_logs(); + + let input = r#" + Hello, World! + $greet() + "#; + let req = Request::get("http://example.com"); + let result = process_esi_document(input, req).expect("Processing should succeed"); + + // Function should output accumulated text + assert!(result.contains("Hello, World!"), "Result was: {}", result); +} + +#[test] +fn test_user_defined_function_add() { + init_logs(); + + let input = r#" + + + + $add( 5, 7 ) + "#; + let req = Request::get("http://example.com"); + let result = process_esi_document(input, req).expect("Processing should succeed"); + + assert!(result.contains("12"), "Result was: {}", result); +} + +#[test] +fn test_user_defined_function_multiply() { + init_logs(); + + let input = r#" + + + + Result: $multiply(6, 7) + "#; + let req = Request::get("http://example.com"); + let result = process_esi_document(input, req).expect("Processing should succeed"); + + assert!(result.contains("42"), "Result was: {}", result); +} + +#[test] +fn test_user_defined_function_is_odd() { + init_logs(); + + let input = r#" + + + + + + + + + + + $is_odd(3) + "#; + let req = Request::get("http://example.com"); + let result = process_esi_document(input, req).expect("Processing should succeed"); + + assert!(result.contains("yes"), "Result was: {}", result); +} + +#[test] +fn test_user_defined_function_sum_with_foreach() { + init_logs(); + + let input = r#" + + + + + + + + $sum(1, 2, 3, 4) + "#; + let req = Request::get("http://example.com"); + let result = process_esi_document(input, req).expect("Processing should succeed"); + + assert!(result.contains("10"), "Result was: {}", result); +} + +#[test] +fn test_user_defined_function_recursive_addv() { 
+ init_logs(); + + let input = r#" + + + + + + + + + + + + + + + $addv(5, 10, 15) + "#; + let req = Request::get("http://example.com"); + let result = process_esi_document(input, req).expect("Processing should succeed"); + + assert!(result.contains("30"), "Result was: {}", result); +} + +#[test] +fn test_user_defined_function_recursive_factorial() { + init_logs(); + + let input = r#" + + + + + + + + + + + $factorial(5) + "#; + let req = Request::get("http://example.com"); + let result = process_esi_document(input, req).expect("Processing should succeed"); + + assert!(result.contains("120"), "Result was: {}", result); +} From 378da1b10a347d2bb54a495b53d05390c456610d Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Sun, 1 Mar 2026 16:40:59 -0600 Subject: [PATCH 086/119] Refactor ESI element processing by introducing ElementHandler trait - Created a new `element_handler.rs` file to define the `ElementHandler` trait. - Implemented shared processing logic for both streaming (`DocumentHandler`) and function contexts (`FunctionHandler`). - Updated `expression.rs` and `lib.rs` to utilize the new trait for handling ESI elements. - Simplified element processing by consolidating common logic into default methods within the trait. - Ensured that context-specific behaviors for tags like `on_return`, `on_include`, `on_eval`, `on_try`, and `on_function` are clearly defined and handled appropriately. --- esi/src/element_handler.rs | 287 +++++++++++++++ esi/src/expression.rs | 233 ++++-------- esi/src/lib.rs | 720 ++++++++++++++----------------------- 3 files changed, 623 insertions(+), 617 deletions(-) create mode 100644 esi/src/element_handler.rs diff --git a/esi/src/element_handler.rs b/esi/src/element_handler.rs new file mode 100644 index 0000000..079f2a9 --- /dev/null +++ b/esi/src/element_handler.rs @@ -0,0 +1,287 @@ +//! Shared element processing trait used by both streaming (`Processor`) and +//! expression-evaluation (`call_user_function`) contexts. +//! +//! 
# Design +//! +//! Both processing contexts handle the same set of ESI tags but differ in +//! exactly five behaviours: +//! +//! | Hook | Streaming (`DocumentHandler`) | Function (`FunctionHandler`) | +//! |-----------------|----------------------------------------|--------------------------------------------| +//! | `on_return` | ignore (no return concept at top level)| evaluate & signal `Flow::Return(val)` | +//! | `on_include` | dispatch & enqueue the fragment | error – not allowed in function bodies | +//! | `on_eval` | fetch, parse, re-process | error – not allowed in function bodies | +//! | `on_try` | build parallel-fetch queues | ignore (no dispatcher available) | +//! | `on_function` | register in context | error – nested definitions not supported | +//! +//! Everything else – `Text`/`Html`/`Expr` output, `Assign`, `Vars`, `Choose`, +//! `Foreach`, `Break` – is implemented once as default methods on this trait. + +use bytes::Bytes; + +use crate::{ + expression::{eval_expr, EvalContext, Value}, + parser_types::{Element, Expr, IncludeAttributes, Tag, WhenBranch}, + Result, +}; + +/// Unified control-flow signal returned by every element-processing step. +pub(crate) enum Flow { + /// Keep going with the next element. + Continue, + /// Exit the nearest enclosing `esi:foreach` loop. + Break, + /// Return from the enclosing user-defined function with the given value. + Return(Value), +} + +/// Trait that abstracts over both ESI processing contexts. +/// +/// Implementors provide context-specific behaviour through the required hooks; +/// all shared tag-handling logic lives in the default method implementations. +pub(crate) trait ElementHandler { + // ------------------------------------------------------------------------- + // Required: context access + // ------------------------------------------------------------------------- + + /// Mutable access to the evaluation context (variables, request metadata, …). 
+ fn ctx(&mut self) -> &mut EvalContext; + + /// Write bytes to the context-appropriate output + /// (directly to a `Write` for streaming, or to a `Vec` for functions). + fn write_bytes(&mut self, bytes: Bytes) -> Result<()>; + + // ------------------------------------------------------------------------- + // Required: context-specific hooks + // ------------------------------------------------------------------------- + + /// Handle ``. + /// Streaming: ignore (returns `Flow::Continue`). + /// Function: evaluate `value`, return `Flow::Return(val)`. + fn on_return(&mut self, value: &Expr) -> Result; + + /// Handle ``. + /// Streaming: dispatch the fragment request and enqueue it. + /// Function: return an error. + fn on_include(&mut self, attrs: IncludeAttributes) -> Result; + + /// Handle ``. + /// Streaming: fetch the fragment, parse it as ESI, re-process in current context. + /// Function: return an error. + fn on_eval(&mut self, attrs: IncludeAttributes) -> Result; + + /// Handle ``. + /// Streaming: build parallel-dispatch queues for each attempt and the except clause. + /// Function: ignore (returns `Flow::Continue`). + fn on_try( + &mut self, + attempt_events: Vec>, + except_events: Vec, + ) -> Result; + + /// Handle ``. + /// Streaming: register in the evaluation context. + /// Function: return an error (nested definitions are not supported). + fn on_function(&mut self, name: String, body: Vec) -> Result; + + /// Non-blocking check for completed fragment requests, flushing any ready output. + /// + /// Called after processing each top-level element in the main parse loop. + /// Default is a no-op — only meaningful in the streaming context. + fn process_ready_queue_items(&mut self) -> Result<()> { + Ok(()) + } + + // ------------------------------------------------------------------------- + // Default: shared dispatch + // ------------------------------------------------------------------------- + + /// Dispatch a single element to the appropriate handler. 
+ /// + /// All context-neutral tags call shared default helpers; context-specific + /// tags call the required hooks above. + fn process(&mut self, element: &Element) -> Result { + match element { + Element::Text(text) | Element::Html(text) => { + self.write_bytes(text.clone())?; + Ok(Flow::Continue) + } + + Element::Expr(expr) => { + match eval_expr(expr, self.ctx()) { + Ok(val) if !matches!(val, Value::Null) => { + let bytes = val.to_bytes(); + if !bytes.is_empty() { + self.write_bytes(bytes)?; + } + } + _ => {} // Skip null values or evaluation errors + } + Ok(Flow::Continue) + } + + Element::Esi(Tag::Assign { + name, + subscript, + value, + }) => self.handle_assign(name, subscript.as_ref(), value), + + Element::Esi(Tag::Vars { name }) => self.handle_vars(name.as_deref()), + + Element::Esi(Tag::Include { attrs }) => self.on_include(attrs.clone()), + + Element::Esi(Tag::Eval { attrs }) => self.on_eval(attrs.clone()), + + Element::Esi(Tag::Choose { + when_branches, + otherwise_events, + }) => self.handle_choose(when_branches, otherwise_events), + + Element::Esi(Tag::Foreach { + collection, + item, + content, + }) => self.handle_foreach(collection, item.as_deref(), content), + + Element::Esi(Tag::Break) => Ok(Flow::Break), + + Element::Esi(Tag::Try { + attempt_events, + except_events, + }) => self.on_try(attempt_events.clone(), except_events.clone()), + + Element::Esi(Tag::Function { name, body }) => { + self.on_function(name.clone(), body.clone()) + } + + Element::Esi(Tag::Return { value }) => self.on_return(value), + + // Other standalone tags (e.g. Otherwise, When, Attempt, Except at + // top level) are parser artefacts that should never appear here. + Element::Esi(_) => Ok(Flow::Continue), + } + } + + // ------------------------------------------------------------------------- + // Default: shared tag handlers + // ------------------------------------------------------------------------- + + /// Handle `` — shared between both contexts. 
+ fn handle_assign( + &mut self, + name: &str, + subscript: Option<&Expr>, + value: &Expr, + ) -> Result { + // Evaluate right-hand side; on error use empty string (ESI leniency) + let val = eval_expr(value, self.ctx()).unwrap_or(Value::Text("".into())); + + if let Some(subscript_expr) = subscript { + // Subscript assignment: modify existing collection + if let Ok(subscript_val) = eval_expr(subscript_expr, self.ctx()) { + let key_str = subscript_val.to_string(); + self.ctx().set_variable(name, Some(&key_str), val)?; + } + } else { + // Regular assignment + self.ctx().set_variable(name, None, val)?; + } + Ok(Flow::Continue) + } + + /// Handle `` — sets the match-capture variable name. + fn handle_vars(&mut self, name: Option<&str>) -> Result { + if let Some(n) = name { + self.ctx().set_match_name(n); + } + Ok(Flow::Continue) + } + + /// Handle `` — evaluate when-branches in order, + /// fall through to otherwise if none match. + fn handle_choose( + &mut self, + when_branches: &[WhenBranch], + otherwise_events: &[Element], + ) -> Result { + let mut chose_branch = false; + + for when_branch in when_branches { + if let Some(ref match_name) = when_branch.match_name { + self.ctx().set_match_name(match_name); + } + + match eval_expr(&when_branch.test, self.ctx()) { + Ok(test_result) if test_result.to_bool() => { + // This branch matches - recursively process it + for elem in &when_branch.content { + match self.process(elem)? { + Flow::Continue => continue, + other => return Ok(other), + } + } + chose_branch = true; + break; + } + _ => continue, + } + } + + // No when matched - process otherwise + if !chose_branch { + for elem in otherwise_events { + match self.process(elem)? { + Flow::Continue => continue, + other => return Ok(other), + } + } + } + + Ok(Flow::Continue) + } + + /// Handle ``. 
+ fn handle_foreach( + &mut self, + collection: &Expr, + item: Option<&str>, + content: &[Element], + ) -> Result { + // Evaluate the collection expression + let collection_value = eval_expr(collection, self.ctx()).unwrap_or(Value::Null); + + // Convert to a list if needed + let items = match &collection_value { + Value::List(items) => items.clone(), + Value::Dict(map) => map + .iter() + .map(|(k, v)| { + // Convert dict entries to a list of 2-element lists [key, value] + Value::List(vec![Value::Text(k.clone().into()), v.clone()]) + }) + .collect(), + Value::Null => Vec::new(), + other => vec![other.clone()], // Treat single values as a list of one + }; + + // Default item variable name if not specified + let item_var = item.unwrap_or("item").to_string(); + + // Iterate through items + 'foreach: for item_value in items { + // Set the item variable + self.ctx().set_variable(&item_var, None, item_value)?; + + // Process content for this iteration + for elem in content { + match self.process(elem)? 
{ + Flow::Continue => continue, + Flow::Break => break 'foreach, + ret @ Flow::Return(_) => return Ok(ret), + } + } + } + + Ok(Flow::Continue) + } +} diff --git a/esi/src/expression.rs b/esi/src/expression.rs index 0026d2e..bde1f1a 100644 --- a/esi/src/expression.rs +++ b/esi/src/expression.rs @@ -5,9 +5,10 @@ use regex::RegexBuilder; use std::{collections::HashMap, fmt::Display}; use crate::{ + element_handler::{ElementHandler, Flow}, functions, literals::*, - parser_types::{Element, Expr, Operator}, + parser_types::{Element, Expr, IncludeAttributes, Operator}, ExecutionError, Result, }; @@ -356,7 +357,7 @@ pub struct EvalContext { /// Registry for user-defined ESI functions function_registry: FunctionRegistry, /// Maximum recursion depth for user-defined function calls (per ESI spec, default: 5) - max_function_recursion_depth: usize, + function_recursion_depth: usize, } impl Default for EvalContext { fn default() -> Self { @@ -374,7 +375,7 @@ impl Default for EvalContext { is_uncacheable: false, args_stack: Vec::new(), function_registry: FunctionRegistry::new(), - max_function_recursion_depth: 5, + function_recursion_depth: 5, } } } @@ -397,7 +398,7 @@ impl EvalContext { is_uncacheable: false, args_stack: Vec::new(), function_registry: FunctionRegistry::new(), - max_function_recursion_depth: 5, + function_recursion_depth: 5, } } @@ -706,7 +707,7 @@ impl EvalContext { /// Set maximum recursion depth for user-defined function calls pub const fn set_max_function_recursion_depth(&mut self, depth: usize) { - self.max_function_recursion_depth = depth; + self.function_recursion_depth = depth; } } @@ -894,167 +895,61 @@ fn dict_to_string(map: &HashMap) -> String { parts.join("&") } -/// Process a single element within a function body, accumulating output and checking for returns -/// This is a helper for call_user_function that can be called recursively +/// Element handler for user-defined function bodies. 
/// -/// Returns: -/// - `Ok(Some(value))` - Explicit return with value -/// - `Ok(None)` - Continue processing -fn process_function_element( - element: &Element, - output: &mut Vec, - ctx: &mut EvalContext, - should_break: &mut bool, -) -> Result> { - match element { - Element::Esi(crate::parser_types::Tag::Return { value }) => { - // Evaluate the return expression and signal early return - Ok(Some(eval_expr(value, ctx)?)) - } - Element::Text(text) | Element::Html(text) => { - output.extend_from_slice(text); - Ok(None) - } - Element::Expr(expr) => { - let value = eval_expr(expr, ctx)?; - output.extend_from_slice(value.to_bytes().as_ref()); - Ok(None) - } - Element::Esi(crate::parser_types::Tag::Assign { - name, - subscript, - value, - }) => { - let val = eval_expr(value, ctx)?; - let subscript_str = if let Some(sub_expr) = subscript { - Some(eval_expr(sub_expr, ctx)?.to_string()) - } else { - None - }; - ctx.set_variable(name, subscript_str.as_deref(), val)?; - Ok(None) - } - Element::Esi(crate::parser_types::Tag::Vars { name }) => { - if let Some(match_name) = name { - ctx.set_match_name(match_name); - } - Ok(None) - } - Element::Esi(crate::parser_types::Tag::Choose { - when_branches, - otherwise_events, - }) => { - let mut chose_branch = false; - - for when_branch in when_branches { - if let Some(ref match_name) = when_branch.match_name { - ctx.set_match_name(match_name); - } +/// Writes evaluated output to an in-memory `Vec`; signals `Return` or +/// `Break` back to the caller via `Flow`. +struct FunctionHandler<'a> { + ctx: &'a mut EvalContext, + output: &'a mut Vec, +} - match eval_expr(&when_branch.test, ctx) { - Ok(test_result) if test_result.to_bool() => { - // This branch matches - recursively process it - for elem in &when_branch.content { - if let Some(return_val) = process_function_element(elem, output, ctx, should_break)? 
{ - return Ok(Some(return_val)); - } - if *should_break { - return Ok(None); - } - } - chose_branch = true; - break; - } - _ => continue, - } - } +impl ElementHandler for FunctionHandler<'_> { + fn ctx(&mut self) -> &mut EvalContext { + self.ctx + } - // No when matched - process otherwise - if !chose_branch { - for elem in otherwise_events { - if let Some(return_val) = process_function_element(elem, output, ctx, should_break)? { - return Ok(Some(return_val)); - } - if *should_break { - return Ok(None); - } - } - } - Ok(None) - } - Element::Esi(crate::parser_types::Tag::Foreach { - collection, - item, - content, - }) => { - // Evaluate the collection expression - let collection_value = eval_expr(collection, ctx)?; - - // Convert to a list if needed - let items = match &collection_value { - Value::List(items) => items.clone(), - Value::Dict(map) => { - // Convert dict to list of [key, value] pairs - map.iter() - .map(|(k, v)| { - Value::List(vec![Value::Text(k.clone().into()), v.clone()]) - }) - .collect() - } - Value::Null => Vec::new(), - other => vec![other.clone()], // Treat single values as a list of one - }; + fn write_bytes(&mut self, bytes: bytes::Bytes) -> Result<()> { + self.output.extend_from_slice(&bytes); + Ok(()) + } - // Default item variable name if not specified - let item_var = item.as_ref().map(|s| s.as_str()).unwrap_or("item"); + /// Evaluate the return expression and signal an early exit from the function body. + fn on_return(&mut self, value: &Expr) -> Result { + let val = eval_expr(value, self.ctx)?; + Ok(Flow::Return(val)) + } - // Iterate through items - for item_value in items { - // Set the item variable - ctx.set_variable(item_var, None, item_value)?; + /// Per ESI spec: `esi:include` is not allowed inside function bodies. 
+ fn on_include(&mut self, _attrs: IncludeAttributes) -> Result { + Err(ExecutionError::FunctionError( + "esi:include is not allowed in function bodies".to_string(), + )) + } - // Process content for this iteration - for elem in content { - if let Some(return_val) = process_function_element(elem, output, ctx, should_break)? { - return Ok(Some(return_val)); - } - if *should_break { - break; // Break out of foreach loop - } - } - if *should_break { - *should_break = false; // Reset break flag after exiting loop - break; - } - } - Ok(None) - } - Element::Esi(crate::parser_types::Tag::Break) => { - // Signal break to exit foreach loop - *should_break = true; - Ok(None) - } - // Per ESI spec: functions cannot contain include, eval, or nested function definitions - Element::Esi(crate::parser_types::Tag::Include { .. }) => { - Err(ExecutionError::FunctionError( - "esi:include is not allowed in function bodies".to_string(), - )) - } - Element::Esi(crate::parser_types::Tag::Eval { .. }) => { - Err(ExecutionError::FunctionError( - "esi:eval is not allowed in function bodies".to_string(), - )) - } - Element::Esi(crate::parser_types::Tag::Function { .. }) => { - Err(ExecutionError::FunctionError( - "esi:function is not allowed in function bodies (nested function definitions are not supported)".to_string(), - )) - } + /// Per ESI spec: `esi:eval` is not allowed inside function bodies. + fn on_eval(&mut self, _attrs: IncludeAttributes) -> Result { + Err(ExecutionError::FunctionError( + "esi:eval is not allowed in function bodies".to_string(), + )) + } + + /// `esi:try` requires a dispatcher; silently ignore inside function bodies. + fn on_try( + &mut self, + _attempt_events: Vec>, + _except_events: Vec, + ) -> Result { // Try/Except would require dispatcher context which isn't available in expression evaluation // Silently ignore for now (could also error) - Element::Esi(crate::parser_types::Tag::Try { .. 
}) => Ok(None), - // Other tags that shouldn't appear - silently ignore - _ => Ok(None), + Ok(Flow::Continue) + } + + /// Per ESI spec: nested function definitions are not supported. + fn on_function(&mut self, _name: String, _body: Vec) -> Result { + Err(ExecutionError::FunctionError( + "esi:function is not allowed in function bodies (nested function definitions are not supported)".to_string(), + )) } } @@ -1079,29 +974,31 @@ fn call_user_function( ctx: &mut EvalContext, ) -> Result { // Check recursion depth before proceeding - if ctx.args_stack.len() >= ctx.max_function_recursion_depth { + if ctx.args_stack.len() >= ctx.function_recursion_depth { return Err(ExecutionError::FunctionError(format!( "Maximum recursion depth ({}) exceeded for function '{}'", - ctx.max_function_recursion_depth, name + ctx.function_recursion_depth, name ))); } // Push arguments onto the stack for $(ARGS) access ctx.push_args(args.to_vec()); - // Process function body, catching any errors to ensure cleanup + // Process function body via the shared ElementHandler trait, catching any + // errors to ensure cleanup let result = (|| { let mut output = Vec::new(); - let mut should_break = false; + let mut handler = FunctionHandler { + ctx, + output: &mut output, + }; for element in body { - if let Some(return_value) = - process_function_element(element, &mut output, ctx, &mut should_break)? - { - return Ok(return_value); + match handler.process(element)? 
{ + Flow::Continue => continue, + Flow::Return(value) => return Ok(value), + Flow::Break => continue, // Break at top level - ignore } - // Break at function level doesn't make sense - ignore the flag - should_break = false; } // No explicit return - return accumulated output as text diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 0933a7d..37898fd 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -2,6 +2,7 @@ pub mod cache; mod config; +mod element_handler; mod error; mod expression; mod functions; @@ -9,8 +10,9 @@ mod literals; mod parser; pub mod parser_types; +use crate::element_handler::{ElementHandler, Flow}; use crate::expression::EvalContext; -use crate::parser_types::{DcaMode, Expr}; +use crate::parser_types::{DcaMode, Element, Expr, IncludeAttributes}; use bytes::{Buf, Bytes, BytesMut}; use fastly::http::request::{PendingRequest, PollResult}; use fastly::http::{header, Method, StatusCode, Url}; @@ -171,6 +173,259 @@ pub struct Processor { queue: VecDeque, } +/// [`ElementHandler`] implementation for top-level ESI document processing. +/// +/// Pairs with [`FunctionHandler`](crate::expression::FunctionHandler) — together they are the two +/// concrete implementors of the trait, distinguished by execution context: this one drives +/// [`Processor`]'s streaming pipeline, giving the shared default methods access to the +/// output writer, the fragment dispatcher, and the ready-queue. +// +// (contrast with `FunctionHandler` in expression.rs, which drives user-defined function bodies) +struct DocumentHandler<'a, W: Write> { + processor: &'a mut Processor, + output: &'a mut W, + dispatcher: &'a FragmentRequestDispatcher, + /// Optional response post-processor; needed by `process_ready_queue_items`. 
+ response_processor: Option<&'a FragmentResponseProcessor>, +} + +impl ElementHandler for DocumentHandler<'_, W> { + fn ctx(&mut self) -> &mut EvalContext { + &mut self.processor.ctx + } + + fn process_ready_queue_items(&mut self) -> crate::Result<()> { + self.processor.process_ready_queue_items( + self.output, + self.dispatcher, + self.response_processor, + ) + } + + fn write_bytes(&mut self, bytes: Bytes) -> crate::Result<()> { + if self.processor.queue.is_empty() { + // Not blocked - write immediately + self.output + .write_all(&bytes) + .map_err(ESIError::WriterError)?; + } else { + // Blocked by a pending fragment - enqueue for later + self.processor + .queue + .push_back(QueuedElement::Content(bytes)); + } + Ok(()) + } + + fn on_return(&mut self, _value: &Expr) -> crate::Result { + // Return tags should only appear inside function bodies, not at the streaming level + Ok(Flow::Continue) + } + + fn on_include(&mut self, attrs: IncludeAttributes) -> crate::Result { + let queued_element = self + .processor + .process_include_tag(&attrs, self.dispatcher)?; + self.processor.queue.push_back(queued_element); + Ok(Flow::Continue) + } + + /// Handle `` — BLOCKING operation that fetches and re-processes content as ESI. + /// + /// The `dca` attribute controls processing mode: + /// - `dca="none"` (default): fragment executed in parent's context (shared variables). + /// - `dca="esi"`: fragment executed in an isolated context (output only, no variable leakage). 
+ fn on_eval(&mut self, attrs: IncludeAttributes) -> crate::Result { + // Build and dispatch the request (same machinery as include, but blocking) + let queued_element = self + .processor + .dispatch_include_to_element(&attrs, self.dispatcher)?; + + match queued_element { + QueuedElement::Include(fragment) => { + // Eval is BLOCKING - wait for the response immediately + let response = fragment.pending_fragment.wait()?; + + if !response.get_status().is_success() { + if fragment.metadata.continue_on_error { + // Per ESI spec: onerror="continue" deletes the tag with no output + return Ok(Flow::Continue); + } else { + return Err(ExecutionError::ExpressionError(format!( + "Eval request failed with status: {}", + response.get_status() + ))); + } + } + + // Get the response body + let body_bytes = response.into_body_bytes(); + let body_as_bytes = Bytes::from(body_bytes); + + // ALWAYS parse as ESI (this is the key difference from include) + let (rest, elements) = parser::parse_remainder(&body_as_bytes).map_err(|e| { + ExecutionError::ExpressionError(format!("Failed to parse eval fragment: {}", e)) + })?; + + if !rest.is_empty() { + return Err(ExecutionError::ExpressionError( + "Incomplete parse of eval fragment".to_string(), + )); + } + + if fragment.metadata.dca == DcaMode::Esi { + // dca="esi": TWO-PHASE processing + // Phase 1: Process fragment in ISOLATED context + let dispatcher = self.dispatcher; // Copy the fat pointer (it's a reference) + let mut isolated_processor = Processor::new( + Some(self.processor.ctx.get_request().clone_without_body()), + self.processor.configuration.clone(), + ); + let mut isolated_output = Vec::new(); + + let mut isolated_handler = DocumentHandler { + processor: &mut isolated_processor, + output: &mut isolated_output, + dispatcher, + response_processor: None, + }; + for element in elements { + isolated_handler.process(&element)?; + } + + // Phase 2: Parse the isolated output as ESI and process in PARENT's context + // This is why 
variables don't leak: they only exist in phase 1 + let isolated_bytes = Bytes::from(isolated_output); + let (rest, output_elements) = parser::parse_remainder(&isolated_bytes) + .map_err(|e| { + ExecutionError::ExpressionError(format!( + "Failed to parse eval isolated output: {}", + e + )) + })?; + + if !rest.is_empty() { + return Err(ExecutionError::ExpressionError( + "Incomplete parse of eval isolated output".to_string(), + )); + } + + for element in output_elements { + if matches!(self.process(&element)?, Flow::Break) { + return Ok(Flow::Break); + } + } + } else { + // dca="none": SINGLE-PHASE processing in PARENT's context + // Fragment included first, then executed in parent (variables affect parent) + for element in elements { + if matches!(self.process(&element)?, Flow::Break) { + return Ok(Flow::Break); // Propagate break from eval'd content + } + } + } + + Ok(Flow::Continue) + } + QueuedElement::Content(_) => { + // Error with continue_on_error - insert nothing per spec + Ok(Flow::Continue) + } + _ => unreachable!("dispatch_include_to_element should only return Include or Content"), + } + } + + fn on_try( + &mut self, + attempt_events: Vec>, + except_events: Vec, + ) -> crate::Result { + let mut attempt_queues = Vec::new(); + + for attempt in attempt_events { + let attempt_queue = self.build_attempt_queue(attempt)?; + attempt_queues.push(attempt_queue); + } + + // Process except clause elements + let except_queue = self.build_attempt_queue(except_events)?; + + // Add the try block to the queue with all attempts and except dispatched + self.processor.queue.push_back(QueuedElement::Try { + attempt_elements: attempt_queues, + except_elements: except_queue, + }); + Ok(Flow::Continue) + } + + fn on_function(&mut self, name: String, body: Vec) -> crate::Result { + // Register user-defined function in the evaluation context + self.processor.ctx.register_function(name, body); + Ok(Flow::Continue) + } +} + +impl DocumentHandler<'_, W> { + /// Build a pre-dispatch 
queue for use inside a `` attempt or except block. + /// + /// Text/html/expressions and includes are resolved and queued immediately (for parallel + /// fetching); `choose` and nested `try` blocks are processed inline via the trait. + fn build_attempt_queue( + &mut self, + elements: Vec, + ) -> crate::Result> { + use parser_types::{Element, Tag}; + + let mut queue = Vec::new(); + + for elem in elements { + match elem { + Element::Text(text) => { + queue.push(QueuedElement::Content(text)); + } + Element::Html(html) => { + queue.push(QueuedElement::Content(html)); + } + Element::Expr(ref expr) => match expression::eval_expr(expr, self.ctx()) { + Ok(value) => { + if !matches!(value, expression::Value::Null) { + let bytes = value.to_bytes(); + if !bytes.is_empty() { + queue.push(QueuedElement::Content(bytes)); + } + } + } + Err(e) => { + debug!("Expression evaluation failed: {e:?}"); + } + }, + Element::Esi(Tag::Include { ref attrs }) => { + // Dispatch the include and add to queue + let queued_element = + self.processor.process_include_tag(attrs, self.dispatcher)?; + queue.push(queued_element); + } + Element::Esi(Tag::Choose { + ref when_branches, + ref otherwise_events, + }) => { + // Evaluate and process chosen branch inline + self.handle_choose(when_branches, otherwise_events)?; + // Note: breaks within try blocks don't propagate out + } + Element::Esi(Tag::Try { .. 
}) => { + // Nested try blocks - process recursively + self.process(&elem)?; + // Note: breaks within try blocks don't propagate out + } + Element::Esi(_) => {} + } + } + + Ok(queue) + } +} + impl Processor { pub fn new(original_request_metadata: Option, configuration: Configuration) -> Self { let mut ctx = EvalContext::new(); @@ -440,16 +695,17 @@ impl Processor { match parse_result { Ok((remaining, elements)) => { // Successfully parsed some elements + let mut handler = DocumentHandler { + processor: self, + output: output_writer, + dispatcher, + response_processor: process_fragment_response, + }; for element in elements { - let _ = - self.process_element_streaming(element, output_writer, dispatcher)?; // Note: breaks at top-level are ignored + handler.process(&element)?; // After each element, check if any queued includes are ready (non-blocking poll) - self.process_ready_queue_items( - output_writer, - dispatcher, - process_fragment_response, - )?; + handler.process_ready_queue_items()?; } // Calculate how many bytes were consumed @@ -505,444 +761,6 @@ impl Processor { Ok(()) } - /// Handle text or HTML content elements - fn handle_content(&mut self, text: Bytes, output_writer: &mut impl Write) -> Result { - if self.queue.is_empty() { - // Not blocked - write immediately - output_writer.write_all(&text)?; - } else { - // Blocked - queue it - self.queue.push_back(QueuedElement::Content(text)); - } - Ok(false) - } - - /// Handle expression evaluation and output - fn handle_expr(&mut self, expr: Expr, output_writer: &mut impl Write) -> Result { - match expression::eval_expr(&expr, &mut self.ctx) { - Ok(val) if !matches!(val, expression::Value::Null) => { - let bytes = val.to_bytes(); - if !bytes.is_empty() { - if self.queue.is_empty() { - output_writer.write_all(&bytes)?; - } else { - self.queue.push_back(QueuedElement::Content(bytes)); - } - } - } - _ => {} // Skip null or error - } - Ok(false) - } - - /// Handle variable assignment - fn handle_assign(&mut self, 
name: &str, subscript: Option, value: &Expr) -> Result { - let val = expression::eval_expr(value, &mut self.ctx) - .unwrap_or(expression::Value::Text("".into())); - - if let Some(subscript_expr) = subscript { - // Subscript assignment: modify existing collection - if let Ok(subscript_val) = expression::eval_expr(&subscript_expr, &mut self.ctx) { - let key_str = subscript_val.to_string(); - self.ctx.set_variable(name, Some(&key_str), val)?; - } - } else { - // Regular assignment without subscript - self.ctx.set_variable(name, None, val)?; - } - Ok(false) - } - - /// Handle esi:vars tag (sets match name) - fn handle_vars(&mut self, name: Option) -> Result { - if let Some(n) = name { - self.ctx.set_match_name(&n); - } - Ok(false) - } - - /// Handle esi:include tag - fn handle_include( - &mut self, - attrs: &parser_types::IncludeAttributes, - dispatcher: &FragmentRequestDispatcher, - ) -> Result { - let queued_element = self.process_include_tag(attrs, dispatcher)?; - self.queue.push_back(queued_element); - Ok(false) - } - - /// Handle esi:eval tag - BLOCKING operation that fetches and evaluates content as ESI - /// The dca attribute determines how eval processes the fragment: - /// - dca="none" (default): Fragment executed in parent's context (shared variables) - /// - dca="esi": Fragment executed in isolated context (output only) - fn handle_eval( - &mut self, - attrs: &parser_types::IncludeAttributes, - dispatcher: &FragmentRequestDispatcher, - output_writer: &mut impl Write, - ) -> Result { - // Build and dispatch the request (similar to include) - let queued_element = self.dispatch_include_to_element(attrs, dispatcher)?; - - // Eval is BLOCKING - wait for the response immediately - match queued_element { - QueuedElement::Include(fragment) => { - // Wait for the fragment to complete - let response = fragment.pending_fragment.wait()?; - - // Check if successful - if !response.get_status().is_success() { - if fragment.metadata.continue_on_error { - // Per ESI spec: 
onerror="continue" deletes the tag with no output - return Ok(false); - } else { - return Err(ExecutionError::ExpressionError(format!( - "Eval request failed with status: {}", - response.get_status() - ))); - } - } - - // Get the response body - let body_bytes = response.into_body_bytes(); - let body_as_bytes = Bytes::from(body_bytes); - - // ALWAYS parse as ESI (this is the key difference from include) - let (rest, elements) = parser::parse_remainder(&body_as_bytes).map_err(|e| { - ExecutionError::ExpressionError(format!("Failed to parse eval fragment: {}", e)) - })?; - - if !rest.is_empty() { - return Err(ExecutionError::ExpressionError( - "Incomplete parse of eval fragment".to_string(), - )); - } - - // Check dca mode to determine processing context - if fragment.metadata.dca == DcaMode::Esi { - // dca="esi": TWO-PHASE processing - // Phase 1: Process fragment in ISOLATED context - let mut isolated_processor = Self::new( - Some(self.ctx.get_request().clone_without_body()), - self.configuration.clone(), - ); - let mut isolated_output = Vec::new(); - - for element in elements { - isolated_processor.process_element_streaming( - element, - &mut isolated_output, - dispatcher, - )?; - } - - // Phase 2: Parse the isolated output as ESI and process in PARENT's context - // This is why variables don't leak: they only exist in phase 1 - let isolated_bytes = Bytes::from(isolated_output); - let (rest, output_elements) = parser::parse_remainder(&isolated_bytes) - .map_err(|e| { - ExecutionError::ExpressionError(format!( - "Failed to parse eval isolated output: {}", - e - )) - })?; - - if !rest.is_empty() { - return Err(ExecutionError::ExpressionError( - "Incomplete parse of eval isolated output".to_string(), - )); - } - - for element in output_elements { - let break_encountered = - self.process_element_streaming(element, output_writer, dispatcher)?; - if break_encountered { - return Ok(true); - } - } - } else { - // dca="none": SINGLE-PHASE processing in PARENT's context - 
// Fragment included first, then executed in parent (variables affect parent) - for element in elements { - let break_encountered = - self.process_element_streaming(element, output_writer, dispatcher)?; - if break_encountered { - return Ok(true); // Propagate break from eval'd content - } - } - } - - Ok(false) - } - QueuedElement::Content(_content) => { - // Error with continue_on_error - insert nothing per spec - Ok(false) - } - _ => unreachable!("dispatch_include_to_element should only return Include or Content"), - } - } - - /// Handle esi:choose tag - fn handle_choose( - &mut self, - when_branches: Vec, - otherwise_events: Vec, - output_writer: &mut impl Write, - dispatcher: &FragmentRequestDispatcher, - ) -> Result { - let mut chose_branch = false; - - for when_branch in when_branches { - if let Some(ref match_name) = when_branch.match_name { - self.ctx.set_match_name(match_name); - } - - match expression::eval_expr(&when_branch.test, &mut self.ctx) { - Ok(test_result) if test_result.to_bool() => { - // This branch matches - recursively process it - for elem in when_branch.content { - let break_encountered = - self.process_element_streaming(elem, output_writer, dispatcher)?; - if break_encountered { - return Ok(true); // Propagate break signal - } - } - chose_branch = true; - break; - } - _ => continue, - } - } - - // No when matched - process otherwise - if !chose_branch { - for elem in otherwise_events { - let break_encountered = - self.process_element_streaming(elem, output_writer, dispatcher)?; - if break_encountered { - return Ok(true); // Propagate break signal - } - } - } - Ok(false) - } - - /// Handle esi:try tag with attempts and except clause - fn handle_try( - &mut self, - attempt_events: Vec>, - except_events: Vec, - output_writer: &mut impl Write, - dispatcher: &FragmentRequestDispatcher, - ) -> Result { - let mut attempt_queues = Vec::new(); - - for attempt in attempt_events { - let attempt_queue = self.build_attempt_queue(attempt, output_writer, 
dispatcher)?; - attempt_queues.push(attempt_queue); - } - - // Process except clause elements - let except_queue = self.build_attempt_queue(except_events, output_writer, dispatcher)?; - - // Add the try block to the queue with all attempts and except dispatched - self.queue.push_back(QueuedElement::Try { - attempt_elements: attempt_queues, - except_elements: except_queue, - }); - Ok(false) - } - - /// Build a queue for attempt or except blocks - fn build_attempt_queue( - &mut self, - elements: Vec, - output_writer: &mut impl Write, - dispatcher: &FragmentRequestDispatcher, - ) -> Result> { - let mut queue = Vec::new(); - - for elem in elements { - match elem { - parser_types::Element::Text(text) => { - queue.push(QueuedElement::Content(text)); - } - parser_types::Element::Html(html) => { - queue.push(QueuedElement::Content(html)); - } - parser_types::Element::Expr(expr) => { - match expression::eval_expr(&expr, &mut self.ctx) { - Ok(value) => { - if !matches!(value, expression::Value::Null) { - let bytes = value.to_bytes(); - if !bytes.is_empty() { - queue.push(QueuedElement::Content(bytes)); - } - } - } - Err(e) => { - debug!("Expression evaluation failed: {e:?}"); - } - } - } - parser_types::Element::Esi(parser_types::Tag::Include { attrs }) => { - // Dispatch the include and add to queue - let queued_element = self.process_include_tag(&attrs, dispatcher)?; - queue.push(queued_element); - } - parser_types::Element::Esi(parser_types::Tag::Choose { - when_branches, - otherwise_events, - }) => { - // Evaluate and process chosen branch inline - let mut chose_branch = false; - for when_branch in when_branches { - if let Some(match_name) = &when_branch.match_name { - self.ctx.set_match_name(match_name); - } - let test_result = expression::eval_expr(&when_branch.test, &mut self.ctx)?; - if test_result.to_bool() { - chose_branch = true; - for elem in when_branch.content { - self.process_element_streaming(elem, output_writer, dispatcher)?; - // Note: breaks within try 
blocks don't propagate out - } - break; - } - } - if !chose_branch { - for elem in otherwise_events { - self.process_element_streaming(elem, output_writer, dispatcher)?; - // Note: breaks within try blocks don't propagate out - } - } - } - parser_types::Element::Esi(parser_types::Tag::Try { .. }) => { - // Nested try blocks - process recursively - self.process_element_streaming(elem.clone(), output_writer, dispatcher)?; - // Note: breaks within try blocks don't propagate out - } - parser_types::Element::Esi(_) => {} - } - } - - Ok(queue) - } - - /// Handle esi:foreach tag - fn handle_foreach( - &mut self, - collection: Expr, - item: Option, - content: &[parser_types::Element], - output_writer: &mut impl Write, - dispatcher: &FragmentRequestDispatcher, - ) -> Result { - // Evaluate the collection expression - let collection_value = - expression::eval_expr(&collection, &mut self.ctx).unwrap_or(expression::Value::Null); - - // Convert to a list if needed - let items = match &collection_value { - expression::Value::List(items) => items.clone(), - expression::Value::Dict(map) => { - // Convert dict entries to a list of 2-element lists [key, value] - map.iter() - .map(|(k, v)| { - expression::Value::List(vec![ - expression::Value::Text(k.clone().into()), - v.clone(), - ]) - }) - .collect() - } - expression::Value::Null => Vec::new(), - other => vec![other.clone()], // Treat single values as a list of one - }; - - // Default item variable name if not specified - let item_var = item.unwrap_or_else(|| "item".to_string()); - - // Iterate through items - 'foreach_loop: for item_value in items { - // Set the item variable - self.ctx.set_variable(&item_var, None, item_value)?; - - // Process content for this iteration - for elem in content { - let break_encountered = - self.process_element_streaming(elem.clone(), output_writer, dispatcher)?; - if break_encountered { - break 'foreach_loop; - } - } - } - Ok(false) - } - - /// Process a single element in streaming mode - fn 
process_element_streaming( - &mut self, - element: parser_types::Element, - output_writer: &mut impl Write, - dispatcher: &FragmentRequestDispatcher, - ) -> Result { - use parser_types::{Element, Tag}; - - match element { - Element::Text(text) | Element::Html(text) => self.handle_content(text, output_writer), - - Element::Expr(expr) => self.handle_expr(expr, output_writer), - - Element::Esi(Tag::Assign { - name, - subscript, - value, - }) => self.handle_assign(&name, subscript, &value), - - Element::Esi(Tag::Vars { name }) => self.handle_vars(name), - - Element::Esi(Tag::Include { attrs }) => self.handle_include(&attrs, dispatcher), - - Element::Esi(Tag::Eval { attrs }) => { - self.handle_eval(&attrs, dispatcher, output_writer) - } - - Element::Esi(Tag::Choose { - when_branches, - otherwise_events, - }) => self.handle_choose(when_branches, otherwise_events, output_writer, dispatcher), - - Element::Esi(Tag::Try { - attempt_events, - except_events, - }) => self.handle_try(attempt_events, except_events, output_writer, dispatcher), - - Element::Esi(Tag::Foreach { - collection, - item, - content, - }) => self.handle_foreach(collection, item, &content, output_writer, dispatcher), - - Element::Esi(Tag::Break) => Ok(true), - - Element::Esi(Tag::Function { name, body }) => { - // Register user-defined function in the evaluation context - self.ctx.register_function(name, body); - Ok(false) - } - - Element::Esi(Tag::Return { .. 
}) => { - // Return tags should only appear inside function bodies, not at top level - // Ignore at top level - Ok(false) - } - - Element::Esi(_) => Ok(false), // Other standalone tags shouldn't appear - } - } - /// Evaluate an Expr to a Bytes value for use in includes /// Handles variable resolution, function calls, and string interpolation fn evaluate_expr_to_bytes(&mut self, expr: &Expr) -> Result { @@ -1527,10 +1345,14 @@ impl Processor { } // Process each element in the current namespace + let mut handler = DocumentHandler { + processor: self, + output: output_writer, + dispatcher, + response_processor: None, + }; for element in elements { - let break_encountered = - self.process_element_streaming(element, output_writer, dispatcher)?; - if break_encountered { + if matches!(handler.process(&element)?, Flow::Break) { return Ok(()); // Break from foreach, stop processing } } From a13aef328055c3cd3aac349b170c8666f5483126 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Sun, 1 Mar 2026 21:50:49 -0600 Subject: [PATCH 087/119] fix(try): includes inside attempt blocks now see correct variable state --- esi/src/lib.rs | 510 +++++++++++++---------------------------- esi/tests/esi-tests.rs | 319 ++++++++++++++++++++++++++ 2 files changed, 484 insertions(+), 345 deletions(-) diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 37898fd..552536c 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -95,11 +95,11 @@ enum QueuedElement { Content(Bytes), /// A dispatched include waiting to be executed Include(Box), - /// A try block with attempts and except clause - /// All includes from all attempts have been dispatched in parallel + /// A try block with unevaluated attempt/except elements. + /// Elements are executed lazily in document order when the block is drained. 
Try { - attempt_elements: Vec>, - except_elements: Vec, + attempt_elements: Vec>, + except_elements: Vec, }, } @@ -225,7 +225,7 @@ impl ElementHandler for DocumentHandler<'_, W> { fn on_include(&mut self, attrs: IncludeAttributes) -> crate::Result { let queued_element = self .processor - .process_include_tag(&attrs, self.dispatcher)?; + .dispatch_include_to_element(&attrs, self.dispatcher)?; self.processor.queue.push_back(queued_element); Ok(Flow::Continue) } @@ -276,7 +276,8 @@ impl ElementHandler for DocumentHandler<'_, W> { if fragment.metadata.dca == DcaMode::Esi { // dca="esi": TWO-PHASE processing // Phase 1: Process fragment in ISOLATED context - let dispatcher = self.dispatcher; // Copy the fat pointer (it's a reference) + // Reborrow before the exclusive borrow of self.processor below + let dispatcher = self.dispatcher; let mut isolated_processor = Processor::new( Some(self.processor.ctx.get_request().clone_without_body()), self.processor.configuration.clone(), @@ -340,20 +341,12 @@ impl ElementHandler for DocumentHandler<'_, W> { attempt_events: Vec>, except_events: Vec, ) -> crate::Result { - let mut attempt_queues = Vec::new(); - - for attempt in attempt_events { - let attempt_queue = self.build_attempt_queue(attempt)?; - attempt_queues.push(attempt_queue); - } - - // Process except clause elements - let except_queue = self.build_attempt_queue(except_events)?; - - // Add the try block to the queue with all attempts and except dispatched + // Store raw elements; they will be evaluated lazily in document order + // when the try block is drained. This ensures variable assignments + // made by earlier elements in the attempt are visible to later includes. 
self.processor.queue.push_back(QueuedElement::Try { - attempt_elements: attempt_queues, - except_elements: except_queue, + attempt_elements: attempt_events, + except_elements: except_events, }); Ok(Flow::Continue) } @@ -365,67 +358,6 @@ impl ElementHandler for DocumentHandler<'_, W> { } } -impl DocumentHandler<'_, W> { - /// Build a pre-dispatch queue for use inside a `` attempt or except block. - /// - /// Text/html/expressions and includes are resolved and queued immediately (for parallel - /// fetching); `choose` and nested `try` blocks are processed inline via the trait. - fn build_attempt_queue( - &mut self, - elements: Vec, - ) -> crate::Result> { - use parser_types::{Element, Tag}; - - let mut queue = Vec::new(); - - for elem in elements { - match elem { - Element::Text(text) => { - queue.push(QueuedElement::Content(text)); - } - Element::Html(html) => { - queue.push(QueuedElement::Content(html)); - } - Element::Expr(ref expr) => match expression::eval_expr(expr, self.ctx()) { - Ok(value) => { - if !matches!(value, expression::Value::Null) { - let bytes = value.to_bytes(); - if !bytes.is_empty() { - queue.push(QueuedElement::Content(bytes)); - } - } - } - Err(e) => { - debug!("Expression evaluation failed: {e:?}"); - } - }, - Element::Esi(Tag::Include { ref attrs }) => { - // Dispatch the include and add to queue - let queued_element = - self.processor.process_include_tag(attrs, self.dispatcher)?; - queue.push(queued_element); - } - Element::Esi(Tag::Choose { - ref when_branches, - ref otherwise_events, - }) => { - // Evaluate and process chosen branch inline - self.handle_choose(when_branches, otherwise_events)?; - // Note: breaks within try blocks don't propagate out - } - Element::Esi(Tag::Try { .. 
}) => { - // Nested try blocks - process recursively - self.process(&elem)?; - // Note: breaks within try blocks don't propagate out - } - Element::Esi(_) => {} - } - } - - Ok(queue) - } -} - impl Processor { pub fn new(original_request_metadata: Option, configuration: Configuration) -> Self { let mut ctx = EvalContext::new(); @@ -761,35 +693,10 @@ impl Processor { Ok(()) } - /// Evaluate an Expr to a Bytes value for use in includes - /// Handles variable resolution, function calls, and string interpolation - fn evaluate_expr_to_bytes(&mut self, expr: &Expr) -> Result { - use crate::expression::eval_expr; - - // Evaluate the expression to get a Value - let result = eval_expr(expr, &mut self.ctx)?; - - // Convert the Value to Bytes using the built-in to_bytes method - Ok(result.to_bytes()) - } - - /// Helper to evaluate Include expressions and dispatch the request - /// Returns a `QueuedElement` ready to be added to any queue (main/attempt/except) - fn process_include_tag( - &mut self, - attrs: &parser_types::IncludeAttributes, - dispatcher: &FragmentRequestDispatcher, - ) -> Result { - self.dispatch_include_to_element(attrs, dispatcher) - } - /// Evaluate request parameters from `IncludeAttributes` and return a `FragmentMetadata` struct /// /// Evaluate original tag attributes and compute all values needed for dispatching a fragment request - fn evaluate_request_params( - &mut self, - attrs: &parser_types::IncludeAttributes, - ) -> Result { + fn evaluate_request_params(&mut self, attrs: &IncludeAttributes) -> Result { // Parse TTL if provided (it's a literal string like "120m", not an expression) let ttl_override = attrs .ttl @@ -800,26 +707,26 @@ impl Processor { let method = attrs .method .as_ref() - .map(|e| self.evaluate_expr_to_bytes(e)) + .map(|e| eval_expr_to_bytes(e, &mut self.ctx)) .transpose()?; // Evaluate entity if provided let entity = attrs .entity .as_ref() - .map(|e| self.evaluate_expr_to_bytes(e)) + .map(|e| eval_expr_to_bytes(e, &mut self.ctx)) 
.transpose()?; // Evaluate header values let mut setheaders = Vec::with_capacity(attrs.setheaders.len()); for (name, value_expr) in &attrs.setheaders { - let value_bytes = self.evaluate_expr_to_bytes(value_expr)?; + let value_bytes = eval_expr_to_bytes(value_expr, &mut self.ctx)?; setheaders.push((name.clone(), value_bytes)); } let mut appendheaders = Vec::with_capacity(attrs.appendheaders.len()); for (name, value_expr) in &attrs.appendheaders { - let value_bytes = self.evaluate_expr_to_bytes(value_expr)?; + let value_bytes = eval_expr_to_bytes(value_expr, &mut self.ctx)?; appendheaders.push((name.clone(), value_bytes)); } @@ -844,15 +751,15 @@ impl Processor { /// This is the single source of truth for include dispatching logic fn dispatch_include_to_element( &mut self, - attrs: &parser_types::IncludeAttributes, + attrs: &IncludeAttributes, dispatcher: &FragmentRequestDispatcher, ) -> Result { // Evaluate src and alt expressions to get actual URLs - let src_bytes = self.evaluate_expr_to_bytes(&attrs.src)?; + let src_bytes = eval_expr_to_bytes(&attrs.src, &mut self.ctx)?; let alt_bytes = attrs .alt .as_ref() - .map(|e| self.evaluate_expr_to_bytes(e)) + .map(|e| eval_expr_to_bytes(e, &mut self.ctx)) .transpose()?; // Evaluate all metadata once (includes request params and TTL) @@ -869,7 +776,7 @@ impl Processor { let mut separator = if url.contains('?') { '&' } else { '?' }; for (name, value_expr) in &attrs.params { - let value = self.evaluate_expr_to_bytes(value_expr)?; + let value = eval_expr_to_bytes(value_expr, &mut self.ctx)?; let value_str = String::from_utf8_lossy(&value); // Direct string building is more efficient than format! 
url.push(separator); @@ -913,7 +820,7 @@ impl Processor { dispatcher(alt_req_without_body, metadata.maxwait).map_or_else( |_| { Ok(QueuedElement::Content(Bytes::from_static( - b"", + FRAGMENT_REQUEST_FAILED, ))) }, // @@ -929,7 +836,7 @@ impl Processor { ) } else { Ok(QueuedElement::Content(Bytes::from_static( - b"", + FRAGMENT_REQUEST_FAILED, ))) } } @@ -939,46 +846,31 @@ impl Processor { } } - /// Check ready queue items - non-blocking poll - /// Process any fragments that are already completed without blocking + /// Check ready queue items — non-blocking poll. + /// + /// Processes completed fragments, ready content, and try blocks from the front of the + /// queue without blocking. Stops as soon as it encounters a pending include. fn process_ready_queue_items( &mut self, output_writer: &mut impl Write, dispatcher: &FragmentRequestDispatcher, processor: Option<&FragmentResponseProcessor>, ) -> Result<()> { - // Process ready items from the front of the queue without blocking loop { - // Check what's at the front - let should_try = match self.queue.front() { - Some(QueuedElement::Content(_)) => true, - Some(QueuedElement::Include(_)) => true, - Some(QueuedElement::Try { .. 
}) => false, // Skip try blocks - None => false, - }; - - if !should_try { - break; - } - - // Pop and process the front element - let elem = self.queue.pop_front().unwrap(); - match elem { - QueuedElement::Content(content) => { - // Content is always ready + match self.queue.pop_front() { + None => break, + Some(QueuedElement::Content(content)) => { + // Content is always ready - write immediately output_writer.write_all(&content)?; } - QueuedElement::Include(mut fragment) => { + Some(QueuedElement::Include(mut fragment)) => { // Poll the fragment (non-blocking check) let pending_content = std::mem::replace( &mut fragment.pending_fragment, PendingFragmentContent::NoContent, ); fragment.pending_fragment = pending_content.poll(); - - // Check if it's ready now if fragment.pending_fragment.is_ready() { - // Process it! self.process_include_from_queue( *fragment, output_writer, @@ -986,255 +878,140 @@ impl Processor { processor, )?; } else { - // Still pending - put it back at the front and stop + // Still pending - put it back at front and stop self.queue.push_front(QueuedElement::Include(fragment)); break; } } - QueuedElement::Try { .. } => { - unreachable!("Try blocks should be skipped in ready check"); + Some(QueuedElement::Try { + attempt_elements, + except_elements, + }) => { + // Process try blocks inline rather than stalling the queue. + // Previously Try was skipped here, causing a stall whenever a Try block + // reached the front after a preceding include was consumed. + self.process_try_block( + attempt_elements, + except_elements, + output_writer, + dispatcher, + processor, + )?; } } } Ok(()) } - /// Drain queue with efficient waiting using `select()` - /// Uses `select()` to process whichever pending request completes first + /// Drain the queue to completion, blocking on each element in document order. + /// + /// All fragment requests were pre-dispatched during element parsing and are already + /// fetching in parallel. 
This method harvests them sequentially in document order. + /// + /// The previous `select()`-based approach was removed because `fastly::http::request::select` + /// does not expose which of the original requests completed (no identity on `PendingRequest`), + /// making it impossible to correctly correlate the completed response with its fragment + /// metadata (alt URL, `onerror` policy, etc.). fn drain_queue( &mut self, output_writer: &mut impl Write, dispatcher: &FragmentRequestDispatcher, processor: Option<&FragmentResponseProcessor>, ) -> Result<()> { - while !self.queue.is_empty() { - // First, write out any content that's at the front - while let Some(QueuedElement::Content(_)) = self.queue.front() { - if let Some(QueuedElement::Content(bytes)) = self.queue.pop_front() { + while let Some(elem) = self.queue.pop_front() { + match elem { + QueuedElement::Content(bytes) => { output_writer.write_all(&bytes)?; } - } - - if self.queue.is_empty() { - break; - } - - // Collect all pending includes from the queue - let mut pending_fragments: Vec<(usize, Box)> = Vec::new(); - let mut temp_queue: VecDeque = VecDeque::new(); - - for (idx, elem) in self.queue.drain(..).enumerate() { - match elem { - QueuedElement::Include(fragment) => { - if matches!( - fragment.pending_fragment, - PendingFragmentContent::PendingRequest(_) - ) { - pending_fragments.push((idx, fragment)); - } else { - // Already ready - process immediately - temp_queue.push_back(QueuedElement::Include(fragment)); - } - } - other => temp_queue.push_back(other), - } - } - - // Restore the queue with non-pending items - self.queue = temp_queue; - - if pending_fragments.is_empty() { - // Process remaining non-pending items - if let Some(elem) = self.queue.pop_front() { - match elem { - QueuedElement::Include(fragment) => { - self.process_include_from_queue( - *fragment, - output_writer, - dispatcher, - processor, - )?; - } - QueuedElement::Try { - attempt_elements, - except_elements, - } => { - // Process try 
block: try each attempt, use except if all fail - self.process_try_block( - attempt_elements, - except_elements, - output_writer, - dispatcher, - processor, - )?; - } - QueuedElement::Content(_) => { - unreachable!("Content should have been processed above"); - } - } + QueuedElement::Include(fragment) => { + self.process_include_from_queue( + *fragment, + output_writer, + dispatcher, + processor, + )?; } - continue; - } - - // Extract PendingRequests for select() - let mut pending_reqs: Vec = Vec::new(); - let mut fragments_by_request: Vec<(usize, Box)> = Vec::new(); - - for (idx, mut fragment) in pending_fragments { - if let PendingFragmentContent::PendingRequest(pending_req) = std::mem::replace( - &mut fragment.pending_fragment, - PendingFragmentContent::NoContent, - ) { - pending_reqs.push(*pending_req); - fragments_by_request.push((idx, fragment)); + QueuedElement::Try { + attempt_elements, + except_elements, + } => { + self.process_try_block( + attempt_elements, + except_elements, + output_writer, + dispatcher, + processor, + )?; } } - - if pending_reqs.is_empty() { - continue; - } - - // Wait for any one to complete using select - let (result, remaining) = fastly::http::request::select(pending_reqs); - - // The completed request is the one that's NOT in remaining - let completed_idx = fragments_by_request.len() - remaining.len() - 1; - let (_original_idx, mut completed_fragment) = - fragments_by_request.remove(completed_idx); - - // Update the completed fragment with the result and track TTL if rendered caching enabled - completed_fragment.pending_fragment = result.map_or_else( - |_| PendingFragmentContent::NoContent, - |resp| { - // Track TTL if we need it for rendered document (for tracking OR header emission) - if self.configuration.cache.is_rendered_cacheable - || self.configuration.cache.rendered_cache_control - { - // Use ttl_override from the include tag if present, otherwise calculate from response - let ttl = if let Some(override_ttl) = - 
completed_fragment.metadata.ttl_override - { - debug!("Using TTL override from include tag: {override_ttl} seconds"); - Some(override_ttl) - } else { - match cache::calculate_ttl(&resp, &self.configuration.cache) { - Ok(Some(ttl)) => { - debug!("Calculated TTL from response: {ttl} seconds"); - Some(ttl) - } - Ok(None) => { - debug!("Response not cacheable (private/no-cache/set-cookie)"); - self.ctx.mark_document_uncacheable(); - None - } - Err(e) => { - debug!("Error calculating TTL: {e:?}"); - None - } - } - }; - - if let Some(ttl_value) = ttl { - self.ctx.update_cache_min_ttl(ttl_value); - debug!("Tracking TTL {ttl_value} for rendered document"); - } - } - PendingFragmentContent::CompletedRequest(Box::new(resp)) - }, - ); - - // Put remaining fragments back in queue (with their pending requests restored) - for (pending_req, (_idx, mut fragment)) in - remaining.into_iter().zip(fragments_by_request) - { - fragment.pending_fragment = - PendingFragmentContent::PendingRequest(Box::new(pending_req)); - self.queue.push_back(QueuedElement::Include(fragment)); - } - - // Process the completed fragment - self.process_include_from_queue( - *completed_fragment, - output_writer, - dispatcher, - processor, - )?; } - Ok(()) } - - /// Process a try block recursively, handling nested try blocks naturally + /// Process a try block: execute ALL attempts in document order (they are + /// independent statements), then run the except clause if any failed. 
fn process_try_block( &mut self, - attempt_elements: Vec>, - except_elements: Vec, + attempt_elements: Vec>, + except_elements: Vec, output_writer: &mut impl Write, dispatcher: &FragmentRequestDispatcher, processor: Option<&FragmentResponseProcessor>, ) -> Result<()> { - let mut succeeded = false; - - // Try each attempt in order + let mut any_failed = false; for attempt in attempt_elements { - match self.process_queued_elements(attempt, dispatcher, processor) { - Ok(buffer) => { - // This attempt succeeded - write it out - output_writer.write_all(&buffer)?; - succeeded = true; - break; - } - Err(_) => { - // This attempt failed - try the next one - continue; - } + match self.process_attempt_elements(attempt, dispatcher, processor) { + Ok(buffer) => output_writer.write_all(&buffer)?, + Err(_) => any_failed = true, } } - - // If all attempts failed, process except clause - if !succeeded { - let except_buffer = - self.process_queued_elements(except_elements, dispatcher, processor)?; - output_writer.write_all(&except_buffer)?; + if any_failed { + let buf = self.process_attempt_elements(except_elements, dispatcher, processor)?; + output_writer.write_all(&buf)?; } - Ok(()) } - /// Process a list of queued elements recursively, returning the output buffer - /// This naturally handles nested try blocks through recursion - fn process_queued_elements( + /// Execute a list of raw ESI elements in document order into a fresh buffer. + /// + /// Elements are processed sequentially through a `DocumentHandler`: + /// - Text / Html / Expr and complex tags (Choose, Foreach, Assign, …) + /// execute immediately, writing into `buffer` directly when no + /// in-flight includes precede them, or into `self.queue` as `Content` + /// when an include is already queued (preserving document order). + /// - `` is dispatched asynchronously at the exact point it + /// is reached, **after** all preceding assigns have updated the context. 
+ /// + /// After all elements have been walked, any queued includes are drained in + /// document order (blocking wait per include). + fn process_attempt_elements( &mut self, - elements: Vec, + elements: Vec, dispatcher: &FragmentRequestDispatcher, processor: Option<&FragmentResponseProcessor>, ) -> Result> { let mut buffer = Vec::new(); - for elem in elements { - match elem { - QueuedElement::Content(bytes) => { - buffer.write_all(&bytes)?; - } - QueuedElement::Include(fragment) => { - self.process_include_from_queue(*fragment, &mut buffer, dispatcher, processor)?; - } - QueuedElement::Try { - attempt_elements, - except_elements, - } => { - // Recursively process nested try block - self.process_try_block( - attempt_elements, - except_elements, - &mut buffer, - dispatcher, - processor, - )?; - } + // Isolate this attempt's dispatch queue from the outer document queue. + let saved_queue = std::mem::take(&mut self.queue); + + { + let mut handler = DocumentHandler { + processor: self, + output: &mut buffer, + dispatcher, + response_processor: processor, + }; + for elem in &elements { + handler.process(elem)?; } } + // Drain any includes (and nested try blocks) dispatched during the walk. + self.drain_queue(&mut buffer, dispatcher, processor)?; + + // Restore the outer document queue. 
+ self.queue = saved_queue; + Ok(buffer) } @@ -1259,6 +1036,37 @@ impl Processor { response }; + // Track TTL for rendered document caching + if final_response.get_status().is_success() + && (self.configuration.cache.is_rendered_cacheable + || self.configuration.cache.rendered_cache_control) + { + let ttl = if let Some(override_ttl) = fragment.metadata.ttl_override { + debug!("Using TTL override from include tag: {override_ttl} seconds"); + Some(override_ttl) + } else { + match cache::calculate_ttl(&final_response, &self.configuration.cache) { + Ok(Some(ttl)) => { + debug!("Calculated TTL from response: {ttl} seconds"); + Some(ttl) + } + Ok(None) => { + debug!("Response not cacheable (private/no-cache/set-cookie)"); + self.ctx.mark_document_uncacheable(); + None + } + Err(e) => { + debug!("Error calculating TTL: {e:?}"); + None + } + } + }; + if let Some(ttl_value) = ttl { + self.ctx.update_cache_min_ttl(ttl_value); + debug!("Tracking TTL {ttl_value} for rendered document"); + } + } + // Check if successful if final_response.get_status().is_success() { let body_bytes = final_response.into_body_bytes(); @@ -1300,7 +1108,7 @@ impl Processor { Ok(()) } Err(_) if continue_on_error => { - output_writer.write_all(b"")?; + output_writer.write_all(FRAGMENT_REQUEST_FAILED)?; Ok(()) } Err(_) => Err(ESIError::ExpressionError( @@ -1308,7 +1116,7 @@ impl Processor { )), } } else if continue_on_error { - output_writer.write_all(b"")?; + output_writer.write_all(FRAGMENT_REQUEST_FAILED)?; Ok(()) } else { Err(ESIError::ExpressionError(format!( @@ -1364,6 +1172,18 @@ impl Processor { } } +/// Placeholder HTML comment written when a fragment could not be fetched and `onerror="continue"`. +const FRAGMENT_REQUEST_FAILED: &[u8] = b""; + +/// Evaluate an [`Expr`] to a [`Bytes`] value. +/// +/// Free function (not a `Processor` method) so callers can independently borrow other +/// `Processor` fields alongside `ctx`. 
+fn eval_expr_to_bytes(expr: &Expr, ctx: &mut EvalContext) -> Result { + let result = expression::eval_expr(expr, ctx)?; + Ok(result.to_bytes()) +} + // Default fragment request dispatcher that uses the request's hostname as backend // Uses dynamic backends to support maxwait attribute as first_byte_timeout fn default_fragment_dispatcher( diff --git a/esi/tests/esi-tests.rs b/esi/tests/esi-tests.rs index cf61adf..ddc4f1c 100644 --- a/esi/tests/esi-tests.rs +++ b/esi/tests/esi-tests.rs @@ -1918,3 +1918,322 @@ fn test_user_defined_function_recursive_factorial() { assert!(result.contains("120"), "Result was: {}", result); } +// ────────────────────────────────────────────────────────────────────────────── +// Tests for ESI tags inside attempt/except blocks (fix #9 / #2) +// Previously, Choose, Foreach, Assign and Vars were silently dropped when they +// appeared inside an attempt or except block because build_attempt_queue only +// handled Text, Html, Expr, Include, and a hard-coded Choose/Try branch that +// routed output to the wrong queue. +// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_try_attempt_with_vars() { + init_logs(); + + let input = r#" + + $(x) + fallback +"#; + + let result = process_esi_document(input, Request::get("http://example.com/")) + .expect("Processing should succeed"); + + assert!( + result.contains("hello"), + "vars inside try attempt should render. Got: {result}" + ); + assert!( + !result.contains("fallback"), + "fallback should NOT appear. Got: {result}" + ); +} + +#[test] +fn test_try_attempt_with_choose() { + init_logs(); + + let input = r#" + + + + chosen + other + + + fallback +"#; + + let result = process_esi_document(input, Request::get("http://example.com/")) + .expect("Processing should succeed"); + + assert!( + result.contains("chosen"), + "choose inside try attempt should evaluate. 
Got: {result}" + ); + assert!( + !result.contains("other"), + "non-matching branch should not appear. Got: {result}" + ); + assert!( + !result.contains("fallback"), + "fallback should NOT appear. Got: {result}" + ); +} + +#[test] +fn test_try_attempt_with_foreach() { + init_logs(); + + let input = r#" + $(i) + fallback +"#; + + let result = process_esi_document(input, Request::get("http://example.com/")) + .expect("Processing should succeed"); + + assert_eq!( + result.trim(), + "abc", + "foreach inside try attempt should iterate. Got: {result}" + ); +} + +#[test] +fn test_try_attempt_with_assign() { + init_logs(); + + let input = r#" + + + $(val) + + fallback +"#; + + let result = process_esi_document(input, Request::get("http://example.com/")) + .expect("Processing should succeed"); + + assert!( + result.contains("computed"), + "assign+vars inside try attempt should work. Got: {result}" + ); + assert!( + !result.contains("fallback"), + "fallback should NOT appear. Got: {result}" + ); +} + +#[test] +fn test_try_except_with_vars() { + init_logs(); + + // Attempt dispatches an include that returns 500 (no onerror=continue, so it raises Err + // and the try machinery falls through to the except block). 
+ let input = r#" + + + $(msg) +"#; + + // Dispatcher that always returns a 500 so the attempt fails + let dispatcher = |_req: Request, _: Option| -> esi::Result { + let mut resp = fastly::Response::new(); + resp.set_status(fastly::http::StatusCode::INTERNAL_SERVER_ERROR); + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + resp, + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new( + Some(Request::get("http://example.com/")), + Configuration::default(), + ); + processor + .process_stream(reader, &mut output, Some(&dispatcher), None) + .expect("Processing should succeed"); + + let result = String::from_utf8(output).unwrap(); + assert!( + result.contains("except-rendered"), + "vars inside except block should render. Got: {result}" + ); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Multi-include document ordering (fix #7) +// With simplified drain_queue (sequential wait), includes must appear in the +// same order they appear in the document regardless of which finishes first. 
+// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_multi_include_document_order() { + init_logs(); + + let input = r#""#; + + let dispatcher = |req: Request, _: Option| -> esi::Result { + let body = if req.get_url_str().contains("/first") { + "FIRST" + } else if req.get_url_str().contains("/second") { + "SECOND" + } else { + "THIRD" + }; + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + fastly::Response::from_body(body), + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new( + Some(Request::get("http://example.com/")), + Configuration::default(), + ); + processor + .process_stream(reader, &mut output, Some(&dispatcher), None) + .expect("Processing should succeed"); + + let result = String::from_utf8(output).unwrap(); + assert_eq!( + result, "FIRSTSECONDTHIRD", + "Includes must appear in document order. Got: {result}" + ); +} + +// ────────────────────────────────────────────────────────────────────────────── +// Try block after an include in the same document (fix #11) +// Previously, process_ready_queue_items skipped Try blocks entirely, so a Try +// that reached the head of the queue (after a preceding include was consumed) +// would stall until drain_queue ran at the end - never an outright bug in tests +// using CompletedRequest, but wrong for real async requests. The fix makes +// process_ready_queue_items process Try blocks inline. 
+// ────────────────────────────────────────────────────────────────────────────── + +#[test] +fn test_include_followed_by_try_block() { + init_logs(); + + let input = r#" + + + except-content +"#; + + let dispatcher = |req: Request, _: Option| -> esi::Result { + let body = if req.get_url_str().contains("/first") { + "first-content" + } else { + "attempt-content" + }; + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + fastly::Response::from_body(body), + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new( + Some(Request::get("http://example.com/")), + Configuration::default(), + ); + processor + .process_stream(reader, &mut output, Some(&dispatcher), None) + .expect("Processing should succeed"); + + let result = String::from_utf8(output).unwrap(); + assert!( + result.contains("first-content"), + "Include before try should appear. Got: {result}" + ); + assert!( + result.contains("attempt-content"), + "Try attempt should execute after include. Got: {result}" + ); + assert!( + !result.contains("except-content"), + "Except should NOT appear when attempt succeeds. Got: {result}" + ); +} + +#[test] +fn test_content_order_around_try_block() { + // Verifies that text before and after a block appears in the + // correct position in the output, even when the attempt contains an include. 
+ init_logs(); + + let input = r#"before + + fallback +after"#; + + let dispatcher = |_req: Request, _: Option| -> esi::Result { + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + fastly::Response::from_body("fragment-content"), + ))) + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new( + Some(Request::get("http://example.com/")), + Configuration::default(), + ); + processor + .process_stream(reader, &mut output, Some(&dispatcher), None) + .expect("Processing should succeed"); + + let result = String::from_utf8(output).unwrap(); + assert_eq!(result, "beforefragment-contentafter", "Got: {result:?}"); +} + +#[test] +fn test_try_block_at_queue_head_uses_except_on_failure() { + init_logs(); + + // An include followed by a try whose attempt fails → except should show + let input = r#" + + + except-content +"#; + + let dispatcher = |req: Request, _: Option| -> esi::Result { + if req.get_url_str().contains("/first") { + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + fastly::Response::from_body("first-content"), + ))) + } else { + // Attempt fails with 500 + let mut resp = fastly::Response::new(); + resp.set_status(fastly::http::StatusCode::INTERNAL_SERVER_ERROR); + Ok(esi::PendingFragmentContent::CompletedRequest(Box::new( + resp, + ))) + } + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new( + Some(Request::get("http://example.com/")), + Configuration::default(), + ); + processor + .process_stream(reader, &mut output, Some(&dispatcher), None) + .expect("Processing should succeed"); + + let result = String::from_utf8(output).unwrap(); + assert!( + result.contains("first-content"), + "Include before try should appear. Got: {result}" + ); + assert!( + result.contains("except-content"), + "Except should appear when attempt fails. 
Got: {result}" + ); +} From 6747f57e4c3b5b2feeb099a6e378d03c6de6ad66 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Mon, 2 Mar 2026 23:29:28 -0600 Subject: [PATCH 088/119] refactor(try): parallel try-block dispatch with flat-buf slot tracking All includes are dispatched into the same select() pool as bare includes, removing the Nx sequential penalty for consecutive try blocks. Try-block content reuses the main buf slots (no separate content_slots); a TryBlockTracker/AttemptTracker pair records which slots belong to each attempt and assembles the result when all resolve. - 3 new types: TryBlockTracker, AttemptTracker, SlotEntry - Request correlation keyed by (method, URL) - process_queue renamed from process_ready_queue_items for clarity --- esi/src/element_handler.rs | 2 +- esi/src/lib.rs | 772 +++++++++++++++++++++++++++++-------- esi/tests/esi-tests.rs | 4 +- 3 files changed, 614 insertions(+), 164 deletions(-) diff --git a/esi/src/element_handler.rs b/esi/src/element_handler.rs index 079f2a9..db0d2e1 100644 --- a/esi/src/element_handler.rs +++ b/esi/src/element_handler.rs @@ -88,7 +88,7 @@ pub(crate) trait ElementHandler { /// /// Called after processing each top-level element in the main parse loop. /// Default is a no-op — only meaningful in the streaming context. 
- fn process_ready_queue_items(&mut self) -> Result<()> { + fn process_queue(&mut self) -> Result<()> { Ok(()) } diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 552536c..0151455 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -13,13 +13,13 @@ pub mod parser_types; use crate::element_handler::{ElementHandler, Flow}; use crate::expression::EvalContext; use crate::parser_types::{DcaMode, Element, Expr, IncludeAttributes}; -use bytes::{Buf, Bytes, BytesMut}; -use fastly::http::request::{PendingRequest, PollResult}; +use bytes::{Bytes, BytesMut}; +use fastly::http::request::{select, PendingRequest}; use fastly::http::{header, Method, StatusCode, Url}; use fastly::{mime, Backend, Request, Response}; use log::debug; use std::borrow::Cow; -use std::collections::VecDeque; +use std::collections::{HashMap, VecDeque}; use std::io::{BufRead, Write}; use std::time::Duration; @@ -73,6 +73,7 @@ struct FragmentMetadata { continue_on_error: bool, /// Optional timeout in milliseconds for this specific request maxwait: Option, + /// Dynamic Content Assembly mode for this request I(controls pre-processing) dca: DcaMode, } @@ -103,26 +104,61 @@ enum QueuedElement { }, } -impl PendingFragmentContent { - /// Poll to check if the request is ready without blocking - /// Returns the updated `PendingFragmentContent` (either still Pending or now Completed/NoContent) - pub fn poll(self) -> Self { - match self { - Self::PendingRequest(pending_request) => match pending_request.poll() { - PollResult::Done(result) => result.map_or_else( - |_| Self::NoContent, - |resp| Self::CompletedRequest(Box::new(resp)), - ), - PollResult::Pending(pending_request) => { - // Still pending - put it back - Self::PendingRequest(Box::new(pending_request)) - } - }, - // Already completed - return as-is - other => other, - } - } +// --------------------------------------------------------------------------- +// Parallel try-block tracking types (flat-buf design) +// 
--------------------------------------------------------------------------- + +#[derive(Hash, Eq, PartialEq, Clone)] +struct RequestKey { + method: Method, + url: String, +} + +/// Tracks an in-flight `` block in `drain_queue`. +/// +/// Try-block includes share the main `buf` slots (same as bare includes) +/// instead of maintaining a separate `content_slots` system. Each attempt +/// records which `buf` indices hold its content so that assembly can +/// concatenate them once every pending include has been resolved. +struct TryBlockTracker { + /// `buf` slot reserved for the assembled try-block output. + outer_slot: usize, + /// Per-attempt tracking (document order). + attempts: Vec, + /// Deferred until all attempts resolve; only evaluated if any attempt + /// failed. + except_elements: Vec, + /// Total in-flight includes across all attempts. When this reaches + /// zero the block is ready to assemble. + pending_count: usize, +} + +/// Per-attempt state inside a [`TryBlockTracker`]. +struct AttemptTracker { + /// Indices into the main `buf` vec that hold this attempt's content + /// (both static text and resolved includes), in document order. + buf_slots: Vec, + /// Set to `true` if any include in this attempt returned a non-success + /// status without `continue_on_error`. + failed: bool, +} + +/// Entry in the `url_map` that correlates a completing `PendingRequest` +/// back to the `buf` slot it should fill. +/// +/// A single struct covers both bare ``s and includes inside +/// `` blocks — the `try_info` field distinguishes the two cases. +struct SlotEntry { + /// Index into the main `buf` vec to fill with the processed response. + buf_slot: usize, + /// Fragment metadata needed to process the response (alt, headers, dca…). + fragment: Box, + /// `Some((tracker_idx, attempt_idx))` when this include lives inside a + /// try block; `None` for a bare include. 
+ try_info: Option<(usize, usize)>, +} +impl PendingFragmentContent { /// Check if the content is ready (completed or no content) pub const fn is_ready(&self) -> bool { !matches!(self, Self::PendingRequest(_)) @@ -184,9 +220,8 @@ pub struct Processor { struct DocumentHandler<'a, W: Write> { processor: &'a mut Processor, output: &'a mut W, - dispatcher: &'a FragmentRequestDispatcher, - /// Optional response post-processor; needed by `process_ready_queue_items`. - response_processor: Option<&'a FragmentResponseProcessor>, + dispatch_fragment_request: &'a FragmentRequestDispatcher, + fragment_response_handler: Option<&'a FragmentResponseProcessor>, } impl ElementHandler for DocumentHandler<'_, W> { @@ -194,11 +229,11 @@ impl ElementHandler for DocumentHandler<'_, W> { &mut self.processor.ctx } - fn process_ready_queue_items(&mut self) -> crate::Result<()> { - self.processor.process_ready_queue_items( + fn process_queue(&mut self) -> crate::Result<()> { + self.processor.process_queue( self.output, - self.dispatcher, - self.response_processor, + self.dispatch_fragment_request, + self.fragment_response_handler, ) } @@ -225,7 +260,7 @@ impl ElementHandler for DocumentHandler<'_, W> { fn on_include(&mut self, attrs: IncludeAttributes) -> crate::Result { let queued_element = self .processor - .dispatch_include_to_element(&attrs, self.dispatcher)?; + .dispatch_include_to_element(&attrs, self.dispatch_fragment_request)?; self.processor.queue.push_back(queued_element); Ok(Flow::Continue) } @@ -239,7 +274,7 @@ impl ElementHandler for DocumentHandler<'_, W> { // Build and dispatch the request (same machinery as include, but blocking) let queued_element = self .processor - .dispatch_include_to_element(&attrs, self.dispatcher)?; + .dispatch_include_to_element(&attrs, self.dispatch_fragment_request)?; match queued_element { QueuedElement::Include(fragment) => { @@ -277,23 +312,30 @@ impl ElementHandler for DocumentHandler<'_, W> { // dca="esi": TWO-PHASE processing // Phase 1: 
Process fragment in ISOLATED context // Reborrow before the exclusive borrow of self.processor below - let dispatcher = self.dispatcher; + let dispatcher = self.dispatch_fragment_request; let mut isolated_processor = Processor::new( Some(self.processor.ctx.get_request().clone_without_body()), self.processor.configuration.clone(), ); let mut isolated_output = Vec::new(); - let mut isolated_handler = DocumentHandler { - processor: &mut isolated_processor, - output: &mut isolated_output, - dispatcher, - response_processor: None, - }; - for element in elements { - isolated_handler.process(&element)?; + { + let mut isolated_handler = DocumentHandler { + processor: &mut isolated_processor, + output: &mut isolated_output, + dispatch_fragment_request: dispatcher, + fragment_response_handler: None, + }; + for element in elements { + isolated_handler.process(&element)?; + } + // isolated_handler drops here, releasing the mutable borrow of isolated_output } + // Drain any includes dispatched during Phase 1 (e.g. inside the eval'd fragment). + // Must happen before we read isolated_output, while isolated_handler has already dropped. + isolated_processor.drain_queue(&mut isolated_output, dispatcher, None)?; + // Phase 2: Parse the isolated output as ESI and process in PARENT's context // This is why variables don't leak: they only exist in phase 1 let isolated_bytes = Bytes::from(isolated_output); @@ -358,6 +400,12 @@ impl ElementHandler for DocumentHandler<'_, W> { } } +/// Implementation of the main Processor methods driving ESI processing +/// +/// This impl block contains the core logic for processing ESI documents, including +/// the main streaming loop, fragment dispatching, and queue management. The +/// DocumentHandler implementation above delegates to these methods for the actual processing work, +/// allowing the handler to focus on interfacing with the streaming architecture and the evaluation context. 
impl Processor { pub fn new(original_request_metadata: Option, configuration: Configuration) -> Self { let mut ctx = EvalContext::new(); @@ -488,6 +536,7 @@ impl Processor { } } + // Apply any response headers set during processing for (name, value) in self.ctx.response_headers() { resp.set_header(name, value); } @@ -510,42 +559,15 @@ impl Processor { Ok(()) } - /// Process an ESI stream with industry-grade streaming architecture - /// - /// This is the low-level streaming API that processes ESI markup from any - /// `BufRead` source to any `Write` destination. For processing Fastly responses, - /// use [`process_response`](Self::process_response) instead. - /// - /// This method implements **three levels of streaming** for optimal performance: + /// Process an ESI stream from any `BufRead` into a `Write`. /// - /// ## 1. Chunked Input Reading (Memory Efficient) - /// - Reads source stream in 8KB chunks from `BufRead` - /// - Accumulates chunks until parser can make progress - /// - Prevents loading entire document into memory at once - /// - Bounded memory growth with incremental processing + /// - Reads in 8 KB chunks, buffering only what the parser needs + /// - Parses incrementally; writes content as soon as it’s parsed + /// - Dispatches includes immediately; waits for them later in document order + /// - Uses `select()` to harvest in-flight includes while preserving output order /// - /// ## 2. Streaming Output (Low Latency) - /// - Writes processed content immediately as elements are parsed - /// - Non-blocking poll checks for completed fragments - /// - Output reaches destination with minimal delay - /// - No buffering of final output - /// - /// ## 3. 
Streaming Fragments (Maximum Parallelism) - /// - Dispatches all includes immediately (non-blocking) - /// - Uses `select()` to process whichever fragment completes first - /// - All fragments fetch in parallel, no wasted waiting - /// - Try blocks dispatch all attempts' includes upfront - /// - /// ## Key Features: - /// - Only fetches fragments that are actually needed (not those in unexecuted branches) - /// - Fully recursive nested try/except blocks - /// - Proper alt fallback and `continue_on_error` handling - /// - Full ESI specification compliance - /// - /// ## Note on Parsing: - /// The parser (nom-based) requires complete input for each parse operation. - /// We handle this by buffering input chunks until a successful parse, - /// then processing parsed elements immediately while retaining unparsed remainder. + /// For Fastly `Response` bodies, prefer [`process_response`], which wires up + /// cache headers and response metadata for you. /// /// # Arguments /// * `src_stream` - `BufRead` source containing ESI markup (streams in chunks) @@ -610,9 +632,10 @@ impl Processor { } } - // Freeze a view of the buffer for zero-copy parsing - // We clone here because freeze() consumes, but Bytes cloning is cheap (ref count) - let frozen = buffer.clone().freeze(); + // Create a zero-copy window of the current buffer contents without cloning. + // split_off(0) moves the data into a new view while keeping the same backing store. 
+ let mut window = buffer.split_off(0); + let frozen = window.freeze(); // Try to parse what we have in the buffer // Use streaming parser unless we're at EOF, then use complete parser @@ -630,14 +653,14 @@ impl Processor { let mut handler = DocumentHandler { processor: self, output: output_writer, - dispatcher, - response_processor: process_fragment_response, + dispatch_fragment_request: dispatcher, + fragment_response_handler: process_fragment_response, }; for element in elements { // Note: breaks at top-level are ignored handler.process(&element)?; - // After each element, check if any queued includes are ready (non-blocking poll) - handler.process_ready_queue_items()?; + // After each element, check if any queued includes are ready + handler.process_queue()?; } // Calculate how many bytes were consumed @@ -658,8 +681,11 @@ impl Processor { // which treats remainder as Text elements break; } - // Keep remainder for next chunk - advance past consumed bytes - buffer.advance(consumed); + // Reuse the existing backing store without cloning: split_off leaves + // `window` empty; reuse it for the remainder so we avoid copying. + let remainder_bytes = frozen.slice(consumed..); + window = BytesMut::from(remainder_bytes.as_ref()); + buffer = window.split_off(0); } } Err(nom::Err::Incomplete(_)) => { @@ -850,7 +876,7 @@ impl Processor { /// /// Processes completed fragments, ready content, and try blocks from the front of the /// queue without blocking. Stops as soon as it encounters a pending include. - fn process_ready_queue_items( + fn process_queue( &mut self, output_writer: &mut impl Write, dispatcher: &FragmentRequestDispatcher, @@ -864,23 +890,24 @@ impl Processor { output_writer.write_all(&content)?; } Some(QueuedElement::Include(mut fragment)) => { - // Poll the fragment (non-blocking check) + // If the fragment is already completed (cache hit / NoContent), + // process immediately. Otherwise, leave it in place and exit + // to avoid busy-wait polling. 
let pending_content = std::mem::replace( &mut fragment.pending_fragment, PendingFragmentContent::NoContent, ); - fragment.pending_fragment = pending_content.poll(); - if fragment.pending_fragment.is_ready() { - self.process_include_from_queue( - *fragment, - output_writer, - dispatcher, - processor, - )?; - } else { - // Still pending - put it back at front and stop - self.queue.push_front(QueuedElement::Include(fragment)); - break; + match pending_content { + PendingFragmentContent::PendingRequest(request) => { + fragment.pending_fragment = + PendingFragmentContent::PendingRequest(request); + self.queue.push_front(QueuedElement::Include(fragment)); + break; + } + ready => { + fragment.pending_fragment = ready; + self.process_include(*fragment, output_writer, dispatcher, processor)?; + } } } Some(QueuedElement::Try { @@ -903,50 +930,450 @@ impl Processor { Ok(()) } - /// Drain the queue to completion, blocking on each element in document order. - /// - /// All fragment requests were pre-dispatched during element parsing and are already - /// fetching in parallel. This method harvests them sequentially in document order. + /// Build a correlation key for matching select() results to dispatched requests. + fn make_request_key(req: &Request) -> RequestKey { + RequestKey { + method: req.get_method().clone(), + url: req.get_url_str().to_string(), + } + } + + /// Drain the queue to completion, preserving document order while using + /// `fastly::http::request::select()` to process whichever in-flight include + /// finishes first. /// - /// The previous `select()`-based approach was removed because `fastly::http::request::select` - /// does not expose which of the original requests completed (no identity on `PendingRequest`), - /// making it impossible to correctly correlate the completed response with its fragment - /// metadata (alt URL, `onerror` policy, etc.). 
+ /// - All includes (bare and inside ``) are dispatched before any + /// waits; a single pending pool feeds `select()`, removing the xN + /// sequential penalty for many consecutive try blocks. + /// - Each queued element gets a slot in `buf`; try-block includes use the + /// same `buf` slots as bare includes (no separate content_slots system). + /// A `TryBlockTracker` records which buf indices belong to each attempt + /// so they can be assembled into the outer slot when resolved. + /// - Request correlation uses (method + URL) keys via `SlotEntry`; the + /// `try_info` field distinguishes bare includes from try-block includes. fn drain_queue( &mut self, output_writer: &mut impl Write, - dispatcher: &FragmentRequestDispatcher, - processor: Option<&FragmentResponseProcessor>, + dispatch_fragment_request: &FragmentRequestDispatcher, + process_fragment_response: Option<&FragmentResponseProcessor>, ) -> Result<()> { - while let Some(elem) = self.queue.pop_front() { - match elem { - QueuedElement::Content(bytes) => { - output_writer.write_all(&bytes)?; + // `buf[i]` is `None` while the slot is waiting for a response, + // `Some(bytes)` once it is ready. Try-block includes use the SAME + // buf slots as bare includes — no separate content_slots system. + let mut buf: Vec> = Vec::new(); + let mut next_out: usize = 0; + + // RequestKey → FIFO queue of SlotEntry for all in-flight requests. + // A single SlotEntry struct covers both bare includes and try-block + // includes; the `try_info` field distinguishes the two cases. + let mut url_map: HashMap> = HashMap::new(); + + // PendingRequests handed to select() on each iteration. + let mut pending: Vec = Vec::new(); + + // One tracker per block encountered during Step 1. + let mut try_trackers: Vec = Vec::new(); + + loop { + // ------------------------------------------------------------------ + // Step 1: drain self.queue, assigning every element a slot. + // + // After this inner loop self.queue is guaranteed empty. 
That + // invariant means DocumentHandler::write_bytes() called from within + // `process_include` writes directly to the caller-supplied + // slot_buf rather than re-queuing (the correct behaviour for + // dca="esi" fragment bodies that contain further ESI directives). + // ------------------------------------------------------------------ + while let Some(elem) = self.queue.pop_front() { + match elem { + QueuedElement::Content(bytes) => { + buf.push(Some(bytes)); + } + + QueuedElement::Include(mut fragment) => { + let slot = buf.len(); + buf.push(None); // placeholder; filled when response arrives + + let pending_content = std::mem::replace( + &mut fragment.pending_fragment, + PendingFragmentContent::NoContent, + ); + match pending_content { + PendingFragmentContent::PendingRequest(req) => { + let key = Self::make_request_key(&fragment.req); + url_map.entry(key).or_default().push_back(SlotEntry { + buf_slot: slot, + fragment, + try_info: None, + }); + pending.push(*req); + } + ready => { + // CompletedRequest or NoContent: process now. + fragment.pending_fragment = ready; + let mut slot_buf = Vec::new(); + self.process_include( + *fragment, + &mut slot_buf, + dispatch_fragment_request, + process_fragment_response, + )?; + buf[slot] = Some(Bytes::from(slot_buf)); + // dca="esi" may push new items onto self.queue; + // the outer while picks them up next iteration. + } + } + } + + QueuedElement::Try { + attempt_elements, + except_elements, + } => { + // Reserve one outer slot for the assembled output. + let outer_slot = buf.len(); + buf.push(None); + + let tracker_idx = try_trackers.len(); + try_trackers.push(TryBlockTracker { + outer_slot, + attempts: Vec::with_capacity(attempt_elements.len()), + except_elements, + pending_count: 0, + }); + + // Walk each attempt through DocumentHandler to + // dispatch includes, then flatten results into buf. 
+ for (attempt_idx, attempt_elems) in attempt_elements.into_iter().enumerate() + { + try_trackers[tracker_idx].attempts.push(AttemptTracker { + buf_slots: Vec::new(), + failed: false, + }); + + let mut pre_buf: Vec = Vec::new(); + let mut pre_failed = false; + self.execute_isolated( + &attempt_elems, + &mut pre_buf, + dispatch_fragment_request, + process_fragment_response, + |this, pre_out| { + // Static content before the first include. + if !pre_out.is_empty() { + let slot = buf.len(); + buf.push(Some(Bytes::from(pre_out.to_vec()))); + try_trackers[tracker_idx].attempts[attempt_idx] + .buf_slots + .push(slot); + } + + // Remaining queued elements (document order). + while let Some(qe) = this.queue.pop_front() { + match qe { + QueuedElement::Content(bytes) => { + let slot = buf.len(); + buf.push(Some(bytes)); + try_trackers[tracker_idx].attempts[attempt_idx] + .buf_slots + .push(slot); + } + + QueuedElement::Include(mut frag) => { + let slot = buf.len(); + buf.push(None); + try_trackers[tracker_idx].attempts[attempt_idx] + .buf_slots + .push(slot); + + let pc = std::mem::replace( + &mut frag.pending_fragment, + PendingFragmentContent::NoContent, + ); + match pc { + PendingFragmentContent::PendingRequest(req) => { + let key = Self::make_request_key(&frag.req); + url_map.entry(key).or_default().push_back( + SlotEntry { + buf_slot: slot, + fragment: frag, + try_info: Some(( + tracker_idx, + attempt_idx, + )), + }, + ); + pending.push(*req); + try_trackers[tracker_idx].pending_count += + 1; + } + ready => { + frag.pending_fragment = ready; + let mut slot_buf = Vec::new(); + if this + .process_include( + *frag, + &mut slot_buf, + dispatch_fragment_request, + process_fragment_response, + ) + .is_err() + { + pre_failed = true; + } + buf[slot] = Some(Bytes::from(slot_buf)); + } + } + } + + QueuedElement::Try { + attempt_elements: nested_attempts, + except_elements: nested_except, + } => { + // Nested try: process synchronously. 
+ let slot = buf.len(); + buf.push(None); + try_trackers[tracker_idx].attempts[attempt_idx] + .buf_slots + .push(slot); + let mut slot_buf = Vec::new(); + this.process_try_block( + nested_attempts, + nested_except, + &mut slot_buf, + dispatch_fragment_request, + process_fragment_response, + )?; + buf[slot] = Some(Bytes::from(slot_buf)); + } + } + } + Ok(()) + }, + )?; + + if pre_failed { + try_trackers[tracker_idx].attempts[attempt_idx].failed = true; + } + } + + // If no includes are pending, assemble immediately. + if try_trackers[tracker_idx].pending_count == 0 { + Self::assemble_try_block( + self, + tracker_idx, + &mut try_trackers, + &mut buf, + dispatch_fragment_request, + process_fragment_response, + )?; + } + } } - QueuedElement::Include(fragment) => { - self.process_include_from_queue( + } + + // ------------------------------------------------------------------ + // Step 2: flush consecutive ready slots at next_out. + // ------------------------------------------------------------------ + while next_out < buf.len() { + match &buf[next_out] { + Some(bytes) => { + output_writer.write_all(bytes)?; + buf[next_out] = Some(Bytes::new()); // release allocation + next_out += 1; + } + None => break, // head slot still waiting + } + } + + // ------------------------------------------------------------------ + // Step 3: done when nothing is pending. + // ------------------------------------------------------------------ + if pending.is_empty() { + break; + } + + // ------------------------------------------------------------------ + // Step 4: wait for the next completed request from the shared pool. + // ------------------------------------------------------------------ + let (result, remaining) = select(pending); + pending = remaining; + + // ------------------------------------------------------------------ + // Step 5: correlate the response with its SlotEntry and act. + // + // Success → Response::get_backend_request() carries the sent URL. 
+ // Failure → SendError::into_sent_req() recovers the URL; a 500 is + // synthesised so existing alt/onerror logic is unchanged. + // ------------------------------------------------------------------ + let (key, completed_content) = match result { + Ok(resp) => { + let key = resp + .get_backend_request() + .map(Self::make_request_key) + .ok_or_else(|| { + ESIError::ExpressionError( + "drain_queue: response missing backend request for correlation" + .to_string(), + ) + })?; + ( + key, + PendingFragmentContent::CompletedRequest(Box::new(resp)), + ) + } + Err(e) => { + let req = e.into_sent_req(); + let key = Self::make_request_key(&req); + debug!( + "Fragment request to {} {} failed; triggering alt/onerror handling", + key.method, key.url + ); + ( + key, + PendingFragmentContent::CompletedRequest(Box::new(Response::from_status( + StatusCode::INTERNAL_SERVER_ERROR, + ))), + ) + } + }; + + let entry = url_map + .get_mut(&key) + .and_then(|q| q.pop_front()) + .ok_or_else(|| { + ESIError::ExpressionError(format!( + "drain_queue: no in-flight fragment for {}/{}", + key.method, key.url + )) + })?; + + let SlotEntry { + buf_slot, + mut fragment, + try_info, + } = entry; + + match try_info { + // ------------------------------------------------------- + // Bare : fill the buf slot directly. + // ------------------------------------------------------- + None => { + fragment.pending_fragment = completed_content; + let mut slot_buf = Vec::new(); + self.process_include( *fragment, - output_writer, - dispatcher, - processor, + &mut slot_buf, + dispatch_fragment_request, + process_fragment_response, )?; + buf[buf_slot] = Some(Bytes::from(slot_buf)); + // dca="esi" may push new QueuedElements onto self.queue. + // Loop back to Step 1 to assign them slots. 
} - QueuedElement::Try { - attempt_elements, - except_elements, - } => { - self.process_try_block( - attempt_elements, - except_elements, - output_writer, - dispatcher, - processor, - )?; + + // ------------------------------------------------------- + // Include inside a attempt: fill the buf slot, + // then check if the entire try block is now resolved. + // ------------------------------------------------------- + Some((tracker_idx, attempt_idx)) => { + fragment.pending_fragment = completed_content; + let mut slot_buf = Vec::new(); + let include_failed = self + .process_include( + *fragment, + &mut slot_buf, + dispatch_fragment_request, + process_fragment_response, + ) + .is_err(); + buf[buf_slot] = Some(Bytes::from(slot_buf)); + + if include_failed { + try_trackers[tracker_idx].attempts[attempt_idx].failed = true; + } + try_trackers[tracker_idx].pending_count -= 1; + + if try_trackers[tracker_idx].pending_count == 0 { + Self::assemble_try_block( + self, + tracker_idx, + &mut try_trackers, + &mut buf, + dispatch_fragment_request, + process_fragment_response, + )?; + } + // dca="esi" inside a try-attempt promotes sub-includes + // to outer slots. Loop back to Step 1. + } + } + } + + // Final flush: every slot must be ready at this point. + while next_out < buf.len() { + match &buf[next_out] { + Some(bytes) => { + output_writer.write_all(bytes)?; + next_out += 1; + } + None => { + return Err(ESIError::ExpressionError( + "drain_queue: slot still pending after all requests resolved".to_string(), + )); } } } + Ok(()) } + + /// Assemble a fully-resolved try block: concatenate successful attempt + /// content from `buf` slots, clear inner slots, and set the outer slot. 
+ fn assemble_try_block( + &mut self, + tracker_idx: usize, + try_trackers: &mut [TryBlockTracker], + buf: &mut [Option], + dispatch_fragment_request: &FragmentRequestDispatcher, + process_fragment_response: Option<&FragmentResponseProcessor>, + ) -> Result<()> { + let mut any_failed = false; + let mut output: Vec = Vec::new(); + + for attempt in &try_trackers[tracker_idx].attempts { + if attempt.failed { + any_failed = true; + // Clear failed attempt's inner slots so Step 2 skips them. + for &slot_idx in &attempt.buf_slots { + buf[slot_idx] = Some(Bytes::new()); + } + } else { + for &slot_idx in &attempt.buf_slots { + if let Some(bytes) = &buf[slot_idx] { + output.extend_from_slice(bytes); + } + // Clear inner slot so Step 2 flushes it as a no-op. + buf[slot_idx] = Some(Bytes::new()); + } + } + } + + if any_failed { + let except_elements = std::mem::take(&mut try_trackers[tracker_idx].except_elements); + if !except_elements.is_empty() { + let except_buf = self.process_try_task( + except_elements, + dispatch_fragment_request, + process_fragment_response, + )?; + output.extend_from_slice(&except_buf); + } + } + + buf[try_trackers[tracker_idx].outer_slot] = Some(Bytes::from(output)); + Ok(()) + } + /// Process a try block: execute ALL attempts in document order (they are /// independent statements), then run the except clause if any failed. fn process_try_block( @@ -959,18 +1386,52 @@ impl Processor { ) -> Result<()> { let mut any_failed = false; for attempt in attempt_elements { - match self.process_attempt_elements(attempt, dispatcher, processor) { + match self.process_try_task(attempt, dispatcher, processor) { Ok(buffer) => output_writer.write_all(&buffer)?, Err(_) => any_failed = true, } } if any_failed { - let buf = self.process_attempt_elements(except_elements, dispatcher, processor)?; + let buf = self.process_try_task(except_elements, dispatcher, processor)?; output_writer.write_all(&buf)?; } Ok(()) } + /// Execute a `DocumentHandler` with an isolated queue. 
+ /// + /// Saves `self.queue`, runs the handler writing into `output`, executes the + /// provided `after` closure (which can consume the temporary queue), then + /// restores the saved queue. + fn execute_isolated( + &mut self, + elements: &[Element], + output: &mut W, + dispatcher: &FragmentRequestDispatcher, + processor: Option<&FragmentResponseProcessor>, + after: impl FnOnce(&mut Self, &mut W) -> Result, + ) -> Result { + let saved_queue = std::mem::take(&mut self.queue); + + { + let mut handler = DocumentHandler { + processor: self, + output, + dispatch_fragment_request: dispatcher, + fragment_response_handler: processor, + }; + for elem in elements { + handler.process(elem)?; + } + } + + let result = after(self, output); + + // Always restore the outer queue, even if `after` failed. + self.queue = saved_queue; + result + } + /// Execute a list of raw ESI elements in document order into a fresh buffer. /// /// Elements are processed sequentially through a `DocumentHandler`: @@ -983,45 +1444,34 @@ impl Processor { /// /// After all elements have been walked, any queued includes are drained in /// document order (blocking wait per include). - fn process_attempt_elements( + fn process_try_task( &mut self, elements: Vec, dispatcher: &FragmentRequestDispatcher, processor: Option<&FragmentResponseProcessor>, ) -> Result> { let mut buffer = Vec::new(); - - // Isolate this attempt's dispatch queue from the outer document queue. - let saved_queue = std::mem::take(&mut self.queue); - - { - let mut handler = DocumentHandler { - processor: self, - output: &mut buffer, - dispatcher, - response_processor: processor, - }; - for elem in &elements { - handler.process(elem)?; - } - } - - // Drain any includes (and nested try blocks) dispatched during the walk. - self.drain_queue(&mut buffer, dispatcher, processor)?; - - // Restore the outer document queue. 
- self.queue = saved_queue; + self.execute_isolated( + &elements, + &mut buffer, + dispatcher, + processor, + |this, out| { + this.drain_queue(out, dispatcher, processor)?; + Ok(()) + }, + )?; Ok(buffer) } /// Process an include from the queue (wait and write, handle alt) - fn process_include_from_queue( + fn process_include( &mut self, fragment: Fragment, output_writer: &mut impl Write, - dispatcher: &FragmentRequestDispatcher, - processor: Option<&FragmentResponseProcessor>, + dispatch_fragment_request: &FragmentRequestDispatcher, + process_fragment_response: Option<&FragmentResponseProcessor>, ) -> Result<()> { let continue_on_error = fragment.metadata.continue_on_error; @@ -1030,7 +1480,7 @@ impl Processor { // Apply processor if provided let mut req_for_processor = fragment.req.clone_without_body(); - let final_response = if let Some(proc) = processor { + let final_response = if let Some(proc) = process_fragment_response { proc(&mut req_for_processor, response)? } else { response @@ -1074,7 +1524,7 @@ impl Processor { body_bytes, &fragment.metadata.dca, output_writer, - dispatcher, + dispatch_fragment_request, )?; Ok(()) } else if let Some(alt_src) = fragment.alt_bytes { @@ -1088,11 +1538,11 @@ impl Processor { )?; let alt_req_without_body = alt_req.clone_without_body(); - match dispatcher(alt_req_without_body, fragment.metadata.maxwait) { + match dispatch_fragment_request(alt_req_without_body, fragment.metadata.maxwait) { Ok(alt_pending) => { let alt_response = alt_pending.wait()?; let mut alt_req_for_proc = alt_req.clone_without_body(); - let final_alt = if let Some(proc) = processor { + let final_alt = if let Some(proc) = process_fragment_response { proc(&mut alt_req_for_proc, alt_response)? 
} else { alt_response @@ -1103,7 +1553,7 @@ impl Processor { body_bytes, &fragment.metadata.dca, output_writer, - dispatcher, + dispatch_fragment_request, )?; Ok(()) } @@ -1156,8 +1606,8 @@ impl Processor { let mut handler = DocumentHandler { processor: self, output: output_writer, - dispatcher, - response_processor: None, + dispatch_fragment_request: dispatcher, + fragment_response_handler: None, }; for element in elements { if matches!(handler.process(&element)?, Flow::Break) { diff --git a/esi/tests/esi-tests.rs b/esi/tests/esi-tests.rs index ddc4f1c..17622be 100644 --- a/esi/tests/esi-tests.rs +++ b/esi/tests/esi-tests.rs @@ -2107,11 +2107,11 @@ fn test_multi_include_document_order() { // ────────────────────────────────────────────────────────────────────────────── // Try block after an include in the same document (fix #11) -// Previously, process_ready_queue_items skipped Try blocks entirely, so a Try +// Previously, process_queue skipped Try blocks entirely, so a Try // that reached the head of the queue (after a preceding include was consumed) // would stall until drain_queue ran at the end - never an outright bug in tests // using CompletedRequest, but wrong for real async requests. The fix makes -// process_ready_queue_items process Try blocks inline. +// process_queue process Try blocks inline. // ────────────────────────────────────────────────────────────────────────────── #[test] From b936717eca9f6acc90f8d7840fe2a174392da242 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Tue, 3 Mar 2026 09:08:31 -0600 Subject: [PATCH 089/119] refactor(config): remove namespace field and related methods from Configuration struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit refactor(lib): Reduce cloning in ElementHandler::process() — pass IncludeAttributes by reference. 
--- esi/src/config.rs | 11 ----------- esi/src/element_handler.rs | 8 ++++---- esi/src/error.rs | 5 ----- esi/src/expression.rs | 4 ++-- esi/src/lib.rs | 4 ++-- 5 files changed, 8 insertions(+), 24 deletions(-) diff --git a/esi/src/config.rs b/esi/src/config.rs index 2c263e6..cefe682 100644 --- a/esi/src/config.rs +++ b/esi/src/config.rs @@ -5,7 +5,6 @@ use crate::cache::CacheConfig; /// ## Usage Example /// ```rust,no_run /// let config = esi::Configuration::default() -/// .with_namespace("app") /// .with_caching(esi::cache::CacheConfig { /// is_rendered_cacheable: true, /// rendered_cache_control: true, @@ -18,8 +17,6 @@ use crate::cache::CacheConfig; #[allow(clippy::return_self_not_must_use)] #[derive(Clone, Debug)] pub struct Configuration { - /// The XML namespace to use when scanning for ESI tags. Defaults to `esi`. - pub namespace: String, /// For working with non-HTML ESI templates, e.g. JSON files, this option allows you to disable the unescaping of URLs pub is_escaped_content: bool, /// Cache configuration for ESI includes @@ -31,7 +28,6 @@ pub struct Configuration { impl Default for Configuration { fn default() -> Self { Self { - namespace: String::from("esi"), is_escaped_content: true, cache: CacheConfig::default(), function_recursion_depth: 5, @@ -40,13 +36,6 @@ impl Default for Configuration { } impl Configuration { - /// Sets an alternative ESI namespace, which is used to identify ESI instructions. - /// - /// For example, setting this to `test` would cause the processor to only match tags like ``. 
- pub fn with_namespace(mut self, namespace: impl Into) -> Self { - self.namespace = namespace.into(); - self - } /// For working with non-HTML ESI templates, eg JSON files, allows to disable URLs unescaping pub fn with_escaped(mut self, is_escaped: impl Into) -> Self { self.is_escaped_content = is_escaped.into(); diff --git a/esi/src/element_handler.rs b/esi/src/element_handler.rs index db0d2e1..0e0007b 100644 --- a/esi/src/element_handler.rs +++ b/esi/src/element_handler.rs @@ -63,12 +63,12 @@ pub(crate) trait ElementHandler { /// Handle ``. /// Streaming: dispatch the fragment request and enqueue it. /// Function: return an error. - fn on_include(&mut self, attrs: IncludeAttributes) -> Result; + fn on_include(&mut self, attrs: &IncludeAttributes) -> Result; /// Handle ``. /// Streaming: fetch the fragment, parse it as ESI, re-process in current context. /// Function: return an error. - fn on_eval(&mut self, attrs: IncludeAttributes) -> Result; + fn on_eval(&mut self, attrs: &IncludeAttributes) -> Result; /// Handle ``. /// Streaming: build parallel-dispatch queues for each attempt and the except clause. @@ -128,9 +128,9 @@ pub(crate) trait ElementHandler { Element::Esi(Tag::Vars { name }) => self.handle_vars(name.as_deref()), - Element::Esi(Tag::Include { attrs }) => self.on_include(attrs.clone()), + Element::Esi(Tag::Include { attrs }) => self.on_include(attrs), - Element::Esi(Tag::Eval { attrs }) => self.on_eval(attrs.clone()), + Element::Esi(Tag::Eval { attrs }) => self.on_eval(attrs), Element::Esi(Tag::Choose { when_branches, diff --git a/esi/src/error.rs b/esi/src/error.rs index bbda6e6..bdbb877 100644 --- a/esi/src/error.rs +++ b/esi/src/error.rs @@ -6,11 +6,6 @@ use fastly::http::request::SendError; #[derive(Error, Debug)] #[allow(clippy::large_enum_variant)] pub enum ExecutionError { - /// Invalid XML was encountered during parsing. 
- /// (Legacy - not used by nom parser) - // #[error("xml parsing error: {0}")] - // XMLError(#[from] quick_xml::Error), - /// The ESI document contains a tag with a missing parameter. #[error("tag `{0}` is missing required parameter `{1}`")] MissingRequiredParameter(String, String), diff --git a/esi/src/expression.rs b/esi/src/expression.rs index bde1f1a..2040f30 100644 --- a/esi/src/expression.rs +++ b/esi/src/expression.rs @@ -921,14 +921,14 @@ impl ElementHandler for FunctionHandler<'_> { } /// Per ESI spec: `esi:include` is not allowed inside function bodies. - fn on_include(&mut self, _attrs: IncludeAttributes) -> Result { + fn on_include(&mut self, _attrs: &IncludeAttributes) -> Result { Err(ExecutionError::FunctionError( "esi:include is not allowed in function bodies".to_string(), )) } /// Per ESI spec: `esi:eval` is not allowed inside function bodies. - fn on_eval(&mut self, _attrs: IncludeAttributes) -> Result { + fn on_eval(&mut self, _attrs: &IncludeAttributes) -> Result { Err(ExecutionError::FunctionError( "esi:eval is not allowed in function bodies".to_string(), )) diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 0151455..5229493 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -257,7 +257,7 @@ impl ElementHandler for DocumentHandler<'_, W> { Ok(Flow::Continue) } - fn on_include(&mut self, attrs: IncludeAttributes) -> crate::Result { + fn on_include(&mut self, attrs: &IncludeAttributes) -> crate::Result { let queued_element = self .processor .dispatch_include_to_element(&attrs, self.dispatch_fragment_request)?; @@ -270,7 +270,7 @@ impl ElementHandler for DocumentHandler<'_, W> { /// The `dca` attribute controls processing mode: /// - `dca="none"` (default): fragment executed in parent's context (shared variables). /// - `dca="esi"`: fragment executed in an isolated context (output only, no variable leakage). 
- fn on_eval(&mut self, attrs: IncludeAttributes) -> crate::Result { + fn on_eval(&mut self, attrs: &IncludeAttributes) -> crate::Result { // Build and dispatch the request (same machinery as include, but blocking) let queued_element = self .processor From 0e5518863e2b9685f709d7396d814c1d026d244c Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Tue, 3 Mar 2026 10:01:56 -0600 Subject: [PATCH 090/119] =?UTF-8?q?perf:=20use=20Bytes=20in=20Expr::String?= =?UTF-8?q?,=20optimize=20tag=20dispatch,=20rename=20Element::Text?= =?UTF-8?q?=E2=86=92Content?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace Expr::String(Option) with Expr::String(Option) to eliminate allocations in the expression parser (Bytes' copy is ref count inc) - Rename Element::Text to Element::Content for clarity (vs Html passthrough) - Document Element and Expr enums --- esi/src/element_handler.rs | 2 +- esi/src/expression.rs | 4 +- esi/src/parser.rs | 102 +++++++++++++++++++------------------ esi/src/parser_types.rs | 35 ++++++++++--- esi/tests/parser.rs | 32 ++++++------ esi/tests/zero_copy.rs | 10 ++-- 6 files changed, 105 insertions(+), 80 deletions(-) diff --git a/esi/src/element_handler.rs b/esi/src/element_handler.rs index 0e0007b..c9b11bc 100644 --- a/esi/src/element_handler.rs +++ b/esi/src/element_handler.rs @@ -102,7 +102,7 @@ pub(crate) trait ElementHandler { /// tags call the required hooks above. 
fn process(&mut self, element: &Element) -> Result { match element { - Element::Text(text) | Element::Html(text) => { + Element::Content(text) | Element::Html(text) => { self.write_bytes(text.clone())?; Ok(Flow::Continue) } diff --git a/esi/src/expression.rs b/esi/src/expression.rs index 2040f30..2ed4255 100644 --- a/esi/src/expression.rs +++ b/esi/src/expression.rs @@ -51,7 +51,7 @@ impl FunctionRegistry { pub fn eval_expr(expr: &Expr, ctx: &mut EvalContext) -> Result { match expr { Expr::Integer(i) => Ok(Value::Integer(*i)), - Expr::String(Some(s)) => Ok(Value::Text(Bytes::from(s.clone()))), + Expr::String(Some(b)) => Ok(Value::Text(b.clone())), Expr::String(None) => Ok(Value::Text(Bytes::new())), Expr::Variable(name, key, default) => { // Evaluate the key expression if present @@ -115,7 +115,7 @@ pub fn eval_expr(expr: &Expr, ctx: &mut EvalContext) -> Result { let mut result = String::new(); for element in elements { match element { - Element::Text(text) => { + Element::Content(text) => { result.push_str(&String::from_utf8_lossy(text.as_ref())); } Element::Html(html) => { diff --git a/esi/src/parser.rs b/esi/src/parser.rs index 3ebc821..769d47c 100644 --- a/esi/src/parser.rs +++ b/esi/src/parser.rs @@ -114,7 +114,7 @@ where ParsingMode::Complete => { // Treat remaining bytes as text - refcount increment, zero-copy if !remaining.is_empty() { - result.push(Element::Text(slice_as_bytes(original, remaining))); + result.push(Element::Content(slice_as_bytes(original, remaining))); } Ok((&remaining[remaining.len()..], result)) } @@ -216,7 +216,7 @@ fn interpolated_text<'a>( recognize(streaming_bytes::take_while1(|c| { !is_open_bracket(c) && !is_dollar(c) })), - |s: &[u8]| ParseResult::Single(Element::Text(slice_as_bytes(original, s))), + |s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s))), )(input) } @@ -227,7 +227,7 @@ fn interpolated_text_complete<'a>( ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { map( recognize(take_while1(|c| 
!is_open_bracket(c) && !is_dollar(c))), - |s: &[u8]| ParseResult::Single(Element::Text(slice_as_bytes(original, s))), + |s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s))), )(input) } @@ -275,7 +275,7 @@ fn top_level_text<'a>( ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { map( recognize(streaming_bytes::take_while1(|c| !is_open_bracket(c))), - |s: &[u8]| ParseResult::Single(Element::Text(slice_as_bytes(original, s))), + |s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s))), )(input) } @@ -380,7 +380,9 @@ fn parse_variable_name_with_subscript(name: &str) -> (String, Option) { .all(|c| c.is_ascii_alphanumeric() || c == '_') { // Bare identifier like "joan" - treat as string literal key - Some(Expr::String(Some(subscript_str.to_string()))) + Some(Expr::String(Some(Bytes::copy_from_slice( + subscript_str.as_bytes(), + )))) } else if let Ok((_, expr)) = parse_expression(subscript_str) { // Successfully parsed as expression (e.g., "'key'", "$(var)", complex expression) Some(expr) @@ -428,7 +430,7 @@ fn assign_attributes_short(attrs: HashMap) -> ParseResult { Ok((_, expr)) => expr, Err(_) => { // If parsing fails (e.g., plain text), treat as a string literal - Expr::String(Some(value_str)) + Expr::String(Some(Bytes::from(value_str))) } }; @@ -452,7 +454,7 @@ fn parse_attr_as_expr(value_str: String) -> Expr { fn parse_attr_as_expr_with_context(value_str: String, bare_id_as_variable: bool) -> Expr { // Fast-path: empty string if value_str.is_empty() { - return Expr::String(Some(String::new())); + return Expr::String(Some(Bytes::new())); } // Try to parse as pure ESI expression first (variables/functions/quoted strings/integers/dict/list literals) @@ -486,18 +488,16 @@ fn parse_attr_as_expr_with_context(value_str: String, bare_id_as_variable: bool) if elements.len() == 1 { match elements.into_iter().next().unwrap() { Element::Expr(expr) => expr, - Element::Text(text) => { - 
Expr::String(Some(String::from_utf8_lossy(&text).into_owned())) - } - _ => Expr::String(Some(String::from_utf8_lossy(&bytes).into_owned())), + Element::Content(text) => Expr::String(Some(text)), + _ => Expr::String(Some(bytes.clone())), } } else if !elements.is_empty() { Expr::Interpolated(elements) } else { - Expr::String(Some(String::new())) + Expr::String(Some(Bytes::new())) } } - _ => Expr::String(Some(String::from_utf8_lossy(&bytes).into_owned())), + _ => Expr::String(Some(bytes.clone())), } } @@ -519,22 +519,24 @@ fn assign_long(attrs: &HashMap, mut content: Vec) -> Pa // We need to convert it to a single expression let value = if content.is_empty() { // Empty content - empty string - Expr::String(Some(String::new())) + Expr::String(Some(Bytes::new())) } else if content.len() == 1 { // Single element - pop to take ownership match content.pop().expect("checked len == 1") { Element::Expr(expr) => expr, - Element::Text(text) => { + Element::Content(text) => { // Try to parse the text as an expression - let text_str = String::from_utf8_lossy(text.as_ref()).to_string(); - match parse_expression(&text_str) { - Ok((_, expr)) => expr, - Err(_) => Expr::String(Some(text_str)), + match std::str::from_utf8(text.as_ref()) { + Ok(text_str) => match parse_expression(text_str) { + Ok((_, expr)) => expr, + Err(_) => Expr::String(Some(text)), + }, + Err(_) => Expr::String(Some(text)), } } _ => { // HTML or other - treat as empty string - Expr::String(Some(String::new())) + Expr::String(Some(Bytes::new())) } } } else { @@ -916,7 +918,7 @@ fn parse_content_complete(original: &Bytes, content: &[u8]) -> Vec { ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { map( take_while1(|c| !is_dollar(c) && !is_open_bracket(c)), - |s: &[u8]| ParseResult::Single(Element::Text(slice_as_bytes(original, s))), + |s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s))), )(input) } @@ -969,7 +971,7 @@ fn parse_content_complete(original: &Bytes, content: &[u8]) -> Vec { // 
Fallback: consume one byte as text if nothing else matches // This handles stray $ or < characters that aren't valid expressions/tags - elements.push(Element::Text(slice_as_bytes(original, &remaining[..1]))); + elements.push(Element::Content(slice_as_bytes(original, &remaining[..1]))); remaining = &remaining[1..]; } @@ -1018,7 +1020,7 @@ fn esi_text<'a>( streaming_bytes::take_until(TAG_ESI_TEXT_CLOSE), streaming_bytes::tag(TAG_ESI_TEXT_CLOSE), ), - |v| ParseResult::Single(Element::Text(slice_as_bytes(original, v))), + |v| ParseResult::Single(Element::Content(slice_as_bytes(original, v))), )(input) } fn esi_include(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { @@ -1548,51 +1550,51 @@ fn esi_var_name(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { )(input) } -fn not_dollar_or_curlies(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { +fn not_dollar_or_curlies(input: &[u8]) -> IResult<&[u8], Bytes, Error<&[u8]>> { map( take_while(|c| { !is_dollar(c) && c != OPEN_BRACE && c != CLOSE_BRACE && c != COMMA && c != DOUBLE_QUOTE }), - bytes_to_string, + Bytes::copy_from_slice, )(input) } // TODO: handle escaping -fn single_quoted_string(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { +fn single_quoted_string(input: &[u8]) -> IResult<&[u8], Bytes, Error<&[u8]>> { map( delimited( tag(&[SINGLE_QUOTE]), take_while(|c| !is_single_quote(c)), tag(&[SINGLE_QUOTE]), ), - bytes_to_string, + Bytes::copy_from_slice, )(input) } -fn triple_quoted_string(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { +fn triple_quoted_string(input: &[u8]) -> IResult<&[u8], Bytes, Error<&[u8]>> { map( delimited( tag(QUOTE_TRIPLE), take_until(QUOTE_TRIPLE), tag(QUOTE_TRIPLE), ), - bytes_to_string, + Bytes::copy_from_slice, )(input) } fn string(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { map( alt((triple_quoted_string, single_quoted_string)), - |string: String| { - if string.is_empty() { + |bytes: Bytes| { + if bytes.is_empty() { Expr::String(None) } else { 
- Expr::String(Some(string)) + Expr::String(Some(bytes)) } }, )(input) } -fn var_key(input: &[u8]) -> IResult<&[u8], String, Error<&[u8]>> { +fn var_key(input: &[u8]) -> IResult<&[u8], Bytes, Error<&[u8]>> { alt(( triple_quoted_string, single_quoted_string, @@ -1606,7 +1608,7 @@ fn esi_var_key_expr(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { // Try to parse as a variable first (e.g., $(keyVar)) esi_variable, // Otherwise parse as a string - map(var_key, |s: String| Expr::String(Some(s))), + map(var_key, |b: Bytes| Expr::String(Some(b))), ))(input) } @@ -1970,7 +1972,7 @@ exception! assert_eq!( x, [ - Element::Text(Bytes::from_static(b"hello")), + Element::Content(Bytes::from_static(b"hello")), Element::Html(Bytes::from_static(b"
")), ] ); @@ -1987,8 +1989,8 @@ exception! assert_eq!( elements, [ - Element::Text(Bytes::from_static(b"outer")), - Element::Text(Bytes::from_static(b"inner")), + Element::Content(Bytes::from_static(b"outer")), + Element::Content(Bytes::from_static(b"inner")), ] ); } @@ -2002,9 +2004,9 @@ exception! assert_eq!(rest.len(), 0, "Should parse completely"); assert_eq!(elements.len(), 3); - assert!(matches!(&elements[0], Element::Text(t) if t.as_ref() == b"Hello ")); + assert!(matches!(&elements[0], Element::Content(t) if t.as_ref() == b"Hello ")); assert!(matches!(&elements[1], Element::Expr(_))); - assert!(matches!(&elements[2], Element::Text(t) if t.as_ref() == b", welcome!")); + assert!(matches!(&elements[2], Element::Content(t) if t.as_ref() == b", welcome!")); } #[test] @@ -2050,12 +2052,12 @@ exception! [Element::Expr(Expr::Comparison { left: Box::new(Expr::Call( "call".to_string(), - vec![Expr::String(Some("hello".to_string()))] + vec![Expr::String(Some(Bytes::from("hello")))] )), operator: Operator::Matches, right: Box::new(Expr::Variable( "var".to_string(), - Some(Box::new(Expr::String(Some("key".to_string())))), + Some(Box::new(Expr::String(Some(Bytes::from("key"))))), None )) })] @@ -2144,7 +2146,7 @@ exception! let bytes = Bytes::from_static(input); let (rest, x) = parse_remainder(&bytes).unwrap(); assert_eq!(rest.len(), 0); - assert_eq!(x, [Element::Text(Bytes::from_static(b"hello\nthere"))]); + assert_eq!(x, [Element::Content(Bytes::from_static(b"hello\nthere"))]); } #[test] fn test_parse_interpolated() { @@ -2155,8 +2157,8 @@ exception! assert_eq!( x, [ - Element::Text(Bytes::from_static(b"hello $(foo)")), - Element::Text(Bytes::from_static(b"goodbye ")), + Element::Content(Bytes::from_static(b"hello $(foo)")), + Element::Content(Bytes::from_static(b"goodbye ")), Element::Expr(Expr::Variable("foo".to_string(), None, None)), ] ); @@ -2304,7 +2306,7 @@ exception! assert_eq!(elements.len(), 1); if let Element::Esi(Tag::Include { attrs, .. 
}) = &elements[0] { assert!( - matches!(&attrs.src, Expr::String(Some(s)) if s == "http://example.com/fragment") + matches!(&attrs.src, Expr::String(Some(s)) if s == &Bytes::from("http://example.com/fragment")) ); } else { panic!("Expected Include tag"); @@ -2323,7 +2325,7 @@ exception! }) = &elements[0] { assert_eq!(name, "foo"); - assert_eq!(value, &Expr::String(Some("bar".to_string()))); + assert_eq!(value, &Expr::String(Some(Bytes::from("bar")))); } else { panic!("Expected Assign tag"); } @@ -2456,7 +2458,7 @@ exception! assert!(subscript.is_some(), "Should have subscript"); if let Some(sub) = subscript { // Should be a string literal "joan" - assert!(matches!(sub, Expr::String(Some(s)) if s == "joan")); + assert!(matches!(sub, Expr::String(Some(s)) if s == &Bytes::from("joan"))); } assert!(matches!(value, Expr::Integer(28))); } @@ -2482,7 +2484,7 @@ exception! if let Some(sub) = subscript { // Should be a string literal "bob" assert!( - matches!(sub, Expr::String(Some(s)) if s == "bob"), + matches!(sub, Expr::String(Some(s)) if s == &Bytes::from("bob")), "Subscript should be 'bob', got {:?}", sub ); @@ -2549,7 +2551,7 @@ exception! assert_eq!(rest.len(), 0, "Should consume all input"); assert_eq!(elements.len(), 1); // The whole thing becomes text since script tag couldn't be fully parsed - assert!(matches!(&elements[0], Element::Text(_))); + assert!(matches!(&elements[0], Element::Content(_))); } #[test] fn test_partial_esi_tag() { @@ -2574,7 +2576,7 @@ exception! Ok((rest, elements)) => { // Should have parsed "hello " as text assert_eq!(elements.len(), 1); - assert!(matches!(&elements[0], Element::Text(t) if t.as_ref() == b"hello ")); + assert!(matches!(&elements[0], Element::Content(t) if t.as_ref() == b"hello ")); // Remaining should be the partial tag assert_eq!(rest, b"`, ``). Esi(Tag), + /// A dynamic ESI expression (e.g. `$(HTTP_HOST)`, `$(dict{'key'})`). Expr(Expr), + /// Raw HTML markup passed through verbatim without interpretation. 
Html(Bytes), - Text(Bytes), + /// Plain-text content inside ESI constructs that participates in + /// expression evaluation (e.g. assign bodies, interpolated segments). + Content(Bytes), } +/// An ESI expression AST node. +/// +/// Produced by the expression parser for attribute values, `esi:vars`, +/// `esi:when` test conditions, and `esi:assign` bodies. Evaluated at +/// runtime by [`eval_expr`](crate::expression::eval_expr) to produce +/// a [`Value`](crate::expression::Value). #[derive(Debug, Clone, PartialEq)] pub enum Expr { + /// Integer literal (e.g. `42`, `-1`). Integer(i32), - String(Option), + /// String literal (e.g. `'hello'`). `None` represents the empty string `''`. + String(Option), + /// Variable reference: name, optional subscript key, optional default value. + /// e.g. `$(HTTP_HOST)`, `$(dict{'key'})`, `$(var|'default')`. Variable(String, Option>, Option>), + /// Binary comparison or arithmetic: `left operator right`. Comparison { left: Box, operator: Operator, right: Box, }, + /// Function call: name and argument list (e.g. `$base64_encode(...)`). Call(String, Vec), + /// Logical negation: `!(expr)`. Not(Box), - /// Represents a compound expression with interpolated text and expressions - /// Used for cases like: prefix$(VAR)suffix + /// Compound expression mixing literal text and embedded expressions. + /// e.g. `prefix$(VAR)suffix` inside ``. Interpolated(Vec), - /// Dictionary literal: {key:value, key:value} + /// Dictionary literal: `{key: value, key: value}`. DictLiteral(Vec<(Expr, Expr)>), - /// List literal: [value, value] + /// List literal: `[value, value, ...]`. 
ListLiteral(Vec), } diff --git a/esi/tests/parser.rs b/esi/tests/parser.rs index df122b6..97492dc 100644 --- a/esi/tests/parser.rs +++ b/esi/tests/parser.rs @@ -533,12 +533,12 @@ fn test_parse_assign_long_with_interpolation() { return false; } // Check first element is Text("Hello ") - let first_ok = if let esi::parser_types::Element::Text(ref bytes) = elements[0] - { - &bytes[..] == b"Hello " - } else { - false - }; + let first_ok = + if let esi::parser_types::Element::Content(ref bytes) = elements[0] { + &bytes[..] == b"Hello " + } else { + false + }; // Check second element is Variable("name", None, None) let second_ok = if let esi::parser_types::Element::Expr( esi::parser_types::Expr::Variable(ref n, None, None), @@ -549,12 +549,12 @@ fn test_parse_assign_long_with_interpolation() { false }; // Check third element is Text("!") - let third_ok = if let esi::parser_types::Element::Text(ref bytes) = elements[2] - { - &bytes[..] == b"!" - } else { - false - }; + let third_ok = + if let esi::parser_types::Element::Content(ref bytes) = elements[2] { + &bytes[..] == b"!" 
+ } else { + false + }; first_ok && second_ok && third_ok } else { false @@ -771,7 +771,7 @@ fn test_parse_remove() { // esi:remove content should not appear in elements at all let has_removed_text = elements.iter().any(|element| { - if let esi::parser_types::Element::Text(t) = element { + if let esi::parser_types::Element::Content(t) = element { // Check if bytes contain the substring let needle = b"should not appear"; t.windows(needle.len()).any(|window| window == needle) @@ -787,7 +787,7 @@ fn test_parse_remove() { // But visible content should be there let has_visible = elements.iter().any(|element| { - if let esi::parser_types::Element::Text(t) = element { + if let esi::parser_types::Element::Content(t) = element { let needle = b"visible"; t.windows(needle.len()).any(|window| window == needle) } else { @@ -830,7 +830,7 @@ fn test_parse_text_tag() { // esi:text content should be plain text, ESI tags inside should not be parsed let text_found = elements.iter().any(|element| { - if let esi::parser_types::Element::Text(t) = element { + if let esi::parser_types::Element::Content(t) = element { let needle1 = b" Date: Tue, 3 Mar 2026 11:45:09 -0600 Subject: [PATCH 091/119] refactor(parser): optimize attribute extraction and reduce Vec allocations --- esi/src/parser.rs | 123 ++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 70 deletions(-) diff --git a/esi/src/parser.rs b/esi/src/parser.rs index 769d47c..5704908 100644 --- a/esi/src/parser.rs +++ b/esi/src/parser.rs @@ -14,6 +14,7 @@ use nom::error::Error; use nom::multi::{fold_many0, many0, separated_list0}; use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; use nom::IResult; + use std::collections::HashMap; use crate::literals::*; @@ -87,7 +88,7 @@ fn parse_loop<'a, F>( where F: FnMut(&Bytes, &'a [u8]) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>>, { - let mut result = Vec::new(); + let mut result = Vec::with_capacity(8); let mut remaining = 
original.as_ref(); loop { @@ -173,18 +174,6 @@ fn bytes_to_string(bytes: &[u8]) -> String { String::from_utf8_lossy(bytes).into_owned() } -/// Helper to extract an attribute value from a `HashMap`, removing it -#[inline] -fn take_attr(attrs: &mut HashMap, key: &str) -> String { - attrs.remove(key).unwrap_or_default() -} - -/// Helper to extract an optional attribute value from a `HashMap`, removing it -#[inline] -fn take_attr_opt(attrs: &mut HashMap, key: &str) -> Option { - attrs.remove(key) -} - // ============================================================================ // Expression Parsing - Uses COMPLETE parsers (input is always complete) // Expressions come from attribute values which are fully extracted before parsing @@ -409,8 +398,8 @@ fn esi_assign<'a>( alt((esi_assign_short, |i| esi_assign_long(original, i)))(input) } -fn assign_attributes_short(attrs: HashMap) -> ParseResult { - let name = attrs.get("name").cloned().unwrap_or_default(); +fn assign_attributes_short(mut attrs: HashMap) -> ParseResult { + let name = attrs.remove("name").unwrap_or_default(); // Validate variable name according to ESI spec if !is_valid_variable_name(&name) { @@ -422,7 +411,7 @@ fn assign_attributes_short(attrs: HashMap) -> ParseResult { // Parse name and optional subscript (e.g., "colors{0}" or "ages{joan}") let (var_name, subscript) = parse_variable_name_with_subscript(&name); - let value_str = attrs.get("value").cloned().unwrap_or_default(); + let value_str = attrs.remove("value").unwrap_or_default(); // Per ESI spec, short form value attribute contains an expression // Try to parse as ESI expression. If it fails, treat as string literal. 
@@ -679,11 +668,10 @@ fn esi_otherwise<'a>( |i| tag_content(original, i), streaming_bytes::tag(TAG_ESI_OTHERWISE_CLOSE), ), - |content| { - // Return the Otherwise tag followed by its content elements - let mut result = vec![Element::Esi(Tag::Otherwise)]; - result.extend(content); - ParseResult::Multiple(result) + |mut content| { + // Reuse content Vec — insert marker at front instead of creating a new Vec + content.insert(0, Element::Esi(Tag::Otherwise)); + ParseResult::Multiple(content) }, )(input) } @@ -705,13 +693,13 @@ fn esi_when<'a>( |i| tag_content(original, i), streaming_bytes::tag(TAG_ESI_WHEN_CLOSE), )), - |(attrs, content, _)| { - let test = attrs.get("test").cloned().unwrap_or_default(); - let match_name = attrs.get("matchname").cloned(); + |(mut attrs, content, _)| { + let test = attrs.remove("test").unwrap_or_default(); + let match_name = attrs.remove("matchname"); - // Return the When tag followed by its content elements as a marker - let mut result = vec![Element::Esi(Tag::When { test, match_name })]; - result.extend(content); + // Reuse content Vec — insert marker at front instead of creating a new Vec + let mut result = content; + result.insert(0, Element::Esi(Tag::When { test, match_name })); ParseResult::Multiple(result) }, )(input) @@ -732,10 +720,10 @@ fn esi_foreach<'a>( |i| tag_content(original, i), streaming_bytes::tag(TAG_ESI_FOREACH_CLOSE), )), - |(attrs, content, _)| { - let collection_str = attrs.get("collection").cloned().unwrap_or_default(); + |(mut attrs, content, _)| { + let collection_str = attrs.remove("collection").unwrap_or_default(); let collection = parse_attr_as_expr_with_context(collection_str, true); - let item = attrs.get("item").cloned(); + let item = attrs.remove("item"); ParseResult::Single(Element::Esi(Tag::Foreach { collection, @@ -773,8 +761,8 @@ fn esi_function_tag<'a>( |i| tag_content(original, i), streaming_bytes::tag(TAG_ESI_FUNCTION_CLOSE), )), - |(attrs, body, _)| { - let name = 
attrs.get("name").cloned().unwrap_or_default(); + |(mut attrs, body, _)| { + let name = attrs.remove("name").unwrap_or_default(); ParseResult::Single(Element::Esi(Tag::Function { name, body })) }, @@ -789,8 +777,8 @@ fn esi_return(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { attributes, preceded(streaming_char::multispace0, streaming_self_closing), ), - |attrs| { - let value_str = attrs.get("value").cloned().unwrap_or_default(); + |mut attrs| { + let value_str = attrs.remove("value").unwrap_or_default(); let value = parse_attr_as_expr_with_context(value_str, false); ParseResult::Single(Element::Esi(Tag::Return { value })) @@ -883,7 +871,7 @@ fn esi_vars<'a>( } fn parse_vars_attributes(mut attrs: HashMap) -> Result { - take_attr_opt(&mut attrs, "name").map_or_else( + attrs.remove("name").map_or_else( || Err("no name field in short form vars"), |name_val| { if let Ok((_, expr)) = parse_expression(&name_val) { @@ -1027,60 +1015,55 @@ fn esi_include(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { alt((esi_include_self_closing, esi_include_with_params))(input) } -/// Helper to extract include attributes from the attributes `HashMap` +/// Helper to extract include attributes from the HashMap fn extract_include_attrs( mut attrs: HashMap, params: Vec<(String, Expr)>, ) -> IncludeAttributes { - let src = parse_attr_as_expr(take_attr(&mut attrs, "src")); - let alt = take_attr_opt(&mut attrs, "alt").map(parse_attr_as_expr); - let continue_on_error = attrs.get("onerror").is_some_and(|s| s == "continue"); + let src = parse_attr_as_expr(attrs.remove("src").unwrap_or_default()); + let alt = attrs.remove("alt").map(parse_attr_as_expr); + let continue_on_error = attrs.get("onerror").is_some_and(|v| v == "continue"); // Parse dca attribute - default to None - let dca = match attrs.get("dca").map(|s| s.to_lowercase()).as_deref() { + let dca = match attrs.get("dca").map(|v| v.to_lowercase()).as_deref() { Some("esi") => DcaMode::Esi, - _ => DcaMode::None, 
// Default or unrecognized values + _ => DcaMode::None, }; - let ttl = take_attr_opt(&mut attrs, "ttl"); - let maxwait = take_attr_opt(&mut attrs, "maxwait").and_then(|s| s.parse::().ok()); + let ttl = attrs.remove("ttl"); + let maxwait = attrs.remove("maxwait").and_then(|s| s.parse::().ok()); let no_store = attrs .get("no-store") - .is_some_and(|s| s == "on" || s == "true"); - let method = take_attr_opt(&mut attrs, "method").map(parse_attr_as_expr); - let entity = take_attr_opt(&mut attrs, "entity").map(parse_attr_as_expr); + .is_some_and(|v| v == "on" || v == "true"); + let method = attrs.remove("method").map(parse_attr_as_expr); + let entity = attrs.remove("entity").map(parse_attr_as_expr); // Parse header manipulation attributes let mut appendheaders = Vec::new(); let mut setheaders = Vec::new(); let mut removeheaders = Vec::new(); - // Collect all header attributes (there can be multiple) + // Collect header attributes from remaining attrs let keys: Vec = attrs.keys().cloned().collect(); for key in keys { + let value = attrs.remove(&key).unwrap(); if key.starts_with("appendheader") { - if let Some(value) = attrs.remove(&key) { - // Parse header format: "Header-Name: value" - if let Some((name, val)) = value.split_once(':') { - appendheaders.push(( - name.trim().to_string(), - parse_attr_as_expr(val.trim().to_string()), - )); - } + // Parse header format: "Header-Name: value" + if let Some((name, val)) = value.split_once(':') { + appendheaders.push(( + name.trim().to_string(), + parse_attr_as_expr(val.trim().to_string()), + )); } } else if key.starts_with("setheader") { - if let Some(value) = attrs.remove(&key) { - if let Some((name, val)) = value.split_once(':') { - setheaders.push(( - name.trim().to_string(), - parse_attr_as_expr(val.trim().to_string()), - )); - } + if let Some((name, val)) = value.split_once(':') { + setheaders.push(( + name.trim().to_string(), + parse_attr_as_expr(val.trim().to_string()), + )); } } else if key.starts_with("removeheader") { 
- if let Some(value) = attrs.remove(&key) { - removeheaders.push(value); - } + removeheaders.push(value); } } @@ -1196,8 +1179,8 @@ fn esi_param(input: &[u8]) -> IResult<&[u8], (String, Expr), Error<&[u8]>> { ), ), |mut attrs| { - let name = take_attr(&mut attrs, "name"); - let value = parse_attr_as_expr(take_attr(&mut attrs, "value")); + let name = attrs.remove("name").unwrap_or_default(); + let value = parse_attr_as_expr(attrs.remove("value").unwrap_or_default()); (name, value) }, )(input) @@ -1324,9 +1307,9 @@ fn tag_name(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { ))(input) } -/// Parse a complete opening tag -/// Returns (`remaining_input`, (`tag_name`, `full_tag_slice`)) -/// Only succeeds when we have a complete tag (ending with > or />) +/// Parse a complete opening tag (streaming gate) +/// Ensures the tag is fully available before dispatching to downstream +/// complete parsers. Returns (`remaining_input`, (`tag_name`, `full_tag_slice`)) #[allow(clippy::type_complexity)] fn esi_opening_tag(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8]), Error<&[u8]>> { let start = input; From afbe7aa95c7561debea2e316372b1389a752a305 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Tue, 3 Mar 2026 12:45:26 -0600 Subject: [PATCH 092/119] perf: use first-byte dispatch to eliminate unnecessary parser attempts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit complex_document benchmark: ~15µs → ~12.5µs --- esi/src/literals.rs | 4 +-- esi/src/parser.rs | 87 +++++++++++++++++++++++++-------------------- 2 files changed, 50 insertions(+), 41 deletions(-) diff --git a/esi/src/literals.rs b/esi/src/literals.rs index cb479ff..ff5fdfa 100644 --- a/esi/src/literals.rs +++ b/esi/src/literals.rs @@ -117,8 +117,8 @@ pub const DOUBLE_QUOTE: u8 = b'"'; pub const QUOTE_TRIPLE: &[u8] = b"'''"; // Brackets & Braces -pub const OPEN_PAREN: &[u8] = b"("; -pub const CLOSE_PAREN: &[u8] = b")"; +pub const OPEN_PAREN: u8 = b'('; +pub const 
CLOSE_PAREN: u8 = b')'; pub const OPEN_BRACE: u8 = b'{'; pub const CLOSE_BRACE: u8 = b'}'; pub const OPEN_SQ_BRACKET: u8 = b'['; diff --git a/esi/src/parser.rs b/esi/src/parser.rs index 5704908..b50923d 100644 --- a/esi/src/parser.rs +++ b/esi/src/parser.rs @@ -272,11 +272,18 @@ fn interpolated_element<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - alt(( - |i| interpolated_text(original, i), - interpolated_expression, - |i| tag_handler(original, i), - ))(input) + // Fast path: check the first byte to decide which parser to call. + // interpolated_text stops at '<' or '$', so the first byte here is one of those + // (or we're at the start of content). If it's '<', skip interpolated_expression entirely. + match input.first() { + Some(&OPEN_BRACKET) => tag_handler(original, input), + Some(&DOLLAR) => alt((interpolated_expression, |i| tag_handler(original, i)))(input), + _ => alt(( + |i| interpolated_text(original, i), + interpolated_expression, + |i| tag_handler(original, i), + ))(input), + } } // Parse a sequence of interpolated elements (text + expressions + tags) @@ -1189,13 +1196,7 @@ fn esi_param(input: &[u8]) -> IResult<&[u8], (String, Expr), Error<&[u8]>> { fn attributes(input: &[u8]) -> IResult<&[u8], HashMap, Error<&[u8]>> { fold_many0( separated_pair( - preceded( - streaming_char::multispace1, - // Allow alphanumeric characters and hyphens in attribute names - streaming_bytes::take_while1(|c| { - (c as char).is_alphanumeric() || c == HYPHEN || c == UNDERSCORE - }), - ), + preceded(streaming_char::multispace1, streaming_char::alpha1), streaming_bytes::tag(EQUALS), htmlstring, ), @@ -1636,9 +1637,9 @@ fn esi_function(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { let (input, parsed) = tuple(( esi_fn_name, delimited( - terminated(tag(OPEN_PAREN), multispace0), + terminated(tag(&[OPEN_PAREN]), multispace0), fn_argument, - preceded(multispace0, tag(CLOSE_PAREN)), + preceded(multispace0, 
tag(&[CLOSE_PAREN])), ), ))(input)?; @@ -1648,7 +1649,7 @@ fn esi_function(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { } fn esi_variable(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { - delimited(tag(VAR_OPEN), esi_var_name, tag(CLOSE_PAREN))(input) + delimited(tag(VAR_OPEN), esi_var_name, tag(&[CLOSE_PAREN]))(input) } /// Parse all binary operators @@ -1679,17 +1680,20 @@ fn operator(input: &[u8]) -> IResult<&[u8], Operator, Error<&[u8]>> { } fn interpolated_expression(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { - map( - alt(( - dict_literal, - list_literal, - esi_function, - esi_variable, - integer, - string, - )), - |expr| ParseResult::Single(Element::Expr(expr)), - )(input) + let expr = match input.first() { + Some(&OPEN_BRACE) => dict_literal(input), + Some(&OPEN_SQ_BRACKET) => list_literal(input), + Some(&DOLLAR) => alt((esi_function, esi_variable))(input), + Some(b'0'..=b'9') => integer(input), + Some(&SINGLE_QUOTE) => string(input), + _ => { + return Err(nom::Err::Error(Error::new( + input, + nom::error::ErrorKind::Alt, + ))) + } + }?; + Ok((expr.0, ParseResult::Single(Element::Expr(expr.1)))) } fn dict_literal(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { @@ -1748,23 +1752,28 @@ fn list_literal(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { /// Parse primary expressions (highest precedence atoms) /// Handles: variables, functions, literals, grouped expressions fn primary_expr(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { - alt(( + match input.first() { // Parse grouped expression: (expr) - delimited( - tag(OPEN_PAREN), + Some(&OPEN_PAREN) => delimited( + tag(&[OPEN_PAREN]), delimited(multispace0, expr, multispace0), - tag(CLOSE_PAREN), - ), + tag(&[CLOSE_PAREN]), + )(input), // Parse dictionary literal: {key:value, key:value} - dict_literal, + Some(&OPEN_BRACE) => dict_literal(input), // Parse list literal: [value, value] - list_literal, - // Parse basic expressions - esi_function, - esi_variable, - 
integer, - string, - ))(input) + Some(&OPEN_SQ_BRACKET) => list_literal(input), + // Parse function call or variable: $func(...) or $(VAR) + Some(&DOLLAR) => alt((esi_function, esi_variable))(input), + // Parse integer literal (with optional leading minus) + Some(b'0'..=b'9') | Some(&HYPHEN) => integer(input), + // Parse string literal (single or triple quoted) + Some(&SINGLE_QUOTE) => string(input), + _ => Err(nom::Err::Error(Error::new( + input, + nom::error::ErrorKind::Alt, + ))), + } } /// Entry point for expression parsing From e3c7cac28e97db8ea295a4ed5af2e20ba5266330 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Tue, 3 Mar 2026 13:28:52 -0600 Subject: [PATCH 093/119] Refactor streaming parser with nom 8 with `Parser` trait - Updated tests in `streaming_behavior.rs` to utilize the `Parser` trait for improved clarity and consistency. - Changed instances of `tag` and `is_not` to use the `.parse()` method for better error handling with incomplete input. - Ensured that all relevant tests correctly assert the expected `Incomplete` results when parsing incomplete data. 
--- Cargo.lock | 11 +- esi/Cargo.toml | 2 +- esi/src/parser.rs | 767 ++++++++++++++++---------------- esi/tests/streaming_behavior.rs | 22 +- 4 files changed, 396 insertions(+), 406 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 669d6f4..832cf88 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -823,20 +823,13 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - [[package]] name = "nom" -version = "7.1.3" +version = "8.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" dependencies = [ "memchr", - "minimal-lexical", ] [[package]] diff --git a/esi/Cargo.toml b/esi/Cargo.toml index 28881c7..4f0e84c 100644 --- a/esi/Cargo.toml +++ b/esi/Cargo.toml @@ -14,7 +14,7 @@ fastly = "^0.11" log = "^0.4" regex = "1.11.1" html-escape = "0.2.13" -nom = "7.1.3" +nom = "8" bytes = "1.5" base64 = "0.22" percent-encoding = "2.3" diff --git a/esi/src/parser.rs b/esi/src/parser.rs index b50923d..fbc276f 100644 --- a/esi/src/parser.rs +++ b/esi/src/parser.rs @@ -9,11 +9,12 @@ use nom::bytes::complete::{tag, take_until, take_while, take_while1}; use nom::character::complete::multispace0; use nom::branch::alt; -use nom::combinator::{map, map_res, not, opt, peek, recognize}; +use nom::combinator::{not, opt, peek, recognize}; use nom::error::Error; use nom::multi::{fold_many0, many0, separated_list0}; -use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; +use nom::sequence::{delimited, preceded, separated_pair, terminated}; use nom::IResult; +use nom::Parser; use 
std::collections::HashMap; @@ -201,12 +202,11 @@ fn interpolated_text<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - map( - recognize(streaming_bytes::take_while1(|c| { - !is_open_bracket(c) && !is_dollar(c) - })), - |s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s))), - )(input) + recognize(streaming_bytes::take_while1(|c| { + !is_open_bracket(c) && !is_dollar(c) + })) + .map(|s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s)))) + .parse(input) } // Complete version for attribute value parsing - doesn't return Incomplete @@ -214,10 +214,9 @@ fn interpolated_text_complete<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - map( - recognize(take_while1(|c| !is_open_bracket(c) && !is_dollar(c))), - |s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s))), - )(input) + recognize(take_while1(|c| !is_open_bracket(c) && !is_dollar(c))) + .map(|s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s)))) + .parse(input) } /// Parses a string that may contain interpolated expressions like $(VAR) @@ -232,14 +231,16 @@ pub fn interpolated_content(input: &Bytes) -> IResult<&[u8], Vec, Error |i| { alt((interpolated_expression, |ii| { interpolated_text_complete(input, ii) - }))(i) + })) + .parse(i) }, Vec::new, |mut acc: Vec, item: ParseResult| { item.append_to(&mut acc); acc }, - )(input.as_ref()) + ) + .parse(input.as_ref()) } /// Zero-copy element parser - dispatches to text or tags @@ -253,7 +254,8 @@ fn element<'a>( alt(( |i| top_level_text(original, i), |i| tag_handler(original, i), - ))(input) + )) + .parse(input) } /// Text parser for top-level content - stops only at '<', not at '$()' @@ -262,10 +264,9 @@ fn top_level_text<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - map( - recognize(streaming_bytes::take_while1(|c| 
!is_open_bracket(c))), - |s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s))), - )(input) + recognize(streaming_bytes::take_while1(|c| !is_open_bracket(c))) + .map(|s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s)))) + .parse(input) } fn interpolated_element<'a>( @@ -277,12 +278,13 @@ fn interpolated_element<'a>( // (or we're at the start of content). If it's '<', skip interpolated_expression entirely. match input.first() { Some(&OPEN_BRACKET) => tag_handler(original, input), - Some(&DOLLAR) => alt((interpolated_expression, |i| tag_handler(original, i)))(input), + Some(&DOLLAR) => alt((interpolated_expression, |i| tag_handler(original, i))).parse(input), _ => alt(( |i| interpolated_text(original, i), interpolated_expression, |i| tag_handler(original, i), - ))(input), + )) + .parse(input), } } @@ -299,7 +301,8 @@ fn tag_content<'a>( item.append_to(&mut acc); acc }, - )(input) + ) + .parse(input) } /// Validates a variable name according to ESI spec: @@ -402,7 +405,7 @@ fn esi_assign<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - alt((esi_assign_short, |i| esi_assign_long(original, i)))(input) + alt((esi_assign_short, |i| esi_assign_long(original, i))).parse(input) } fn assign_attributes_short(mut attrs: HashMap) -> ParseResult { @@ -551,14 +554,13 @@ fn assign_long(attrs: &HashMap, mut content: Vec) -> Pa } fn esi_assign_short(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { - map( - delimited( - streaming_bytes::tag(TAG_ESI_ASSIGN_OPEN), - attributes, - preceded(streaming_char::multispace0, streaming_self_closing), - ), - assign_attributes_short, - )(input) + delimited( + streaming_bytes::tag(TAG_ESI_ASSIGN_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_self_closing), + ) + .map(assign_attributes_short) + .parse(input) } fn esi_assign_long<'a>( @@ -567,22 +569,21 @@ fn esi_assign_long<'a>( ) -> IResult<&'a [u8], ParseResult, 
Error<&'a [u8]>> { // Per ESI spec, esi:assign cannot contain nested ESI tags - only text and expressions // Capture content first with take_until, then parse as complete - map( - tuple(( - delimited( - streaming_bytes::tag(TAG_ESI_ASSIGN_OPEN), - attributes, - preceded(streaming_char::multispace0, streaming_close_bracket), - ), - streaming_bytes::take_until(TAG_ESI_ASSIGN_CLOSE), - streaming_bytes::tag(TAG_ESI_ASSIGN_CLOSE), - )), - |(attrs, content, _)| { + ( + delimited( + streaming_bytes::tag(TAG_ESI_ASSIGN_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_close_bracket), + ), + streaming_bytes::take_until(TAG_ESI_ASSIGN_CLOSE), + streaming_bytes::tag(TAG_ESI_ASSIGN_CLOSE), + ) + .map(|(attrs, content, _)| { // Parse the captured content in complete mode (text + expressions only) let elements = parse_content_complete(original, content); assign_long(&attrs, elements) - }, - )(input) + }) + .parse(input) } // ============================================================================ @@ -602,7 +603,8 @@ fn parse_container_tag<'a>( streaming_bytes::tag(opening_tag), |i| tag_content(original, i), streaming_bytes::tag(closing_tag), - )(input)?; + ) + .parse(input)?; Ok(( input, @@ -641,9 +643,9 @@ fn esi_try<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - let (input, _) = streaming_bytes::tag(TAG_ESI_TRY_OPEN)(input)?; + let (input, _) = streaming_bytes::tag(TAG_ESI_TRY_OPEN).parse(input)?; let (input, v) = tag_content(original, input)?; - let (input, _) = streaming_bytes::tag(TAG_ESI_TRY_CLOSE)(input)?; + let (input, _) = streaming_bytes::tag(TAG_ESI_TRY_CLOSE).parse(input)?; let mut attempts = vec![]; let mut except = None; @@ -669,38 +671,36 @@ fn esi_otherwise<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - map( - delimited( - streaming_bytes::tag(TAG_ESI_OTHERWISE_OPEN), - |i| tag_content(original, i), - 
streaming_bytes::tag(TAG_ESI_OTHERWISE_CLOSE), - ), - |mut content| { - // Reuse content Vec — insert marker at front instead of creating a new Vec - content.insert(0, Element::Esi(Tag::Otherwise)); - ParseResult::Multiple(content) - }, - )(input) + delimited( + streaming_bytes::tag(TAG_ESI_OTHERWISE_OPEN), + |i| tag_content(original, i), + streaming_bytes::tag(TAG_ESI_OTHERWISE_CLOSE), + ) + .map(|mut content| { + // Reuse content Vec — insert marker at front instead of creating a new Vec + content.insert(0, Element::Esi(Tag::Otherwise)); + ParseResult::Multiple(content) + }) + .parse(input) } fn esi_when<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - map( - tuple(( - delimited( - streaming_bytes::tag(TAG_ESI_WHEN_OPEN), - attributes, - preceded( - streaming_char::multispace0, - alt((streaming_close_bracket, streaming_self_closing)), - ), + ( + delimited( + streaming_bytes::tag(TAG_ESI_WHEN_OPEN), + attributes, + preceded( + streaming_char::multispace0, + alt((streaming_close_bracket, streaming_self_closing)), ), - |i| tag_content(original, i), - streaming_bytes::tag(TAG_ESI_WHEN_CLOSE), - )), - |(mut attrs, content, _)| { + ), + |i| tag_content(original, i), + streaming_bytes::tag(TAG_ESI_WHEN_CLOSE), + ) + .map(|(mut attrs, content, _)| { let test = attrs.remove("test").unwrap_or_default(); let match_name = attrs.remove("matchname"); @@ -708,8 +708,8 @@ fn esi_when<'a>( let mut result = content; result.insert(0, Element::Esi(Tag::When { test, match_name })); ParseResult::Multiple(result) - }, - )(input) + }) + .parse(input) } /// Parse ... 
@@ -717,17 +717,16 @@ fn esi_foreach<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - map( - tuple(( - delimited( - streaming_bytes::tag(TAG_ESI_FOREACH_OPEN), - attributes, - preceded(streaming_char::multispace0, streaming_close_bracket), - ), - |i| tag_content(original, i), - streaming_bytes::tag(TAG_ESI_FOREACH_CLOSE), - )), - |(mut attrs, content, _)| { + ( + delimited( + streaming_bytes::tag(TAG_ESI_FOREACH_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_close_bracket), + ), + |i| tag_content(original, i), + streaming_bytes::tag(TAG_ESI_FOREACH_CLOSE), + ) + .map(|(mut attrs, content, _)| { let collection_str = attrs.remove("collection").unwrap_or_default(); let collection = parse_attr_as_expr_with_context(collection_str, true); let item = attrs.remove("item"); @@ -737,20 +736,19 @@ fn esi_foreach<'a>( item, content, })) - }, - )(input) + }) + .parse(input) } /// Parse fn esi_break(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { - map( - delimited( - streaming_bytes::tag(TAG_ESI_BREAK_OPEN), - streaming_char::multispace0, - streaming_self_closing, - ), - |_| ParseResult::Single(Element::Esi(Tag::Break)), - )(input) + delimited( + streaming_bytes::tag(TAG_ESI_BREAK_OPEN), + streaming_char::multispace0, + streaming_self_closing, + ) + .map(|_| ParseResult::Single(Element::Esi(Tag::Break))) + .parse(input) } /// Parse ... 
@@ -758,39 +756,37 @@ fn esi_function_tag<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - map( - tuple(( - delimited( - streaming_bytes::tag(TAG_ESI_FUNCTION_OPEN), - attributes, - preceded(streaming_char::multispace0, streaming_close_bracket), - ), - |i| tag_content(original, i), - streaming_bytes::tag(TAG_ESI_FUNCTION_CLOSE), - )), - |(mut attrs, body, _)| { + ( + delimited( + streaming_bytes::tag(TAG_ESI_FUNCTION_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_close_bracket), + ), + |i| tag_content(original, i), + streaming_bytes::tag(TAG_ESI_FUNCTION_CLOSE), + ) + .map(|(mut attrs, body, _)| { let name = attrs.remove("name").unwrap_or_default(); ParseResult::Single(Element::Esi(Tag::Function { name, body })) - }, - )(input) + }) + .parse(input) } /// Parse fn esi_return(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { - map( - delimited( - streaming_bytes::tag(TAG_ESI_RETURN_OPEN), - attributes, - preceded(streaming_char::multispace0, streaming_self_closing), - ), - |mut attrs| { - let value_str = attrs.remove("value").unwrap_or_default(); - let value = parse_attr_as_expr_with_context(value_str, false); + delimited( + streaming_bytes::tag(TAG_ESI_RETURN_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_self_closing), + ) + .map(|mut attrs| { + let value_str = attrs.remove("value").unwrap_or_default(); + let value = parse_attr_as_expr_with_context(value_str, false); - ParseResult::Single(Element::Esi(Tag::Return { value })) - }, - )(input) + ParseResult::Single(Element::Esi(Tag::Return { value })) + }) + .parse(input) } /// Zero-copy parser for ... 
@@ -798,9 +794,9 @@ fn esi_choose<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - let (input, _) = streaming_bytes::tag(TAG_ESI_CHOOSE_OPEN)(input)?; + let (input, _) = streaming_bytes::tag(TAG_ESI_CHOOSE_OPEN).parse(input)?; let (input, v) = tag_content(original, input)?; - let (input, _) = streaming_bytes::tag(TAG_ESI_CHOOSE_CLOSE)(input)?; + let (input, _) = streaming_bytes::tag(TAG_ESI_CHOOSE_CLOSE).parse(input)?; let mut when_branches = vec![]; let mut otherwise_events = Vec::new(); @@ -874,7 +870,7 @@ fn esi_vars<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - alt((esi_vars_short, |i| esi_vars_long(original, i)))(input) + alt((esi_vars_short, |i| esi_vars_long(original, i))).parse(input) } fn parse_vars_attributes(mut attrs: HashMap) -> Result { @@ -891,14 +887,13 @@ fn parse_vars_attributes(mut attrs: HashMap) -> Result IResult<&[u8], ParseResult, Error<&[u8]>> { - map_res( - delimited( - streaming_bytes::tag(TAG_ESI_VARS_OPEN), - attributes, - preceded(streaming_char::multispace0, streaming_self_closing), // Short form must be self-closing per ESI spec - ), - parse_vars_attributes, - )(input) + delimited( + streaming_bytes::tag(TAG_ESI_VARS_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_self_closing), // Short form must be self-closing per ESI spec + ) + .map_res(parse_vars_attributes) + .parse(input) } /// Parse content for tags that don't support nested ESI (text + expressions + HTML only) @@ -911,10 +906,9 @@ fn parse_content_complete(original: &Bytes, content: &[u8]) -> Vec { original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - map( - take_while1(|c| !is_dollar(c) && !is_open_bracket(c)), - |s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s))), - )(input) + take_while1(|c| !is_dollar(c) && !is_open_bracket(c)) + .map(|s: &[u8]| 
ParseResult::Single(Element::Content(slice_as_bytes(original, s)))) + .parse(input) } // HTML tag in complete mode - any tag that's NOT an ESI tag @@ -923,14 +917,15 @@ fn parse_content_complete(original: &Bytes, content: &[u8]) -> Vec { input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { // Check that this is NOT an esi: tag - let (_, _) = peek(tuple((open_bracket, not(tag(ESI_PREFIX)))))(input)?; + let (_, _) = peek((open_bracket, not(tag(ESI_PREFIX)))).parse(input)?; // Parse the HTML tag (simplified - just capture until >) - let (rest, html) = recognize(tuple(( + let (rest, html) = recognize(( open_bracket, take_until(&[CLOSE_BRACKET][..]), close_bracket, - )))(input)?; + )) + .parse(input)?; Ok(( rest, @@ -978,30 +973,29 @@ fn esi_vars_long<'a>( input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { // esi:vars supports nested ESI tags (like esi:assign) per common usage patterns - let (input, _) = streaming_bytes::tag(TAG_ESI_VARS_OPEN_COMPLETE)(input)?; + let (input, _) = streaming_bytes::tag(TAG_ESI_VARS_OPEN_COMPLETE).parse(input)?; let (input, elements) = tag_content(original, input)?; - let (input, _) = streaming_bytes::tag(TAG_ESI_VARS_CLOSE)(input)?; + let (input, _) = streaming_bytes::tag(TAG_ESI_VARS_CLOSE).parse(input)?; Ok((input, ParseResult::Multiple(elements))) } fn esi_comment(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { - map( - delimited( - streaming_bytes::tag(TAG_ESI_COMMENT_OPEN), - attributes, - preceded(streaming_char::multispace0, streaming_self_closing), // ESI comment must be self-closing per ESI spec - ), - |_| ParseResult::Empty, - )(input) + delimited( + streaming_bytes::tag(TAG_ESI_COMMENT_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_self_closing), // ESI comment must be self-closing per ESI spec + ) + .map(|_| ParseResult::Empty) + .parse(input) } /// Zero-copy esi:remove parser /// Per ESI spec, esi:remove content is discarded - no nested ESI 
processing needed fn esi_remove(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { - let (input, _) = streaming_bytes::tag(TAG_ESI_REMOVE_OPEN)(input)?; - let (input, _) = streaming_bytes::take_until(TAG_ESI_REMOVE_CLOSE)(input)?; - let (input, _) = streaming_bytes::tag(TAG_ESI_REMOVE_CLOSE)(input)?; + let (input, _) = streaming_bytes::tag(TAG_ESI_REMOVE_OPEN).parse(input)?; + let (input, _) = streaming_bytes::take_until(TAG_ESI_REMOVE_CLOSE).parse(input)?; + let (input, _) = streaming_bytes::tag(TAG_ESI_REMOVE_CLOSE).parse(input)?; Ok((input, ParseResult::Empty)) } @@ -1009,17 +1003,16 @@ fn esi_text<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - map( - delimited( - streaming_bytes::tag(TAG_ESI_TEXT_OPEN), - streaming_bytes::take_until(TAG_ESI_TEXT_CLOSE), - streaming_bytes::tag(TAG_ESI_TEXT_CLOSE), - ), - |v| ParseResult::Single(Element::Content(slice_as_bytes(original, v))), - )(input) + delimited( + streaming_bytes::tag(TAG_ESI_TEXT_OPEN), + streaming_bytes::take_until(TAG_ESI_TEXT_CLOSE), + streaming_bytes::tag(TAG_ESI_TEXT_CLOSE), + ) + .map(|v| ParseResult::Single(Element::Content(slice_as_bytes(original, v)))) + .parse(input) } fn esi_include(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { - alt((esi_include_self_closing, esi_include_with_params))(input) + alt((esi_include_self_closing, esi_include_with_params)).parse(input) } /// Helper to extract include attributes from the HashMap @@ -1092,105 +1085,100 @@ fn extract_include_attrs( } fn esi_include_self_closing(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { - map( - delimited( - streaming_bytes::tag(TAG_ESI_INCLUDE_OPEN), - attributes, - preceded(streaming_char::multispace0, streaming_self_closing), - ), - |attrs| { - let attrs = extract_include_attrs(attrs, Vec::new()); + delimited( + streaming_bytes::tag(TAG_ESI_INCLUDE_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_self_closing), + ) + 
.map(|attrs| { + let attrs = extract_include_attrs(attrs, Vec::new()); - ParseResult::Single(Element::Esi(Tag::Include { attrs })) - }, - )(input) + ParseResult::Single(Element::Esi(Tag::Include { attrs })) + }) + .parse(input) } fn esi_include_with_params(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { - map( - tuple(( - delimited( - streaming_bytes::tag(TAG_ESI_INCLUDE_OPEN), - attributes, - preceded(streaming_char::multispace0, streaming_close_bracket), - ), - many0(preceded(streaming_char::multispace0, esi_param)), - preceded( - streaming_char::multispace0, - streaming_bytes::tag(TAG_ESI_INCLUDE_CLOSE), - ), - )), - |(attrs, params, _)| { + ( + delimited( + streaming_bytes::tag(TAG_ESI_INCLUDE_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_close_bracket), + ), + many0(preceded(streaming_char::multispace0, esi_param)), + preceded( + streaming_char::multispace0, + streaming_bytes::tag(TAG_ESI_INCLUDE_CLOSE), + ), + ) + .map(|(attrs, params, _)| { let attrs = extract_include_attrs(attrs, params); ParseResult::Single(Element::Esi(Tag::Include { attrs })) - }, - )(input) + }) + .parse(input) } /// Parse tag - similar to include but always evaluates as ESI /// Note: eval does NOT support alt attribute - use try/except instead fn esi_eval(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { - alt((esi_eval_self_closing, esi_eval_with_params))(input) + alt((esi_eval_self_closing, esi_eval_with_params)).parse(input) } fn esi_eval_self_closing(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { - map( - delimited( - streaming_bytes::tag(TAG_ESI_EVAL_OPEN), - attributes, - preceded(streaming_char::multispace0, streaming_self_closing), - ), - |attrs| { - let mut attrs = extract_include_attrs(attrs, Vec::new()); - // Eval does not support alt - clear it if somehow present - attrs.alt = None; + delimited( + streaming_bytes::tag(TAG_ESI_EVAL_OPEN), + attributes, + preceded(streaming_char::multispace0, 
streaming_self_closing), + ) + .map(|attrs| { + let mut attrs = extract_include_attrs(attrs, Vec::new()); + // Eval does not support alt - clear it if somehow present + attrs.alt = None; - ParseResult::Single(Element::Esi(Tag::Eval { attrs })) - }, - )(input) + ParseResult::Single(Element::Esi(Tag::Eval { attrs })) + }) + .parse(input) } fn esi_eval_with_params(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { - map( - tuple(( - delimited( - streaming_bytes::tag(TAG_ESI_EVAL_OPEN), - attributes, - preceded(streaming_char::multispace0, streaming_close_bracket), - ), - many0(preceded(streaming_char::multispace0, esi_param)), - preceded( - streaming_char::multispace0, - streaming_bytes::tag(TAG_ESI_EVAL_CLOSE), - ), - )), - |(attrs, params, _)| { + ( + delimited( + streaming_bytes::tag(TAG_ESI_EVAL_OPEN), + attributes, + preceded(streaming_char::multispace0, streaming_close_bracket), + ), + many0(preceded(streaming_char::multispace0, esi_param)), + preceded( + streaming_char::multispace0, + streaming_bytes::tag(TAG_ESI_EVAL_CLOSE), + ), + ) + .map(|(attrs, params, _)| { let mut attrs = extract_include_attrs(attrs, params); // Eval does not support alt - clear it if somehow present attrs.alt = None; ParseResult::Single(Element::Esi(Tag::Eval { attrs })) - }, - )(input) + }) + .parse(input) } fn esi_param(input: &[u8]) -> IResult<&[u8], (String, Expr), Error<&[u8]>> { - map( - delimited( - streaming_bytes::tag(TAG_ESI_PARAM_OPEN), - attributes, - preceded( - streaming_char::multispace0, - alt((streaming_close_bracket, streaming_self_closing)), - ), + delimited( + streaming_bytes::tag(TAG_ESI_PARAM_OPEN), + attributes, + preceded( + streaming_char::multispace0, + alt((streaming_close_bracket, streaming_self_closing)), ), - |mut attrs| { - let name = attrs.remove("name").unwrap_or_default(); - let value = parse_attr_as_expr(attrs.remove("value").unwrap_or_default()); - (name, value) - }, - )(input) + ) + .map(|mut attrs| { + let name = 
attrs.remove("name").unwrap_or_default(); + let value = parse_attr_as_expr(attrs.remove("value").unwrap_or_default()); + (name, value) + }) + .parse(input) } fn attributes(input: &[u8]) -> IResult<&[u8], HashMap, Error<&[u8]>> { @@ -1205,7 +1193,8 @@ fn attributes(input: &[u8]) -> IResult<&[u8], HashMap, Error<&[u acc.insert(bytes_to_string(k), bytes_to_string(v)); acc }, - )(input) + ) + .parse(input) } fn htmlstring(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { @@ -1220,7 +1209,8 @@ fn htmlstring(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { streaming_bytes::take_while(|c| !is_single_quote(c)), single_quote, ), - ))(input) + )) + .parse(input) } // ============================================================================ @@ -1229,43 +1219,43 @@ fn htmlstring(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { /// Helper to find and consume the closing '>' character #[inline] fn streaming_close_bracket(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { - streaming_bytes::tag(&[CLOSE_BRACKET])(input) + streaming_bytes::tag(&[CLOSE_BRACKET] as &[u8]).parse(input) } /// Helper to find and consume the closing '>' character #[inline] fn close_bracket(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { - tag(&[CLOSE_BRACKET])(input) + tag(&[CLOSE_BRACKET] as &[u8]).parse(input) } /// Helper to find and consume the closing self-closing tag characters '/>' #[inline] fn streaming_self_closing(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { - streaming_bytes::tag(TAG_SELF_CLOSE)(input) + streaming_bytes::tag(TAG_SELF_CLOSE).parse(input) } /// Helper to find and consume the opening '<' character #[inline] fn open_bracket(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { - tag(&[OPEN_BRACKET])(input) + tag(&[OPEN_BRACKET] as &[u8]).parse(input) } /// Helper to find and consume the opening '<' character #[inline] fn streaming_open_bracket(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { - 
streaming_bytes::tag(&[OPEN_BRACKET])(input) + streaming_bytes::tag(&[OPEN_BRACKET] as &[u8]).parse(input) } /// Helper to find and consume the closing double quote character #[inline] fn double_quote(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { - streaming_bytes::tag(&[DOUBLE_QUOTE])(input) + streaming_bytes::tag(&[DOUBLE_QUOTE] as &[u8]).parse(input) } /// Helper to find and consume the closing single quote character #[inline] fn single_quote(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { - streaming_bytes::tag(&[SINGLE_QUOTE])(input) + streaming_bytes::tag(&[SINGLE_QUOTE] as &[u8]).parse(input) } /// Check if byte is an opening bracket '<' @@ -1305,7 +1295,8 @@ fn tag_name(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { recognize(nom::sequence::pair( streaming_bytes::take_while_m_n(1, 1, is_tag_start), // first letter streaming_bytes::take_while(is_tag_cont), // rest of name - ))(input) + )) + .parse(input) } /// Parse a complete opening tag (streaming gate) @@ -1320,7 +1311,7 @@ fn esi_opening_tag(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8]), Error<&[u8]>> let (rest, name) = tag_name(rest)?; // Parse attributes - consume everything up to '>' - let (rest, _) = streaming_bytes::take_till(is_close_bracket)(rest)?; + let (rest, _) = streaming_bytes::take_till(is_close_bracket).parse(rest)?; // Must have > to be complete let (rest, _) = streaming_close_bracket(rest)?; @@ -1383,7 +1374,8 @@ fn tag_handler<'a>( } } }, - ))(input) + )) + .parse(input) } /// Parse HTML comment - input starts at +/// +/// +/// ``` +/// +/// Expected output for $(list), $(copy1), $(copy2): all `1,2,9` +#[test] +fn test_list_reference_semantics() -> Result<(), Error> { + let input = r#" + + + +$(list) +$(copy1) +$(copy2)"#; + + let dispatcher = + |_req: Request, _maxwait: Option| -> esi::Result { + unreachable!("no fragments in this test") + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let 
mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + // All three variables refer to the same list — mutation through copy1 is + // visible in list and copy2. + assert_eq!( + result.trim(), + "1,2,9\n1,2,9\n1,2,9", + "Lists should be assigned by reference, not copied" + ); + Ok(()) +} + +/// Spec example: using foreach to iterate a dict and build a real copy, +/// then mutating the copy — original should be unaffected. +/// +/// ```esi +/// +/// +/// +/// +/// +/// ``` +/// +/// Expected: dict unchanged, copy has key 2 = "Second" +#[test] +fn test_dict_copy_by_iteration() -> Result<(), Error> { + let input = r#" + + + + +$(dict) +$(copy)"#; + + let dispatcher = + |_req: Request, _maxwait: Option| -> esi::Result { + unreachable!("no fragments in this test") + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + let lines: Vec<&str> = result.trim().lines().collect(); + + // dict should be unchanged: {1: 'one', 2: 'two', 3: 'three'} + // dict_to_string sorts by key and formats as k=v&k=v + assert_eq!(lines[0], "1=one&2=two&3=three", "Original dict should be unchanged"); + + // copy should have key 2 replaced: {1: 'one', 2: 'Second', 3: 'three'} + assert_eq!(lines[1], "1=one&2=Second&3=three", "Copy should have key 2 = 'Second'"); + + Ok(()) +} + +/// Dict reference semantics: assigning a dict to another name creates an alias. +/// Mutating through the alias is visible from the original. 
+#[test] +fn test_dict_reference_semantics() -> Result<(), Error> { + let input = r#" + + +$(orig) +$(alias)"#; + + let dispatcher = + |_req: Request, _maxwait: Option| -> esi::Result { + unreachable!("no fragments in this test") + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + // Both should reflect the mutation + assert_eq!( + result.trim(), + "1=one&2=TWO\n1=one&2=TWO", + "Dicts should be assigned by reference, not copied" + ); + Ok(()) +} + +/// Mutating the original list is visible through the alias. +#[test] +fn test_list_mutation_visible_through_alias() -> Result<(), Error> { + let input = r#" + + +$(b{0})"#; + + let dispatcher = + |_req: Request, _maxwait: Option| -> esi::Result { + unreachable!("no fragments in this test") + }; + + let reader = std::io::BufReader::new(std::io::Cursor::new(input.as_bytes())); + let mut output = Vec::new(); + let mut processor = Processor::new(None, Configuration::default()); + processor.process_stream(reader, &mut output, Some(&dispatcher), None)?; + + let result = String::from_utf8(output).unwrap(); + assert_eq!( + result.trim(), + "99", + "Mutation through original should be visible via alias" + ); + Ok(()) +} From a7016413b1d45fff27a74ca0382b4ab32ed0ef51 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Wed, 4 Mar 2026 15:26:38 -0600 Subject: [PATCH 100/119] refactor: refactor: capacity allocation in various data structures, rename and cleanup --- README.md | 14 +++++++------- esi/src/config.rs | 2 +- esi/src/expression.rs | 8 ++++---- esi/src/functions.rs | 6 +++--- esi/src/lib.rs | 4 ++-- esi/src/parser.rs | 33 +++++++++++++++++++++------------ esi/src/parser_types.rs | 4 ++-- esi/tests/esi-tests.rs | 10 ++++++++-- 8 files changed, 48 insertions(+), 
33 deletions(-) diff --git a/README.md b/README.md index c600eee..6e7fac0 100644 --- a/README.md +++ b/README.md @@ -226,7 +226,7 @@ The following variables are available in ESI expressions: let config = esi::Configuration::default() .with_escaped(true) // unescape HTML entities in URLs (default: true) .with_chunk_size(32768) // streaming read buffer, in bytes (default: 16384) - .with_max_function_recursion_depth(10) // max depth for user-defined function calls (default: 5) + .with_function_recursion_depth(10) // max depth for user-defined function calls (default: 5) .with_caching(esi::cache::CacheConfig { is_rendered_cacheable: true, rendered_cache_control: true, @@ -237,12 +237,12 @@ let config = esi::Configuration::default() }); ``` -| Field | Builder method | Default | Description | -| -------------------------- | ------------------------------------------ | --------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| `is_escaped_content` | `with_escaped(bool)` | `true` | Unescape HTML entities in URLs. Set to `false` for non-HTML templates (e.g. JSON). | -| `chunk_size` | `with_chunk_size(usize)` | `16384` | Size (bytes) of the read buffer used when streaming ESI input. Larger values may improve throughput; smaller values reduce memory. | -| `function_recursion_depth` | `with_max_function_recursion_depth(usize)` | `5` | Maximum call-stack depth for user-defined ESI functions. | -| `cache` | `with_caching(CacheConfig)` | see below | Cache settings for rendered output and included fragments. | +| Field | Builder method | Default | Description | +| -------------------------- | -------------------------------------- | --------- | ---------------------------------------------------------------------------------------------------------------------------------- | +| `is_escaped_content` | `with_escaped(bool)` | `true` | Unescape HTML entities in URLs. 
Set to `false` for non-HTML templates (e.g. JSON). | +| `chunk_size` | `with_chunk_size(usize)` | `16384` | Size (bytes) of the read buffer used when streaming ESI input. Larger values may improve throughput; smaller values reduce memory. | +| `function_recursion_depth` | `with_function_recursion_depth(usize)` | `5` | Maximum call-stack depth for user-defined ESI functions. | +| `cache` | `with_caching(CacheConfig)` | see below | Cache settings for rendered output and included fragments. | **`CacheConfig` fields:** diff --git a/esi/src/config.rs b/esi/src/config.rs index c98c48d..0ddd717 100644 --- a/esi/src/config.rs +++ b/esi/src/config.rs @@ -52,7 +52,7 @@ impl Configuration { } /// Configure maximum recursion depth for user-defined function calls - pub const fn with_max_function_recursion_depth(mut self, depth: usize) -> Self { + pub const fn with_function_recursion_depth(mut self, depth: usize) -> Self { self.function_recursion_depth = depth; self } diff --git a/esi/src/expression.rs b/esi/src/expression.rs index 2475af1..5b8adfa 100644 --- a/esi/src/expression.rs +++ b/esi/src/expression.rs @@ -83,7 +83,7 @@ pub fn eval_expr(expr: &Expr, ctx: &mut EvalContext) -> Result { eval_comparison(&left_val, &right_val, operator, ctx) } Expr::Call(func_name, args) => { - let mut values = Vec::new(); + let mut values = Vec::with_capacity(args.len()); for arg in args { values.push(eval_expr(arg, ctx)?); } @@ -94,7 +94,7 @@ pub fn eval_expr(expr: &Expr, ctx: &mut EvalContext) -> Result { Ok(Value::Boolean(!inner_value.to_bool())) } Expr::DictLiteral(pairs) => { - let mut map = HashMap::new(); + let mut map = HashMap::with_capacity(pairs.len()); for (key_expr, val_expr) in pairs { let key = eval_expr(key_expr, ctx)?; let val = eval_expr(val_expr, ctx)?; @@ -103,7 +103,7 @@ pub fn eval_expr(expr: &Expr, ctx: &mut EvalContext) -> Result { Ok(Value::new_dict(map)) } Expr::ListLiteral(items) => { - let mut values = Vec::new(); + let mut values = 
Vec::with_capacity(items.len()); for item_expr in items { values.push(eval_expr(item_expr, ctx)?); } @@ -558,7 +558,7 @@ impl EvalContext { if params.is_empty() { Value::Null } else { - let mut dict = HashMap::new(); + let mut dict = HashMap::with_capacity(params.len()); for (key, values) in params { let value = match values.len() { 0 => Value::Null, diff --git a/esi/src/functions.rs b/esi/src/functions.rs index f1c87fb..f5d981d 100644 --- a/esi/src/functions.rs +++ b/esi/src/functions.rs @@ -778,13 +778,13 @@ pub fn string_split(args: &[Value]) -> Result { let parts: Vec = if sep.is_empty() { // Empty separator: split into individual bytes (ESI is byte/ASCII-oriented) - let mut out = Vec::new(); let limit = max_splits.map(|n| n as usize); let bytes = source.as_bytes(); + let mut out = Vec::with_capacity(limit.unwrap_or(bytes.len())); - for (splits_done, (i, &b)) in bytes.iter().enumerate().enumerate() { + for (i, &b) in bytes.iter().enumerate() { if let Some(limit) = limit { - if splits_done >= limit { + if i >= limit { // Remaining bytes as one final element out.push(source[i..].to_string()); return Ok(Value::new_list( diff --git a/esi/src/lib.rs b/esi/src/lib.rs index 0ae935c..9bf3916 100644 --- a/esi/src/lib.rs +++ b/esi/src/lib.rs @@ -564,7 +564,7 @@ impl Processor { /// - Dispatches includes immediately; waits for them later in document order /// - Uses `select()` to harvest in-flight includes while preserving output order /// - /// For Fastly `Response` bodies, prefer [`process_response`], which wires up + /// For Fastly `Response` bodies, prefer `process_response`, which wires up /// cache headers and response metadata for you. /// /// # Arguments @@ -958,7 +958,7 @@ impl Processor { // `buf[i]` is `None` while the slot is waiting for a response, // `Some(bytes)` once it is ready. Try-block includes use the SAME // buf slots as bare includes — no separate content_slots system. 
- let mut buf: Vec> = Vec::new(); + let mut buf: Vec> = Vec::with_capacity(self.queue.len()); let mut next_out: usize = 0; // RequestKey → FIFO queue of SlotEntry for all in-flight requests. diff --git a/esi/src/parser.rs b/esi/src/parser.rs index 644fa1f..5ac3dfc 100644 --- a/esi/src/parser.rs +++ b/esi/src/parser.rs @@ -203,11 +203,9 @@ fn interpolated_text<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - recognize(streaming_bytes::take_while1(|c| { - !is_open_bracket(c) && !is_dollar(c) - })) - .map(|s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s)))) - .parse(input) + streaming_bytes::take_while1(|c| !is_open_bracket(c) && !is_dollar(c)) + .map(|s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s)))) + .parse(input) } // Complete version for attribute value parsing - doesn't return Incomplete @@ -215,7 +213,7 @@ fn interpolated_text_complete<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - recognize(take_while1(|c| !is_open_bracket(c) && !is_dollar(c))) + take_while1(|c| !is_open_bracket(c) && !is_dollar(c)) .map(|s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s)))) .parse(input) } @@ -235,7 +233,7 @@ pub fn interpolated_content(input: &Bytes) -> IResult<&[u8], Vec, Error })) .parse(i) }, - Vec::new, + || Vec::with_capacity(4), |mut acc: Vec, item: ParseResult| { item.append_to(&mut acc); acc @@ -265,7 +263,7 @@ fn top_level_text<'a>( original: &Bytes, input: &'a [u8], ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - recognize(streaming_bytes::take_while1(|c| !is_open_bracket(c))) + streaming_bytes::take_while1(|c| !is_open_bracket(c)) .map(|s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s)))) .parse(input) } @@ -632,7 +630,10 @@ fn esi_attempt<'a>( ) } -// Zero-copy version used by both esi_tag and esi_tag_old (via parse_interpolated) +/// Parse which contains 
multiple and an optional +/// +/// Per ESI spec, can contain multiple blocks and at most one block. +/// We parse the entire content of and then separate out the attempts and except blocks to construct the Try tag. fn esi_try<'a>( original: &Bytes, input: &'a [u8], @@ -641,7 +642,7 @@ fn esi_try<'a>( let (input, v) = tag_content(original, input)?; let (input, _) = streaming_bytes::tag(TAG_ESI_TRY_CLOSE).parse(input)?; - let mut attempts = vec![]; + let mut attempts = Vec::with_capacity(v.len()); let mut except = None; for element in v { match element { @@ -746,6 +747,11 @@ fn esi_break(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { } /// Parse ... +/// +/// Per ESI spec, the content of is treated as a literal string and not parsed for nested tags or expressions. +/// However, we still need to capture the content as a Bytes slice for runtime evaluation. +/// We use tag_content to capture the raw content bytes without parsing nested tags, +/// and then construct the Function tag with the name and raw body. fn esi_function_tag<'a>( original: &Bytes, input: &'a [u8], @@ -783,7 +789,10 @@ fn esi_return(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { .parse(input) } -/// Zero-copy parser for ... +/// Parse which contains multiple and an optional +/// +/// Per ESI spec, can contain multiple blocks and at most one block. +/// We parse the entire content of and then separate out the when branches and otherwise block to construct the Choose tag. 
fn esi_choose<'a>( original: &Bytes, input: &'a [u8], @@ -792,7 +801,7 @@ fn esi_choose<'a>( let (input, v) = tag_content(original, input)?; let (input, _) = streaming_bytes::tag(TAG_ESI_CHOOSE_CLOSE).parse(input)?; - let mut when_branches = vec![]; + let mut when_branches = Vec::with_capacity(v.len()); let mut otherwise_events = Vec::new(); let mut current_when: Option = None; let mut in_otherwise = false; diff --git a/esi/src/parser_types.rs b/esi/src/parser_types.rs index 4d4bf06..9706a9b 100644 --- a/esi/src/parser_types.rs +++ b/esi/src/parser_types.rs @@ -119,8 +119,8 @@ pub enum Element { /// /// Produced by the expression parser for attribute values, `esi:vars`, /// `esi:when` test conditions, and `esi:assign` bodies. Evaluated at -/// runtime by [`eval_expr`](crate::expression::eval_expr) to produce -/// a [`Value`](crate::expression::Value). +/// runtime by `eval_expr` to produce +/// a `Value`. #[derive(Debug, Clone, PartialEq)] pub enum Expr { /// Integer literal (e.g. `42`, `-1`). diff --git a/esi/tests/esi-tests.rs b/esi/tests/esi-tests.rs index c6a5530..4274fd8 100644 --- a/esi/tests/esi-tests.rs +++ b/esi/tests/esi-tests.rs @@ -2322,10 +2322,16 @@ $(copy)
"#; // dict should be unchanged: {1: 'one', 2: 'two', 3: 'three'} // dict_to_string sorts by key and formats as k=v&k=v - assert_eq!(lines[0], "1=one&2=two&3=three", "Original dict should be unchanged"); + assert_eq!( + lines[0], "1=one&2=two&3=three", + "Original dict should be unchanged" + ); // copy should have key 2 replaced: {1: 'one', 2: 'Second', 3: 'three'} - assert_eq!(lines[1], "1=one&2=Second&3=three", "Copy should have key 2 = 'Second'"); + assert_eq!( + lines[1], "1=one&2=Second&3=three", + "Copy should have key 2 = 'Second'" + ); Ok(()) } From f83cfe996d546cb0896116acc9b64c8daf9cf894 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Wed, 4 Mar 2026 17:55:59 -0600 Subject: [PATCH 101/119] refactor(parser): simplify parse_content_complete by removing duplicated parsers --- esi/src/parser.rs | 92 ++++++++++------------------------------------- 1 file changed, 19 insertions(+), 73 deletions(-) diff --git a/esi/src/parser.rs b/esi/src/parser.rs index 5ac3dfc..7f4b9d6 100644 --- a/esi/src/parser.rs +++ b/esi/src/parser.rs @@ -899,43 +899,11 @@ fn esi_vars_short(input: &[u8]) -> IResult<&[u8], ParseResult, Error<&[u8]>> { .parse(input) } -/// Parse content for tags that don't support nested ESI (text + expressions + HTML only) +/// Parse content for tags that don't support nested ESI (text + expressions only) /// Uses COMPLETE mode - input must be captured entirely before calling this -/// Parses: text, expressions ($...), and HTML tags -/// Does NOT parse: nested ESI tags (treated as literal text) +/// Parses: text and expressions ($...) 
+/// Does NOT parse: nested ESI tags or HTML tags (treated as literal text) fn parse_content_complete(original: &Bytes, content: &[u8]) -> Vec { - // Text in complete mode - stops at $ or < for expression/tag parsing - fn text_complete<'a>( - original: &Bytes, - input: &'a [u8], - ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - take_while1(|c| !is_dollar(c) && !is_open_bracket(c)) - .map(|s: &[u8]| ParseResult::Single(Element::Content(slice_as_bytes(original, s)))) - .parse(input) - } - - // HTML tag in complete mode - any tag that's NOT an ESI tag - fn html_tag_complete<'a>( - original: &Bytes, - input: &'a [u8], - ) -> IResult<&'a [u8], ParseResult, Error<&'a [u8]>> { - // Check that this is NOT an esi: tag - let (_, _) = peek((open_bracket, not(tag(ESI_PREFIX)))).parse(input)?; - - // Parse the HTML tag (simplified - just capture until >) - let (rest, html) = recognize(( - open_bracket, - take_until(&[CLOSE_BRACKET][..]), - close_bracket, - )) - .parse(input)?; - - Ok(( - rest, - ParseResult::Single(Element::Html(slice_as_bytes(original, html))), - )) - } - // Parse content using complete parsers let mut elements = Vec::new(); let mut remaining = content; @@ -948,22 +916,15 @@ fn parse_content_complete(original: &Bytes, content: &[u8]) -> Vec { continue; } - // Try HTML tag (starts with < but NOT IResult<&[u8], &[u8], Error<&[u8]>> streaming_bytes::tag(&[CLOSE_BRACKET] as &[u8]).parse(input) } -/// Helper to find and consume the closing '>' character -#[inline] -fn close_bracket(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { - tag(&[CLOSE_BRACKET] as &[u8]).parse(input) -} - -/// Helper to find and consume the closing self-closing tag characters '/>' +/// Helper to find and consume the closing self-closing tag characters '/> #[inline] fn streaming_self_closing(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { streaming_bytes::tag(TAG_SELF_CLOSE).parse(input) } -/// Helper to find and consume the opening '<' character -#[inline] -fn 
open_bracket(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { - tag(&[OPEN_BRACKET] as &[u8]).parse(input) -} - /// Helper to find and consume the opening '<' character #[inline] fn streaming_open_bracket(input: &[u8]) -> IResult<&[u8], &[u8], Error<&[u8]>> { @@ -1749,29 +1698,26 @@ fn list_literal(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { /// Parse primary expressions (highest precedence atoms) /// Handles: variables, functions, literals, grouped expressions +/// Extends interpolated_expression with grouped expressions and negative integers fn primary_expr(input: &[u8]) -> IResult<&[u8], Expr, Error<&[u8]>> { match input.first() { - // Parse grouped expression: (expr) + // Parse grouped expression: (expr) — only valid in expression context, not interpolated content Some(&OPEN_PAREN) => delimited( tag(&[OPEN_PAREN] as &[u8]), delimited(multispace0, expr, multispace0), tag(&[CLOSE_PAREN] as &[u8]), ) .parse(input), - // Parse dictionary literal: {key:value, key:value} - Some(&OPEN_BRACE) => dict_literal(input), - // Parse list literal: [value, value] - Some(&OPEN_SQ_BRACKET) => list_literal(input), - // Parse function call or variable: $func(...) 
or $(VAR) - Some(&DOLLAR) => alt((esi_function, esi_variable)).parse(input), - // Parse integer literal (with optional leading minus) - Some(b'0'..=b'9' | &HYPHEN) => integer(input), - // Parse string literal (single or triple quoted) - Some(&SINGLE_QUOTE) => string(input), - _ => Err(nom::Err::Error(Error::new( - input, - nom::error::ErrorKind::Alt, - ))), + // Parse negative integer — only valid in expression context + Some(&HYPHEN) => integer(input), + // Delegate shared cases to interpolated_expression's dispatch + _ => { + let (rest, result) = interpolated_expression(input)?; + match result { + ParseResult::Single(Element::Expr(expr)) => Ok((rest, expr)), + _ => unreachable!("interpolated_expression always returns Single(Expr)"), + } + } } } From 880b285a1cb2dd4bd7794747813cf0b2c0fb4701 Mon Sep 17 00:00:00 2001 From: Vadim Getmanshchuk Date: Wed, 4 Mar 2026 22:45:04 -0600 Subject: [PATCH 102/119] Enhance ESI parsing and configuration - Clarified some documentation mistakes in README - Improved ESI expression evaluation by optimizing null value handling. - Refactored caching logic to utilize `Cow` for better performance and reduced allocations. - Enhanced error handling and logging in cache configuration and request processing. - Updated various functions to use zero-copy techniques for string manipulation, improving efficiency. - Cleaned up code formatting and comments for better readability and maintainability. 
--- README.md | 20 +-- esi/src/cache.rs | 21 ++- esi/src/element_handler.rs | 12 +- esi/src/expression.rs | 180 +++++++++++----------- esi/src/functions.rs | 143 ++++++++++------- esi/src/lib.rs | 28 ++-- esi/src/literals.rs | 2 +- esi/src/parser.rs | 25 +-- esi/tests/parser.rs | 85 ++++++++++ examples/esi_example_variants/src/main.rs | 3 +- 10 files changed, 316 insertions(+), 203 deletions(-) diff --git a/README.md b/README.md index 6e7fac0..abdabce 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ Both `` and `` support the following attributes: **Caching:** - `ttl="duration"` - Cache time-to-live (e.g., `"120m"`, `"1h"`, `"2d"`, `"0s"` to disable) -- `no-store="true"` - Bypass cache entirely +- `no-store="on|off"` - Enable/disable cache bypass (`on` bypasses cache, `off` leaves caching enabled) **Request Configuration:** @@ -168,9 +168,9 @@ These functions modify the HTTP response sent to the client: ```html $set_response_code(404, 'Page not found') ``` -- `$set_redirect(url [, code])` - Set HTTP redirect (default 302) +- `$set_redirect(url)` - Set HTTP redirect (302 Moved Temporarily) ```html - $set_redirect('https://example.com/new-location') $set_redirect('https://example.com/moved', 301) + $set_redirect('https://example.com/new-location') $set_redirect('https://example.com/moved' ``` **Diagnostic:** @@ -226,7 +226,7 @@ The following variables are available in ESI expressions: let config = esi::Configuration::default() .with_escaped(true) // unescape HTML entities in URLs (default: true) .with_chunk_size(32768) // streaming read buffer, in bytes (default: 16384) - .with_function_recursion_depth(10) // max depth for user-defined function calls (default: 5) + .with_function_recursion_depth(10) // max depth for user-defined function calls (default: 5) .with_caching(esi::cache::CacheConfig { is_rendered_cacheable: true, rendered_cache_control: true, @@ -237,12 +237,12 @@ let config = esi::Configuration::default() }); ``` -| Field | Builder method | 
Default | Description | -| -------------------------- | -------------------------------------- | --------- | ---------------------------------------------------------------------------------------------------------------------------------- | -| `is_escaped_content` | `with_escaped(bool)` | `true` | Unescape HTML entities in URLs. Set to `false` for non-HTML templates (e.g. JSON). | -| `chunk_size` | `with_chunk_size(usize)` | `16384` | Size (bytes) of the read buffer used when streaming ESI input. Larger values may improve throughput; smaller values reduce memory. | -| `function_recursion_depth` | `with_function_recursion_depth(usize)` | `5` | Maximum call-stack depth for user-defined ESI functions. | -| `cache` | `with_caching(CacheConfig)` | see below | Cache settings for rendered output and included fragments. | +| Field | Builder method | Default | Description | +| -------------------------- | ------------------------------------- | --------- | ---------------------------------------------------------------------------------------------------------------------------------- | +| `is_escaped_content` | `with_escaped(bool)` | `true` | Unescape HTML entities in URLs. Set to `false` for non-HTML templates (e.g. JSON). | +| `chunk_size` | `with_chunk_size(usize)` | `16384` | Size (bytes) of the read buffer used when streaming ESI input. Larger values may improve throughput; smaller values reduce memory. | +| `function_recursion_depth` | `max_function_recursion_depth(usize)` | `5` | Maximum call-stack depth for user-defined ESI functions. | +| `cache` | `with_caching(CacheConfig)` | see below | Cache settings for rendered output and included fragments. 
| **`CacheConfig` fields:** diff --git a/esi/src/cache.rs b/esi/src/cache.rs index 54da937..d94acb8 100644 --- a/esi/src/cache.rs +++ b/esi/src/cache.rs @@ -46,7 +46,7 @@ impl Default for CacheConfig { pub fn calculate_ttl(response: &Response, config: &CacheConfig) -> Result> { // If includes_force_ttl is set, everything is cacheable if let Some(force_ttl) = config.includes_force_ttl { - trace!("Using includes_force_ttl: {}s", force_ttl); + trace!("Using includes_force_ttl: {force_ttl}s"); return Ok(Some(force_ttl)); } @@ -58,19 +58,18 @@ pub fn calculate_ttl(response: &Response, config: &CacheConfig) -> Result