From e15ddef375e08471164f01711ec41c5e210afa42 Mon Sep 17 00:00:00 2001
From: Richard Lundeen <rlundeen@microsoft.com>
Date: Thu, 9 Apr 2026 16:32:01 -0700
Subject: [PATCH 1/3] Render seed values through Jinja only when is_jinja_template is set

---
 pyrit/models/seeds/seed_objective.py             |  5 ++---
 pyrit/models/seeds/seed_prompt.py                |  5 ++---
 tests/unit/datasets/test_beaver_tails_dataset.py | 13 +++++++------
 tests/unit/datasets/test_toxic_chat_dataset.py   | 12 +++++++-----
 4 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/pyrit/models/seeds/seed_objective.py b/pyrit/models/seeds/seed_objective.py
index c36dfef699..68711b7400 100644
--- a/pyrit/models/seeds/seed_objective.py
+++ b/pyrit/models/seeds/seed_objective.py
@@ -35,9 +35,8 @@ def __post_init__(self) -> None:
         """
         if self.is_general_technique:
             raise ValueError("SeedObjective cannot be a general technique.")
-        if not self.is_jinja_template:
-            self.value = self.escape_for_jinja(self.value)
-        self.value = super().render_template_value_silent(**PATHS_DICT)
+        if self.is_jinja_template:
+            self.value = super().render_template_value_silent(**PATHS_DICT)
 
     @classmethod
     def from_yaml_with_required_parameters(
diff --git a/pyrit/models/seeds/seed_prompt.py b/pyrit/models/seeds/seed_prompt.py
index c6894531e5..aac2877013 100644
--- a/pyrit/models/seeds/seed_prompt.py
+++ b/pyrit/models/seeds/seed_prompt.py
@@ -55,9 +55,8 @@ def __post_init__(self) -> None:
             ValueError: If file-based data type cannot be inferred from extension.
 
         """
-        if not self.is_jinja_template:
-            self.value = self.escape_for_jinja(self.value)
-        self.value = self.render_template_value_silent(**PATHS_DICT)
+        if self.is_jinja_template:
+            self.value = self.render_template_value_silent(**PATHS_DICT)
 
         if not self.data_type:
             # If data_type is not provided, infer it from the value
diff --git a/tests/unit/datasets/test_beaver_tails_dataset.py b/tests/unit/datasets/test_beaver_tails_dataset.py
index 77bc9dd5b2..a2953e1c86 100644
--- a/tests/unit/datasets/test_beaver_tails_dataset.py
+++ b/tests/unit/datasets/test_beaver_tails_dataset.py
@@ -97,14 +97,14 @@ def test_dataset_name(self):
         assert loader.dataset_name == "beaver_tails"
 
     @pytest.mark.asyncio
-    async def test_fetch_dataset_skips_prompt_with_template_syntax_error(self):
-        """Test that prompts causing TemplateSyntaxError are skipped gracefully."""
+    async def test_fetch_dataset_preserves_prompt_with_jinja_syntax(self):
+        """Test that prompts containing Jinja2 syntax are preserved as literal text."""
 
         class MockDataset:
             def __init__(self):
                 self._data = [
                     {
-                        "prompt": "This contains {% endraw %} which breaks Jinja2",
+                        "prompt": "This contains {% endraw %} which is Jinja2 syntax",
                         "response": "response",
                         "category": {"animal_abuse": True},
                         "is_safe": False,
@@ -124,6 +124,7 @@ def __iter__(self):
 
         with patch.object(loader, "_fetch_from_huggingface", new=AsyncMock(return_value=MockDataset())):
             dataset = await loader.fetch_dataset()
-            # The broken prompt should be skipped, only the normal one remains
-            assert len(dataset.seeds) == 1
-            assert dataset.seeds[0].value == "Normal unsafe prompt"
+            # Both prompts should be preserved — untrusted text is never passed through Jinja
+            assert len(dataset.seeds) == 2
+            assert dataset.seeds[0].value == "This contains {% endraw %} which is Jinja2 syntax"
+            assert dataset.seeds[1].value == "Normal unsafe prompt"
diff --git a/tests/unit/datasets/test_toxic_chat_dataset.py b/tests/unit/datasets/test_toxic_chat_dataset.py
index 71de6c89a5..eca32cf912 100644
--- a/tests/unit/datasets/test_toxic_chat_dataset.py
+++ b/tests/unit/datasets/test_toxic_chat_dataset.py
@@ -92,8 +92,8 @@ async def test_fetch_dataset_preserves_jinja2_content(self):
             assert dataset.seeds[1].value == "<!DOCTYPE html>{%block%}broken"
 
     @pytest.mark.asyncio
-    async def test_fetch_dataset_skips_jinja2_incompatible_entries(self):
-        """Test that entries with Jinja2-incompatible content are skipped."""
+    async def test_fetch_dataset_preserves_jinja2_syntax_in_entries(self):
+        """Test that entries with Jinja2 syntax are preserved as literal text."""
         data_with_endraw = [
             {
                 "conv_id": "good1",
@@ -105,7 +105,7 @@ async def test_fetch_dataset_skips_jinja2_incompatible_entries(self):
                 "openai_moderation": "[]",
             },
             {
-                "conv_id": "bad1",
+                "conv_id": "jinja1",
                 "user_input": "This has {% endraw %} in it",
                 "model_output": "N/A",
                 "human_annotation": "False",
@@ -128,9 +128,11 @@ async def test_fetch_dataset_skips_jinja2_incompatible_entries(self):
         with patch.object(loader, "_fetch_from_huggingface", new=AsyncMock(return_value=data_with_endraw)):
             dataset = await loader.fetch_dataset()
 
-            assert len(dataset.seeds) == 2
+            # All entries are preserved — untrusted text is never passed through Jinja
+            assert len(dataset.seeds) == 3
             assert dataset.seeds[0].value == "Normal question"
-            assert dataset.seeds[1].value == "Another normal question"
+            assert dataset.seeds[1].value == "This has {% endraw %} in it"
+            assert dataset.seeds[2].value == "Another normal question"
 
     @pytest.mark.asyncio
     async def test_fetch_dataset_preserves_for_loop_content(self):

From 02e609ef260a26f67891709e2a39dd756f222540 Mon Sep 17 00:00:00 2001
From: Richard Lundeen <rlundeen@microsoft.com>
Date: Thu, 9 Apr 2026 17:04:26 -0700
Subject: [PATCH 2/3] Remove Jinja workarounds from BeaverTails and ToxicChat loaders

---
 .../remote/beaver_tails_dataset.py            | 27 ++++------
 .../remote/toxic_chat_dataset.py              | 50 ++++++-------------
 2 files changed, 27 insertions(+), 50 deletions(-)

diff --git a/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py b/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py
index f1f2d39135..76c8dec70c 100644
--- a/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py
+++ b/pyrit/datasets/seed_datasets/remote/beaver_tails_dataset.py
@@ -3,8 +3,6 @@
 
 import logging
 
-from jinja2 import TemplateSyntaxError
-
 from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
     _RemoteDatasetLoader,
 )
@@ -101,21 +99,18 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
 
             harm_categories = [k for k, v in item["category"].items() if v]
 
-            try:
-                seed_prompts.append(
-                    SeedPrompt(
-                        value=item["prompt"],
-                        data_type="text",
-                        dataset_name=self.dataset_name,
-                        harm_categories=harm_categories,
-                        description=description,
-                        source=source_url,
-                        authors=authors,
-                        groups=groups,
-                    )
+            seed_prompts.append(
+                SeedPrompt(
+                    value=item["prompt"],
+                    data_type="text",
+                    dataset_name=self.dataset_name,
+                    harm_categories=harm_categories,
+                    description=description,
+                    source=source_url,
+                    authors=authors,
+                    groups=groups,
                 )
-            except TemplateSyntaxError:
-                logger.warning("Skipping BeaverTails prompt due to Jinja2 template syntax error in prompt text")
+            )
 
         logger.info(f"Successfully loaded {len(seed_prompts)} prompts from BeaverTails dataset")
 
diff --git a/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py b/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py
index 20a025781b..78259f8f3e 100644
--- a/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py
+++ b/pyrit/datasets/seed_datasets/remote/toxic_chat_dataset.py
@@ -5,8 +5,6 @@
 import logging
 from typing import Any
 
-from jinja2 import TemplateSyntaxError
-
 from pyrit.datasets.seed_datasets.remote.remote_dataset_loader import (
     _RemoteDatasetLoader,
 )
@@ -122,42 +120,26 @@ async def fetch_dataset(self, *, cache: bool = True) -> SeedDataset:
         source_url = f"https://huggingface.co/datasets/{self.HF_DATASET_NAME}"
         groups = ["UC San Diego"]
 
-        raw_prefix = "{% raw %}"
-        raw_suffix = "{% endraw %}"
-
         seed_prompts: list[SeedPrompt] = []
         for item in data:
             user_input = item["user_input"]
             harm_categories = self._extract_harm_categories(item)
-            try:
-                prompt = SeedPrompt(
-                    value=user_input,
-                    data_type="text",
-                    dataset_name=self.dataset_name,
-                    description=description,
-                    source=source_url,
-                    authors=authors,
-                    groups=groups,
-                    harm_categories=harm_categories,
-                    metadata={
-                        "toxicity": str(item.get("toxicity", "")),
-                        "jailbreaking": str(item.get("jailbreaking", "")),
-                        "human_annotation": str(item.get("human_annotation", "")),
-                    },
-                )
-
-                # If user_input contains Jinja2 control structures (e.g., {% for %}),
-                # render_template_value_silent may skip rendering and leave the raw wrapper.
-                if prompt.value.startswith(raw_prefix) and prompt.value.endswith(raw_suffix):
-                    prompt.value = prompt.value[len(raw_prefix) : -len(raw_suffix)]
-
-                seed_prompts.append(prompt)
-            except TemplateSyntaxError:
-                conv_id = item.get("conv_id", "unknown")
-                logger.debug(
-                    f"Skipping entry with conv_id={conv_id}: failed to parse as Jinja2 template",
-                    exc_info=True,
-                )
+            prompt = SeedPrompt(
+                value=user_input,
+                data_type="text",
+                dataset_name=self.dataset_name,
+                description=description,
+                source=source_url,
+                authors=authors,
+                groups=groups,
+                harm_categories=harm_categories,
+                metadata={
+                    "toxicity": str(item.get("toxicity", "")),
+                    "jailbreaking": str(item.get("jailbreaking", "")),
+                    "human_annotation": str(item.get("human_annotation", "")),
+                },
+            )
+            seed_prompts.append(prompt)
 
         logger.info(f"Successfully loaded {len(seed_prompts)} prompts from ToxicChat dataset")
 

From cb1c54b48cbbe7fd36c9c968ce3fb5a4ebdba974 Mon Sep 17 00:00:00 2001
From: Richard Lundeen <rlundeen@microsoft.com>
Date: Thu, 9 Apr 2026 17:18:18 -0700
Subject: [PATCH 3/3] Document why only trusted seed templates are rendered through Jinja

---
 pyrit/models/seeds/seed_objective.py | 1 +
 pyrit/models/seeds/seed_prompt.py    | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/pyrit/models/seeds/seed_objective.py b/pyrit/models/seeds/seed_objective.py
index 68711b7400..0f0edd743a 100644
--- a/pyrit/models/seeds/seed_objective.py
+++ b/pyrit/models/seeds/seed_objective.py
@@ -35,6 +35,7 @@ def __post_init__(self) -> None:
         """
         if self.is_general_technique:
             raise ValueError("SeedObjective cannot be a general technique.")
+        # Only trusted templates are rendered through Jinja — see seed_prompt.py for details.
         if self.is_jinja_template:
             self.value = super().render_template_value_silent(**PATHS_DICT)
 
diff --git a/pyrit/models/seeds/seed_prompt.py b/pyrit/models/seeds/seed_prompt.py
index aac2877013..1a4e8a566d 100644
--- a/pyrit/models/seeds/seed_prompt.py
+++ b/pyrit/models/seeds/seed_prompt.py
@@ -55,6 +55,10 @@ def __post_init__(self) -> None:
             ValueError: If file-based data type cannot be inferred from extension.
 
         """
+        # Only trusted templates (is_jinja_template=True, e.g. from YAML files) are rendered
+        # through Jinja. Untrusted text (e.g. from remote datasets) must NOT be rendered — a
+        # crafted payload containing "{% endraw %}" can escape the raw wrapper and execute
+        # arbitrary Jinja expressions. See seed_objective.py for the same pattern.
         if self.is_jinja_template:
             self.value = self.render_template_value_silent(**PATHS_DICT)