huggingface · aazizyan · May 27, 2026 · May 27, 2026
diff --git a/docs/source/chat_templates.md b/docs/source/chat_templates.md
@@ -20,7 +20,7 @@ TRL ships patched templates under [`trl/chat_templates/`](https://github.com/hug
 
 ## Supported model families
 
-TRL stores reference copies of the original templates so it can identify supported models at init and swap in a training template when needed. The following families are recognized: Cohere, Cohere2, DeepSeek-V3, Gemma, Gemma3, GLM-4-MoE, GPT-OSS, Llama 3 / 3.1 / 3.2, Phi-3, Phi-3.5, Qwen2-VL, Qwen2.5, Qwen2.5-VL, Qwen3 (including the Instruct-2507 variant), Qwen3-VL, Qwen3.5, Qwen3.6.
+TRL stores reference copies of the original templates so it can identify supported models at init and swap in a training template when needed. The following families are recognized: Cohere, Cohere2, DeepSeek-V3, Gemma, Gemma3, GLM-4-MoE, GPT-OSS, Idefics3, Llama 3 / 3.1 / 3.2, Phi-3, Phi-3.5, Qwen2-VL, Qwen2.5, Qwen2.5-VL, Qwen3 (including the Instruct-2507 variant), Qwen3-VL, Qwen3.5, Qwen3.6.
 
 ## Training templates
 
@@ -108,6 +108,12 @@ Patched GPT-OSS template. Diff vs `gptoss.jinja`:
 
 Wrap assistant message output with `&#123;% generation %&#125;` / `&#123;% endgeneration %&#125;` so that `return_assistant_tokens_mask=True` produces correct masks for SFT assistant-only loss.
 
+### `idefics3_training.jinja`
+
+Patched Idefics3 template. Diff vs `idefics3.jinja`:
+
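+Render assistant messages in a dedicated branch of the message loop so that the `&#123;% generation %&#125;` / `&#123;% endgeneration %&#125;` markers wrap the assistant content (everything after the `Assistant:` cue, up to and including the trailing `<end_of_utterance>` and newline). This enables `return_assistant_tokens_mask=True` to produce correct masks for SFT assistant-only loss.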
+
 ### `llama3_training.jinja`
 
 Patched Llama 3 template. Diff vs `llama3.jinja`:

diff --git a/tests/test_chat_template_utils.py b/tests/test_chat_template_utils.py
@@ -472,6 +472,7 @@ def test_prefix_preserving_template_processor(self):
             ),
         ),
         pytest.param("trl-internal-testing/tiny-GptOssForCausalLM", id="gptoss"),
+        pytest.param("trl-internal-testing/tiny-Idefics3ForConditionalGeneration", id="idefics3"),
         pytest.param("trl-internal-testing/tiny-LlamaForCausalLM-3", id="llama3"),
         pytest.param("trl-internal-testing/tiny-Phi3ForCausalLM-3", id="phi3"),
         pytest.param("trl-internal-testing/tiny-Phi3ForCausalLM-3.5", id="phi3.5"),

diff --git a/trl/chat_template_utils.py b/trl/chat_template_utils.py
@@ -321,6 +321,8 @@ def clone_chat_template(
 
 gptoss_chat_template = (_CHAT_TEMPLATES_DIR / "gptoss.jinja").read_text(encoding="utf-8")
 
+idefics3_chat_template = (_CHAT_TEMPLATES_DIR / "idefics3.jinja").read_text(encoding="utf-8")
+
 llama3_chat_template = (_CHAT_TEMPLATES_DIR / "llama3.jinja").read_text(encoding="utf-8")
 
 llama3_1_chat_template = (_CHAT_TEMPLATES_DIR / "llama3_1.jinja").read_text(encoding="utf-8")
@@ -562,6 +564,8 @@ def is_chat_template_prefix_preserving(processing_class: PreTrainedTokenizerBase
 
 gptoss_training_chat_template = (_CHAT_TEMPLATES_DIR / "gptoss_training.jinja").read_text(encoding="utf-8")
 
+idefics3_training_chat_template = (_CHAT_TEMPLATES_DIR / "idefics3_training.jinja").read_text(encoding="utf-8")
+
 llama3_training_chat_template = (_CHAT_TEMPLATES_DIR / "llama3_training.jinja").read_text(encoding="utf-8")
 
 phi3_training_chat_template = (_CHAT_TEMPLATES_DIR / "phi3_training.jinja").read_text(encoding="utf-8")
@@ -600,9 +604,9 @@ def get_training_chat_template(
 
     Returns a patched chat template that is prefix-preserving and includes `{%% generation %%}` / `{%% endgeneration
     %%}` markers for assistant-only loss masking. Returns `None` if the template already satisfies both requirements.
-    Currently Cohere, Cohere 2, DeepSeek-V3, Gemma, Gemma 2, Gemma 3, GLM-4-MoE, GPT-OSS, LLaMA 3, Phi-3, Phi-3.5,
-    Qwen2-VL, Qwen2.5, Qwen2.5-VL, Qwen3 (including the Instruct-2507 variant), Qwen3-VL, Qwen3.5, and Qwen3.6 are
-    supported.
+    Currently Cohere, Cohere 2, DeepSeek-V3, Gemma, Gemma 2, Gemma 3, GLM-4-MoE, GPT-OSS, Idefics3, LLaMA 3, Phi-3,
+    Phi-3.5, Qwen2-VL, Qwen2.5, Qwen2.5-VL, Qwen3 (including the Instruct-2507 variant), Qwen3-VL, Qwen3.5, and Qwen3.6
+    are supported.
 
     Args:
         processing_class (`PreTrainedTokenizerBase` or `ProcessorMixin`):
@@ -689,6 +693,9 @@ def get_training_chat_template(
     if processing_class.chat_template == gptoss_chat_template:
         return gptoss_training_chat_template
 
+    if processing_class.chat_template == idefics3_chat_template:
+        return idefics3_training_chat_template
+
     if processing_class.chat_template == llama3_chat_template:
         return llama3_training_chat_template
 

diff --git a/trl/chat_templates/README.md b/trl/chat_templates/README.md
@@ -41,6 +41,10 @@ Original GLM-4-MoE chat template.
 
 Original GPT-OSS chat template.
 
+### `idefics3.jinja`
+
+Original Idefics3 chat template (as shipped by `HuggingFaceM4/Idefics3-8B-Llama3`). Does not support tool calling.
+
 ### `llama3.jinja`
 
 Original Llama 3 chat template.
@@ -136,6 +140,12 @@ Patched GPT-OSS template. Diff vs `gptoss.jinja`:
 
 Wrap assistant message output with `{% generation %}` / `{% endgeneration %}` so that `return_assistant_tokens_mask=True` produces correct masks for SFT assistant-only loss.
 
+### `idefics3_training.jinja`
+
+Patched Idefics3 template. Diff vs `idefics3.jinja`:
+
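+Render assistant messages in a dedicated branch of the message loop so that the `{% generation %}` / `{% endgeneration %}` markers wrap the assistant content (everything after the `Assistant:` cue, up to and including the trailing `<end_of_utterance>` and newline). This enables `return_assistant_tokens_mask=True` to produce correct masks for SFT assistant-only loss.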
+
 ### `llama3_training.jinja`
 
 Patched Llama 3 template. Diff vs `llama3.jinja`:

diff --git a/trl/chat_templates/idefics3.jinja b/trl/chat_templates/idefics3.jinja
@@ -0,0 +1,2 @@
+<|begin_of_text|>{% for message in messages %}{{message['role'].capitalize()}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>
+{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}
diff --git a/trl/chat_templates/idefics3_training.jinja b/trl/chat_templates/idefics3_training.jinja
@@ -0,0 +1,10 @@
+{#- Training variant of the Idefics3 chat template (see idefics3.jinja for the original).
+    Modifications vs the original:
+    - Split the assistant message into its own branch so the {% generation %} / {% endgeneration %}
+      markers wrap the assistant content (everything after the 'Assistant:' prompt cue, up to and
+      including the trailing '<end_of_utterance>\n'). This enables assistant-only loss masking in
+      SFT training.
+-#}
+<|begin_of_text|>{% for message in messages %}{% if message['role'] == 'assistant' %}Assistant:{% generation %}{% if message['content'][0]['type'] != 'image' %} {% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>
+{% endgeneration %}{% else %}{{message['role'].capitalize()}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>
+{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		<|begin_of_text|>{% for message in messages %}{{message['role'].capitalize()}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>
		{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}
Comment thread aazizyan marked this conversation as resolved.