From 9e022ccd0d922992cf95cff06addf184c2637eba Mon Sep 17 00:00:00 2001
From: Faisal Fehad
Date: Sat, 18 Apr 2026 21:12:25 +0100
Subject: [PATCH] fix: fall back to openai-harmony rendering for gpt-oss
 models without chat_template
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gpt-oss models whose packaged tokenizer does not ship with a
``chat_template`` (several community MLX quants of the context-1 family,
for example) fail every chat-style endpoint (``/v1/chat/completions``,
``/v1/messages``, ``/v1/responses``) with:

    Chat template error: Cannot use chat template functions because
    tokenizer.chat_template is not set

... even though oMLX has full Harmony infrastructure (adapter, streaming
parser, anthropic→harmony converter, structured tool-call extraction). The
only reachable endpoint is ``/v1/completions``, which drops the parser's
``tool_calls`` on the floor — so tool use on those models is effectively
unusable.

Render the prompt directly via ``openai-harmony`` (already a dependency)
when ``model_type == "gpt_oss"`` and the tokenizer has no ``chat_template``.
Models that ship a template continue to use it, so nothing changes on the
happy path.

- ``omlx/adapter/harmony.py``: add ``render_harmony_prompt()``. Translates
  OpenAI messages + tools into a Harmony prompt string; handles content
  blocks, assistant ``tool_calls``, ``tool`` responses, ``reasoning_effort``
  and ``conversation_start_date`` via ``chat_template_kwargs``. Resolves
  tool-response function names via ``tool_call_id`` when the ``tool``
  message omits ``name``.
- ``omlx/engine/batched.py::_apply_chat_template``: branch to the new
  renderer only when the fallback is needed.
- ``tests/test_harmony_render.py``: 19 unit tests covering basic rendering,
  content-as-list, tool rendering, tool-call roundtrip (incl. name
  resolution via tool_call_id), template kwargs, and malformed input.

Verified end-to-end on a gpt-oss model without ``chat_template``:
``/v1/chat/completions`` and ``/v1/messages`` now return structured
``tool_calls``; a model that does ship a template is unaffected.
---
 omlx/adapter/harmony.py      | 205 ++++++++++++++++++++++++
 omlx/engine/batched.py       |  17 +-
 tests/test_harmony_render.py | 298 +++++++++++++++++++++++++++++++++++
 3 files changed, 519 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_harmony_render.py

diff --git a/omlx/adapter/harmony.py b/omlx/adapter/harmony.py
index ca1c796b..0ae74629 100644
--- a/omlx/adapter/harmony.py
+++ b/omlx/adapter/harmony.py
@@ -20,15 +20,24 @@
 - commentary: Tool/function calls (non-streaming only)
 """
 
+import json
 import logging
 import re
 from dataclasses import dataclass, field
 from typing import Any
 
 from openai_harmony import (
+    Author,
+    Conversation,
+    DeveloperContent,
     HarmonyEncoding,
+    HarmonyEncodingName,
+    Message,
+    ReasoningEffort,
     Role,
     StreamableParser,
+    SystemContent,
+    ToolDescription,
     load_harmony_encoding,
 )
 
@@ -108,6 +117,202 @@ def preprocess_harmony_messages(
     return result
 
 
+_REASONING_EFFORT_MAP = {
+    "low": ReasoningEffort.LOW,
+    "medium": ReasoningEffort.MEDIUM,
+    "high": ReasoningEffort.HIGH,
+}
+
+# Shared across all render_harmony_prompt() calls. The encoding is
+# stateless and cheap to reuse: parse_tool_calls_from_tokens below
+# reloads it on every call, whereas here we pay the load cost once.
+_GPT_OSS_ENCODING: HarmonyEncoding | None = None + + +def _get_gpt_oss_encoding() -> HarmonyEncoding: + global _GPT_OSS_ENCODING + if _GPT_OSS_ENCODING is None: + _GPT_OSS_ENCODING = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) + return _GPT_OSS_ENCODING + + +def _extract_text(content: Any) -> str: + """Flatten OpenAI content (str or list of blocks) into plain text.""" + if isinstance(content, str): + return content + if not isinstance(content, list): + return "" if content is None else str(content) + parts: list[str] = [] + for block in content: + if isinstance(block, dict): + if block.get("type") == "text": + parts.append(block.get("text", "")) + elif "text" in block: + parts.append(block["text"]) + elif isinstance(block, str): + parts.append(block) + return "".join(parts) + + +def _tool_description_from_openai(tool: dict[str, Any]) -> ToolDescription | None: + """Convert an OpenAI tool spec (``{"type":"function","function":{...}}``) to a ToolDescription.""" + spec = tool.get("function") if tool.get("type") == "function" else tool + if not isinstance(spec, dict): + return None + name = spec.get("name") + if not name: + return None + description = spec.get("description", "") or "" + parameters = spec.get("parameters") or spec.get("input_schema") or { + "type": "object", + "properties": {}, + } + return ToolDescription.new(name, description, parameters=parameters) + + +def render_harmony_prompt( + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, + chat_template_kwargs: dict[str, Any] | None = None, +) -> str: + """Render chat messages + tools into a Harmony-format prompt string. + + Used as a tokenizer-free fallback for gpt-oss models whose packaged + tokenizer does not ship with a ``chat_template``. The official + ``openai-harmony`` library does the heavy lifting; this wrapper + translates the OpenAI-style inputs oMLX already uses. + + Args: + messages: OpenAI chat messages. ``system`` becomes developer + instructions; ``assistant`` with a ``tool_calls`` field is + emitted on the commentary channel; ``tool`` messages are + rendered as tool responses. + tools: Optional OpenAI-format function tools. Also accepts raw + function specs (``{"name", "description", "parameters"}``). + chat_template_kwargs: Optional template-style kwargs. Supports + ``reasoning_effort`` ("low"/"medium"/"high") and + ``conversation_start_date``. + + Returns: + A decoded Harmony prompt string ready to feed into ``generate``. + """ + ct = chat_template_kwargs or {} + + system_content = SystemContent.new() + effort = ct.get("reasoning_effort") + if isinstance(effort, str): + mapped = _REASONING_EFFORT_MAP.get(effort.lower()) + if mapped is not None: + system_content = system_content.with_reasoning_effort(mapped) + start_date = ct.get("conversation_start_date") + if isinstance(start_date, str) and start_date: + system_content = system_content.with_conversation_start_date(start_date) + + system_texts: list[str] = [] + convo_msgs: list[Message] = [] + # Maps tool_call_id -> function name for resolving ``role=tool`` messages + # whose ``name`` field is omitted (OpenAI spec allows this). 
+ tool_call_names: dict[str, str] = {} + + for msg in messages: + if not isinstance(msg, dict): + continue + role = msg.get("role") + + if role == "system": + text = _extract_text(msg.get("content")) + if text: + system_texts.append(text) + continue + + if role == "user": + text = _extract_text(msg.get("content")) + convo_msgs.append(Message.from_role_and_content(Role.USER, text)) + continue + + if role == "assistant": + text = _extract_text(msg.get("content")) + if text: + convo_msgs.append( + Message.from_role_and_content(Role.ASSISTANT, text) + .with_channel("final") + ) + for tc in msg.get("tool_calls") or []: + if not isinstance(tc, dict): + continue + fn = tc.get("function") + if not isinstance(fn, dict): + continue + name = fn.get("name") or "" + if not name: + continue + tc_id = tc.get("id") + if isinstance(tc_id, str): + tool_call_names[tc_id] = name + args = fn.get("arguments") + if isinstance(args, (dict, list)): + args_str = json.dumps(args, ensure_ascii=False) + else: + args_str = args or "" + convo_msgs.append( + Message.from_role_and_content(Role.ASSISTANT, args_str) + .with_channel("commentary") + .with_recipient(f"functions.{name}") + .with_content_type("<|constrain|> json") + ) + continue + + if role == "tool": + text = _extract_text(msg.get("content")) + # Prefer an explicit ``name``; fall back to the function name + # recorded when the matching assistant tool_call was emitted. + name = msg.get("name") + if not name: + tc_id = msg.get("tool_call_id") + if isinstance(tc_id, str): + name = tool_call_names.get(tc_id) + if not name: + # No recoverable name — skip rather than fabricate one, which + # would confuse the model about which function it called. + logger.warning( + "Skipping tool message: no ``name`` and no matching " + "tool_call_id in earlier assistant message." + ) + continue + convo_msgs.append( + Message.from_author_and_content( + Author.new(Role.TOOL, f"functions.{name}"), + text, + ).with_channel("commentary") + ) + continue + + # Unknown roles: drop silently — openai-harmony would reject them. + + developer_content: DeveloperContent | None = None + instructions = "\n\n".join(t for t in system_texts if t) + if instructions: + developer_content = DeveloperContent.new().with_instructions(instructions) + if tools: + tool_descs = [td for t in tools if (td := _tool_description_from_openai(t))] + if tool_descs: + developer_content = (developer_content or DeveloperContent.new()) \ + .with_function_tools(tool_descs) + + conv_messages: list[Message] = [ + Message.from_role_and_content(Role.SYSTEM, system_content), + ] + if developer_content is not None: + conv_messages.append(Message.from_role_and_content(Role.DEVELOPER, developer_content)) + conv_messages.extend(convo_msgs) + + encoding = _get_gpt_oss_encoding() + tokens = encoding.render_conversation_for_completion( + Conversation.from_messages(conv_messages), Role.ASSISTANT + ) + return encoding.decode(tokens) + + def _get_special_token_ids(tokenizer: Any) -> set[int]: """ Get special token IDs from model tokenizer. 
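
For reviewers: a minimal sketch of how the new renderer is driven and the
shape of what it returns. The import path and signature are the ones added
above; the messages, tool spec, and the abridged output in the trailing
comment are illustrative only (the exact token layout is what the
assertions in tests/test_harmony_render.py pin down):

    from omlx.adapter.harmony import render_harmony_prompt

    prompt = render_harmony_prompt(
        messages=[
            {"role": "system", "content": "Be terse."},
            {"role": "user", "content": "weather in Paris?"},
        ],
        tools=[{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Look up current weather",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }],
        chat_template_kwargs={"reasoning_effort": "high"},
    )

    # `prompt` is a plain string ready to feed into generate, roughly:
    #   <|start|>system<|message|>... Reasoning: high ...<|end|>
    #   <|start|>developer<|message|># Instructions ... # Tools ...<|end|>
    #   <|start|>user<|message|>weather in Paris?<|end|>
    #   <|start|>assistant
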
diff --git a/omlx/engine/batched.py b/omlx/engine/batched.py index a73c5bd0..39e916b8 100644 --- a/omlx/engine/batched.py +++ b/omlx/engine/batched.py @@ -21,12 +21,13 @@ # Optional Harmony adapter import try: - from ..adapter.harmony import preprocess_harmony_messages + from ..adapter.harmony import preprocess_harmony_messages, render_harmony_prompt HAS_HARMONY_ADAPTER = True except ImportError: HAS_HARMONY_ADAPTER = False preprocess_harmony_messages = None # type: ignore + render_harmony_prompt = None # type: ignore class BatchedEngine(BaseEngine): @@ -307,6 +308,20 @@ def _apply_chat_template( chat_template_kwargs: Optional kwargs passed to tokenizer.apply_chat_template (e.g. enable_thinking, reasoning_effort). Overrides global _enable_thinking. """ + # Fallback for gpt-oss models whose tokenizer has no chat_template: + # render the Harmony prompt directly via openai-harmony. Models that + # do ship a chat_template continue to use it (preserves any + # model-specific tuning the template author intended). + if ( + self.model_type == "gpt_oss" + and HAS_HARMONY_ADAPTER + and render_harmony_prompt is not None + and not getattr(self._tokenizer, "chat_template", None) + ): + return render_harmony_prompt( + messages, tools=tools, chat_template_kwargs=chat_template_kwargs + ) + if hasattr(self._tokenizer, 'apply_chat_template'): is_partial = detect_and_strip_partial(messages) template_kwargs = { diff --git a/tests/test_harmony_render.py b/tests/test_harmony_render.py new file mode 100644 index 00000000..f1f4c983 --- /dev/null +++ b/tests/test_harmony_render.py @@ -0,0 +1,298 @@ +# SPDX-License-Identifier: Apache-2.0 +"""Tests for ``render_harmony_prompt`` (omlx.adapter.harmony). + +The helper produces a Harmony-format prompt from OpenAI-style chat messages, +used as a fallback when a gpt-oss model's tokenizer lacks a ``chat_template``. +""" + +import json + +import pytest + +from omlx.adapter.harmony import render_harmony_prompt + + +# ── Basic rendering ─────────────────────────────────────────────────── + + +class TestBasicRendering: + def test_user_message_only(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}] + ) + assert "<|start|>system" in prompt + assert "<|start|>user<|message|>hi<|end|>" in prompt + # Generation prompt ends expecting assistant continuation. + assert prompt.rstrip().endswith("<|start|>assistant") + + def test_system_becomes_developer_instructions(self): + prompt = render_harmony_prompt( + [ + {"role": "system", "content": "Be terse."}, + {"role": "user", "content": "hi"}, + ] + ) + assert "<|start|>developer" in prompt + # Instructions section carries the user's system text. + assert "Be terse." in prompt + + def test_multiple_system_messages_concatenated(self): + prompt = render_harmony_prompt( + [ + {"role": "system", "content": "A."}, + {"role": "system", "content": "B."}, + {"role": "user", "content": "hi"}, + ] + ) + assert "A." in prompt and "B." 
in prompt + + def test_content_as_list_of_blocks(self): + """OpenAI clients may send content as ``[{type:'text', text:'...'}]``.""" + prompt = render_harmony_prompt( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "hello "}, + {"type": "text", "text": "world"}, + ], + } + ] + ) + assert "hello world" in prompt + + +# ── Reasoning effort + conversation date ────────────────────────────── + + +class TestTemplateKwargs: + def test_reasoning_effort_high(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + chat_template_kwargs={"reasoning_effort": "high"}, + ) + assert "Reasoning: high" in prompt + + def test_reasoning_effort_low(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + chat_template_kwargs={"reasoning_effort": "low"}, + ) + assert "Reasoning: low" in prompt + + def test_conversation_start_date(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + chat_template_kwargs={"conversation_start_date": "2026-01-15"}, + ) + assert "2026-01-15" in prompt + + def test_unknown_reasoning_effort_ignored(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + chat_template_kwargs={"reasoning_effort": "ludicrous"}, + ) + # Renders without raising; default effort still emitted. + assert "<|start|>user" in prompt + + +# ── Tools ───────────────────────────────────────────────────────────── + + +class TestToolRendering: + SEARCH_TOOL = { + "type": "function", + "function": { + "name": "search", + "description": "Search the corpus", + "parameters": { + "type": "object", + "properties": {"query": {"type": "string"}}, + "required": ["query"], + }, + }, + } + + def test_tool_rendered_into_developer_block(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + tools=[self.SEARCH_TOOL], + ) + assert "<|start|>developer" in prompt + # openai-harmony emits a ``namespace functions {`` block and a + # ``type search = (...) => any;`` entry per tool. Check both so + # the test fails if the tool fell out of the developer block. + assert "namespace functions" in prompt + assert "type search" in prompt + assert "Search the corpus" in prompt + + def test_tools_without_system_do_not_emit_instructions_heading(self): + """Developer block should carry ``# Tools`` but not ``# Instructions`` + when the caller supplied tools but no system message.""" + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + tools=[self.SEARCH_TOOL], + ) + assert "<|start|>developer" in prompt + assert "# Tools" in prompt + assert "# Instructions" not in prompt + + def test_raw_function_spec_accepted(self): + """Some callers pass ``{name, description, parameters}`` without the + ``{"type": "function", "function": {...}}`` wrapper.""" + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + tools=[ + { + "name": "grep", + "description": "Grep", + "parameters": { + "type": "object", + "properties": {"pattern": {"type": "string"}}, + "required": ["pattern"], + }, + } + ], + ) + assert "grep" in prompt + + def test_tool_without_name_dropped(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + tools=[ + { + "type": "function", + "function": { + "description": "should-not-appear-sentinel-xyz", + }, + } + ], + ) + # Rendering succeeds and does not inject the bogus tool. 
+ assert "should-not-appear-sentinel-xyz" not in prompt + + +# ── Assistant tool_calls + tool responses roundtrip ─────────────────── + + +class TestToolCallRoundtrip: + def test_assistant_tool_call_and_tool_response(self): + prompt = render_harmony_prompt( + [ + {"role": "user", "content": "weather in Paris?"}, + { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "get_weather", + "arguments": {"city": "Paris"}, + }, + } + ], + }, + { + "role": "tool", + "name": "get_weather", + "tool_call_id": "call_1", + "content": json.dumps({"temp_c": 14}), + }, + ] + ) + # Assistant commentary channel addressed to the function. + assert "to=functions.get_weather" in prompt + assert "commentary" in prompt + assert '{"city": "Paris"}' in prompt + # Tool response is emitted as a commentary message authored by the tool. + assert "functions.get_weather" in prompt + assert '"temp_c": 14' in prompt + + def test_tool_response_name_resolved_from_tool_call_id(self): + """When a ``tool`` message omits ``name``, the function name is + recovered from the preceding assistant tool_call with the matching id.""" + prompt = render_harmony_prompt( + [ + {"role": "user", "content": "q"}, + { + "role": "assistant", + "tool_calls": [ + { + "id": "call_42", + "type": "function", + "function": { + "name": "lookup", + "arguments": {"x": 1}, + }, + } + ], + }, + # No ``name`` key — must be resolved via tool_call_id. + {"role": "tool", "tool_call_id": "call_42", "content": "done"}, + ] + ) + assert "functions.lookup" in prompt + # The resolved name should appear both on the call and the response. + assert prompt.count("functions.lookup") >= 2 + + def test_tool_response_dropped_when_name_unresolvable(self): + """A ``tool`` message with no ``name`` and no matching tool_call_id + is skipped rather than given a fabricated function name.""" + prompt = render_harmony_prompt( + [ + {"role": "user", "content": "q"}, + {"role": "tool", "tool_call_id": "nowhere", "content": "orphan"}, + ] + ) + assert "orphan" not in prompt + assert "functions.nowhere" not in prompt + + def test_tool_call_arguments_accept_string(self): + """Arguments may arrive pre-stringified (OpenAI wire format).""" + prompt = render_harmony_prompt( + [ + {"role": "user", "content": "q"}, + { + "role": "assistant", + "tool_calls": [ + { + "function": { + "name": "search", + "arguments": '{"query": "x"}', + } + } + ], + }, + ] + ) + assert 'to=functions.search' in prompt + assert '{"query": "x"}' in prompt + + +# ── Robustness ──────────────────────────────────────────────────────── + + +class TestRobustness: + def test_empty_messages_does_not_crash(self): + prompt = render_harmony_prompt([]) + # Minimum viable Harmony prompt: system header + assistant lead-in. + assert "<|start|>system" in prompt + assert prompt.rstrip().endswith("<|start|>assistant") + + def test_non_dict_messages_skipped(self): + prompt = render_harmony_prompt( + ["garbage", None, {"role": "user", "content": "ok"}] + ) + assert "ok" in prompt + + def test_unknown_role_skipped(self): + prompt = render_harmony_prompt( + [ + {"role": "stranger", "content": "drop me"}, + {"role": "user", "content": "keep me"}, + ] + ) + assert "drop me" not in prompt + assert "keep me" in prompt
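
The end-to-end verification described in the commit message can be
reproduced with any OpenAI-compatible client. A minimal sketch, assuming an
oMLX server on localhost:8000 and a placeholder model id (both illustrative,
not part of this patch):

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")

    resp = client.chat.completions.create(
        model="gpt-oss-mlx-quant",  # placeholder: a gpt-oss model without chat_template
        messages=[{"role": "user", "content": "weather in Paris?"}],
        tools=[{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Look up current weather",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }],
    )

    # Before this patch such a model failed with "Chat template error: ...";
    # with the fallback it returns structured tool calls as usual.
    print(resp.choices[0].message.tool_calls)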