From 9e022ccd0d922992cf95cff06addf184c2637eba Mon Sep 17 00:00:00 2001
From: Faisal Fehad
Date: Sat, 18 Apr 2026 21:12:25 +0100
Subject: [PATCH] fix: fall back to openai-harmony rendering for gpt-oss
 models without chat_template
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gpt-oss models whose packaged tokenizer does not ship with a
``chat_template`` (several community MLX quants of the context-1 family,
for example) fail every chat-style endpoint (``/v1/chat/completions``,
``/v1/messages``, ``/v1/responses``) with:

    Chat template error: Cannot use chat template functions because
    tokenizer.chat_template is not set

... even though oMLX has full Harmony infrastructure (adapter, streaming
parser, anthropic→harmony converter, structured tool-call extraction). The
only reachable endpoint is ``/v1/completions``, which drops the parser's
``tool_calls`` on the floor — so tool use on those models is effectively
unusable.

Render the prompt directly via ``openai-harmony`` (already a dependency)
when ``model_type == "gpt_oss"`` and the tokenizer has no ``chat_template``.
Models that ship a template continue to use it, so nothing changes on the
happy path.

- ``omlx/adapter/harmony.py``: add ``render_harmony_prompt()``. Translates
  OpenAI messages + tools into a Harmony prompt string; handles content
  blocks, assistant ``tool_calls``, ``tool`` responses, ``reasoning_effort``
  and ``conversation_start_date`` via ``chat_template_kwargs``. Resolves
  tool-response function names via ``tool_call_id`` when the ``tool``
  message omits ``name``.
- ``omlx/engine/batched.py::_apply_chat_template``: branch to the new
  renderer only when the fallback is needed.
- ``tests/test_harmony_render.py``: 19 unit tests covering basic rendering,
  content-as-list, tool rendering, tool-call roundtrip (incl. name
  resolution via tool_call_id), template kwargs, and malformed input.

Verified end-to-end on a gpt-oss model without ``chat_template``:
``/v1/chat/completions`` and ``/v1/messages`` now return structured
``tool_calls``; a model that does ship a template is unaffected.
---
 omlx/adapter/harmony.py      | 205 ++++++++++++++++++++++++
 omlx/engine/batched.py       |  17 +-
 tests/test_harmony_render.py | 298 +++++++++++++++++++++++++++++++++++
 3 files changed, 519 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_harmony_render.py

diff --git a/omlx/adapter/harmony.py b/omlx/adapter/harmony.py
index ca1c796b..0ae74629 100644
--- a/omlx/adapter/harmony.py
+++ b/omlx/adapter/harmony.py
@@ -20,15 +20,24 @@
 - commentary: Tool/function calls (non-streaming only)
 """
 
+import json
 import logging
 import re
 from dataclasses import dataclass, field
 from typing import Any
 
 from openai_harmony import (
+    Author,
+    Conversation,
+    DeveloperContent,
     HarmonyEncoding,
+    HarmonyEncodingName,
+    Message,
+    ReasoningEffort,
     Role,
     StreamableParser,
+    SystemContent,
+    ToolDescription,
     load_harmony_encoding,
 )
 
@@ -108,6 +117,202 @@ def preprocess_harmony_messages(
     return result
 
 
+_REASONING_EFFORT_MAP = {
+    "low": ReasoningEffort.LOW,
+    "medium": ReasoningEffort.MEDIUM,
+    "high": ReasoningEffort.HIGH,
+}
+
+# Shared across all render_harmony_prompt() calls. The encoding is
+# stateless and cheap to reuse: parse_tool_calls_from_tokens below
+# reloads it on every call, whereas here we pay the load cost once.
+_GPT_OSS_ENCODING: HarmonyEncoding | None = None + + +def _get_gpt_oss_encoding() -> HarmonyEncoding: + global _GPT_OSS_ENCODING + if _GPT_OSS_ENCODING is None: + _GPT_OSS_ENCODING = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) + return _GPT_OSS_ENCODING + + +def _extract_text(content: Any) -> str: + """Flatten OpenAI content (str or list of blocks) into plain text.""" + if isinstance(content, str): + return content + if not isinstance(content, list): + return "" if content is None else str(content) + parts: list[str] = [] + for block in content: + if isinstance(block, dict): + if block.get("type") == "text": + parts.append(block.get("text", "")) + elif "text" in block: + parts.append(block["text"]) + elif isinstance(block, str): + parts.append(block) + return "".join(parts) + + +def _tool_description_from_openai(tool: dict[str, Any]) -> ToolDescription | None: + """Convert an OpenAI tool spec (``{"type":"function","function":{...}}``) to a ToolDescription.""" + spec = tool.get("function") if tool.get("type") == "function" else tool + if not isinstance(spec, dict): + return None + name = spec.get("name") + if not name: + return None + description = spec.get("description", "") or "" + parameters = spec.get("parameters") or spec.get("input_schema") or { + "type": "object", + "properties": {}, + } + return ToolDescription.new(name, description, parameters=parameters) + + +def render_harmony_prompt( + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, + chat_template_kwargs: dict[str, Any] | None = None, +) -> str: + """Render chat messages + tools into a Harmony-format prompt string. + + Used as a tokenizer-free fallback for gpt-oss models whose packaged + tokenizer does not ship with a ``chat_template``. The official + ``openai-harmony`` library does the heavy lifting; this wrapper + translates the OpenAI-style inputs oMLX already uses. + + Args: + messages: OpenAI chat messages. ``system`` becomes developer + instructions; ``assistant`` with a ``tool_calls`` field is + emitted on the commentary channel; ``tool`` messages are + rendered as tool responses. + tools: Optional OpenAI-format function tools. Also accepts raw + function specs (``{"name", "description", "parameters"}``). + chat_template_kwargs: Optional template-style kwargs. Supports + ``reasoning_effort`` ("low"/"medium"/"high") and + ``conversation_start_date``. + + Returns: + A decoded Harmony prompt string ready to feed into ``generate``. + """ + ct = chat_template_kwargs or {} + + system_content = SystemContent.new() + effort = ct.get("reasoning_effort") + if isinstance(effort, str): + mapped = _REASONING_EFFORT_MAP.get(effort.lower()) + if mapped is not None: + system_content = system_content.with_reasoning_effort(mapped) + start_date = ct.get("conversation_start_date") + if isinstance(start_date, str) and start_date: + system_content = system_content.with_conversation_start_date(start_date) + + system_texts: list[str] = [] + convo_msgs: list[Message] = [] + # Maps tool_call_id -> function name for resolving ``role=tool`` messages + # whose ``name`` field is omitted (OpenAI spec allows this). 
+ tool_call_names: dict[str, str] = {} + + for msg in messages: + if not isinstance(msg, dict): + continue + role = msg.get("role") + + if role == "system": + text = _extract_text(msg.get("content")) + if text: + system_texts.append(text) + continue + + if role == "user": + text = _extract_text(msg.get("content")) + convo_msgs.append(Message.from_role_and_content(Role.USER, text)) + continue + + if role == "assistant": + text = _extract_text(msg.get("content")) + if text: + convo_msgs.append( + Message.from_role_and_content(Role.ASSISTANT, text) + .with_channel("final") + ) + for tc in msg.get("tool_calls") or []: + if not isinstance(tc, dict): + continue + fn = tc.get("function") + if not isinstance(fn, dict): + continue + name = fn.get("name") or "" + if not name: + continue + tc_id = tc.get("id") + if isinstance(tc_id, str): + tool_call_names[tc_id] = name + args = fn.get("arguments") + if isinstance(args, (dict, list)): + args_str = json.dumps(args, ensure_ascii=False) + else: + args_str = args or "" + convo_msgs.append( + Message.from_role_and_content(Role.ASSISTANT, args_str) + .with_channel("commentary") + .with_recipient(f"functions.{name}") + .with_content_type("<|constrain|> json") + ) + continue + + if role == "tool": + text = _extract_text(msg.get("content")) + # Prefer an explicit ``name``; fall back to the function name + # recorded when the matching assistant tool_call was emitted. + name = msg.get("name") + if not name: + tc_id = msg.get("tool_call_id") + if isinstance(tc_id, str): + name = tool_call_names.get(tc_id) + if not name: + # No recoverable name — skip rather than fabricate one, which + # would confuse the model about which function it called. + logger.warning( + "Skipping tool message: no ``name`` and no matching " + "tool_call_id in earlier assistant message." + ) + continue + convo_msgs.append( + Message.from_author_and_content( + Author.new(Role.TOOL, f"functions.{name}"), + text, + ).with_channel("commentary") + ) + continue + + # Unknown roles: drop silently — openai-harmony would reject them. + + developer_content: DeveloperContent | None = None + instructions = "\n\n".join(t for t in system_texts if t) + if instructions: + developer_content = DeveloperContent.new().with_instructions(instructions) + if tools: + tool_descs = [td for t in tools if (td := _tool_description_from_openai(t))] + if tool_descs: + developer_content = (developer_content or DeveloperContent.new()) \ + .with_function_tools(tool_descs) + + conv_messages: list[Message] = [ + Message.from_role_and_content(Role.SYSTEM, system_content), + ] + if developer_content is not None: + conv_messages.append(Message.from_role_and_content(Role.DEVELOPER, developer_content)) + conv_messages.extend(convo_msgs) + + encoding = _get_gpt_oss_encoding() + tokens = encoding.render_conversation_for_completion( + Conversation.from_messages(conv_messages), Role.ASSISTANT + ) + return encoding.decode(tokens) + + def _get_special_token_ids(tokenizer: Any) -> set[int]: """ Get special token IDs from model tokenizer. 
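
For reviewers: a minimal sketch of how the new renderer is driven and the
shape of what it returns. The import path and signature are the ones added
above; the messages, tool spec, and the abridged output in the trailing
comment are illustrative only (the exact token layout is what the
assertions in tests/test_harmony_render.py pin down):

    from omlx.adapter.harmony import render_harmony_prompt

    prompt = render_harmony_prompt(
        messages=[
            {"role": "system", "content": "Be terse."},
            {"role": "user", "content": "weather in Paris?"},
        ],
        tools=[{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Look up current weather",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }],
        chat_template_kwargs={"reasoning_effort": "high"},
    )

    # `prompt` is a plain string ready to feed into generate, roughly:
    #   <|start|>system<|message|>... Reasoning: high ...<|end|>
    #   <|start|>developer<|message|># Instructions ... # Tools ...<|end|>
    #   <|start|>user<|message|>weather in Paris?<|end|>
    #   <|start|>assistant
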
diff --git a/omlx/engine/batched.py b/omlx/engine/batched.py index a73c5bd0..39e916b8 100644 --- a/omlx/engine/batched.py +++ b/omlx/engine/batched.py @@ -21,12 +21,13 @@ # Optional Harmony adapter import try: - from ..adapter.harmony import preprocess_harmony_messages + from ..adapter.harmony import preprocess_harmony_messages, render_harmony_prompt HAS_HARMONY_ADAPTER = True except ImportError: HAS_HARMONY_ADAPTER = False preprocess_harmony_messages = None # type: ignore + render_harmony_prompt = None # type: ignore class BatchedEngine(BaseEngine): @@ -307,6 +308,20 @@ def _apply_chat_template( chat_template_kwargs: Optional kwargs passed to tokenizer.apply_chat_template (e.g. enable_thinking, reasoning_effort). Overrides global _enable_thinking. """ + # Fallback for gpt-oss models whose tokenizer has no chat_template: + # render the Harmony prompt directly via openai-harmony. Models that + # do ship a chat_template continue to use it (preserves any + # model-specific tuning the template author intended). + if ( + self.model_type == "gpt_oss" + and HAS_HARMONY_ADAPTER + and render_harmony_prompt is not None + and not getattr(self._tokenizer, "chat_template", None) + ): + return render_harmony_prompt( + messages, tools=tools, chat_template_kwargs=chat_template_kwargs + ) + if hasattr(self._tokenizer, 'apply_chat_template'): is_partial = detect_and_strip_partial(messages) template_kwargs = { diff --git a/tests/test_harmony_render.py b/tests/test_harmony_render.py new file mode 100644 index 00000000..f1f4c983 --- /dev/null +++ b/tests/test_harmony_render.py @@ -0,0 +1,298 @@ +# SPDX-License-Identifier: Apache-2.0 +"""Tests for ``render_harmony_prompt`` (omlx.adapter.harmony). + +The helper produces a Harmony-format prompt from OpenAI-style chat messages, +used as a fallback when a gpt-oss model's tokenizer lacks a ``chat_template``. +""" + +import json + +import pytest + +from omlx.adapter.harmony import render_harmony_prompt + + +# ── Basic rendering ─────────────────────────────────────────────────── + + +class TestBasicRendering: + def test_user_message_only(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}] + ) + assert "<|start|>system" in prompt + assert "<|start|>user<|message|>hi<|end|>" in prompt + # Generation prompt ends expecting assistant continuation. + assert prompt.rstrip().endswith("<|start|>assistant") + + def test_system_becomes_developer_instructions(self): + prompt = render_harmony_prompt( + [ + {"role": "system", "content": "Be terse."}, + {"role": "user", "content": "hi"}, + ] + ) + assert "<|start|>developer" in prompt + # Instructions section carries the user's system text. + assert "Be terse." in prompt + + def test_multiple_system_messages_concatenated(self): + prompt = render_harmony_prompt( + [ + {"role": "system", "content": "A."}, + {"role": "system", "content": "B."}, + {"role": "user", "content": "hi"}, + ] + ) + assert "A." in prompt and "B." 
in prompt + + def test_content_as_list_of_blocks(self): + """OpenAI clients may send content as ``[{type:'text', text:'...'}]``.""" + prompt = render_harmony_prompt( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "hello "}, + {"type": "text", "text": "world"}, + ], + } + ] + ) + assert "hello world" in prompt + + +# ── Reasoning effort + conversation date ────────────────────────────── + + +class TestTemplateKwargs: + def test_reasoning_effort_high(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + chat_template_kwargs={"reasoning_effort": "high"}, + ) + assert "Reasoning: high" in prompt + + def test_reasoning_effort_low(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + chat_template_kwargs={"reasoning_effort": "low"}, + ) + assert "Reasoning: low" in prompt + + def test_conversation_start_date(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + chat_template_kwargs={"conversation_start_date": "2026-01-15"}, + ) + assert "2026-01-15" in prompt + + def test_unknown_reasoning_effort_ignored(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + chat_template_kwargs={"reasoning_effort": "ludicrous"}, + ) + # Renders without raising; default effort still emitted. + assert "<|start|>user" in prompt + + +# ── Tools ───────────────────────────────────────────────────────────── + + +class TestToolRendering: + SEARCH_TOOL = { + "type": "function", + "function": { + "name": "search", + "description": "Search the corpus", + "parameters": { + "type": "object", + "properties": {"query": {"type": "string"}}, + "required": ["query"], + }, + }, + } + + def test_tool_rendered_into_developer_block(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + tools=[self.SEARCH_TOOL], + ) + assert "<|start|>developer" in prompt + # openai-harmony emits a ``namespace functions {`` block and a + # ``type search = (...) => any;`` entry per tool. Check both so + # the test fails if the tool fell out of the developer block. + assert "namespace functions" in prompt + assert "type search" in prompt + assert "Search the corpus" in prompt + + def test_tools_without_system_do_not_emit_instructions_heading(self): + """Developer block should carry ``# Tools`` but not ``# Instructions`` + when the caller supplied tools but no system message.""" + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + tools=[self.SEARCH_TOOL], + ) + assert "<|start|>developer" in prompt + assert "# Tools" in prompt + assert "# Instructions" not in prompt + + def test_raw_function_spec_accepted(self): + """Some callers pass ``{name, description, parameters}`` without the + ``{"type": "function", "function": {...}}`` wrapper.""" + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + tools=[ + { + "name": "grep", + "description": "Grep", + "parameters": { + "type": "object", + "properties": {"pattern": {"type": "string"}}, + "required": ["pattern"], + }, + } + ], + ) + assert "grep" in prompt + + def test_tool_without_name_dropped(self): + prompt = render_harmony_prompt( + [{"role": "user", "content": "hi"}], + tools=[ + { + "type": "function", + "function": { + "description": "should-not-appear-sentinel-xyz", + }, + } + ], + ) + # Rendering succeeds and does not inject the bogus tool. 
+ assert "should-not-appear-sentinel-xyz" not in prompt + + +# ── Assistant tool_calls + tool responses roundtrip ─────────────────── + + +class TestToolCallRoundtrip: + def test_assistant_tool_call_and_tool_response(self): + prompt = render_harmony_prompt( + [ + {"role": "user", "content": "weather in Paris?"}, + { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "get_weather", + "arguments": {"city": "Paris"}, + }, + } + ], + }, + { + "role": "tool", + "name": "get_weather", + "tool_call_id": "call_1", + "content": json.dumps({"temp_c": 14}), + }, + ] + ) + # Assistant commentary channel addressed to the function. + assert "to=functions.get_weather" in prompt + assert "commentary" in prompt + assert '{"city": "Paris"}' in prompt + # Tool response is emitted as a commentary message authored by the tool. + assert "functions.get_weather" in prompt + assert '"temp_c": 14' in prompt + + def test_tool_response_name_resolved_from_tool_call_id(self): + """When a ``tool`` message omits ``name``, the function name is + recovered from the preceding assistant tool_call with the matching id.""" + prompt = render_harmony_prompt( + [ + {"role": "user", "content": "q"}, + { + "role": "assistant", + "tool_calls": [ + { + "id": "call_42", + "type": "function", + "function": { + "name": "lookup", + "arguments": {"x": 1}, + }, + } + ], + }, + # No ``name`` key — must be resolved via tool_call_id. + {"role": "tool", "tool_call_id": "call_42", "content": "done"}, + ] + ) + assert "functions.lookup" in prompt + # The resolved name should appear both on the call and the response. + assert prompt.count("functions.lookup") >= 2 + + def test_tool_response_dropped_when_name_unresolvable(self): + """A ``tool`` message with no ``name`` and no matching tool_call_id + is skipped rather than given a fabricated function name.""" + prompt = render_harmony_prompt( + [ + {"role": "user", "content": "q"}, + {"role": "tool", "tool_call_id": "nowhere", "content": "orphan"}, + ] + ) + assert "orphan" not in prompt + assert "functions.nowhere" not in prompt + + def test_tool_call_arguments_accept_string(self): + """Arguments may arrive pre-stringified (OpenAI wire format).""" + prompt = render_harmony_prompt( + [ + {"role": "user", "content": "q"}, + { + "role": "assistant", + "tool_calls": [ + { + "function": { + "name": "search", + "arguments": '{"query": "x"}', + } + } + ], + }, + ] + ) + assert 'to=functions.search' in prompt + assert '{"query": "x"}' in prompt + + +# ── Robustness ──────────────────────────────────────────────────────── + + +class TestRobustness: + def test_empty_messages_does_not_crash(self): + prompt = render_harmony_prompt([]) + # Minimum viable Harmony prompt: system header + assistant lead-in. + assert "<|start|>system" in prompt + assert prompt.rstrip().endswith("<|start|>assistant") + + def test_non_dict_messages_skipped(self): + prompt = render_harmony_prompt( + ["garbage", None, {"role": "user", "content": "ok"}] + ) + assert "ok" in prompt + + def test_unknown_role_skipped(self): + prompt = render_harmony_prompt( + [ + {"role": "stranger", "content": "drop me"}, + {"role": "user", "content": "keep me"}, + ] + ) + assert "drop me" not in prompt + assert "keep me" in prompt
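
The end-to-end verification described in the commit message can be
reproduced with any OpenAI-compatible client. A minimal sketch, assuming an
oMLX server on localhost:8000 and a placeholder model id (both illustrative,
not part of this patch):

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1", api_key="not-needed")

    resp = client.chat.completions.create(
        model="gpt-oss-mlx-quant",  # placeholder: a gpt-oss model without chat_template
        messages=[{"role": "user", "content": "weather in Paris?"}],
        tools=[{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Look up current weather",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }],
    )

    # Before this patch such a model failed with "Chat template error: ...";
    # with the fallback it returns structured tool calls as usual.
    print(resp.choices[0].message.tool_calls)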