diff --git a/docs/strongdm/attractor/README.md b/docs/strongdm/attractor/README.md index baaa853e..0da67d96 100644 --- a/docs/strongdm/attractor/README.md +++ b/docs/strongdm/attractor/README.md @@ -26,7 +26,7 @@ Although bringing your own agentic loop and unified LLM SDK is not required to b - Sensitive Codex state roots (`codex-home*`, `.codex/auth.json`, `.codex/config.toml`) are excluded from `stage.tgz` and `run.tgz`. - Idle watchdog enforces process-group cleanup for stalled Codex CLI stages. - Codex schema behavior: - - Structured output schema requires `final` and `summary`, but allows additional properties for CLI compatibility. + - Structured output schema requires `final` and `summary` and sets `additionalProperties: false` (strict object contract required by Codex/OpenAI structured-output validation). - If codex rejects schema validation (`invalid_json_schema`-class errors), Attractor retries once without `--output-schema` and records fallback metadata in stage artifacts. - If codex returns unknown structured keys on schema-enabled output, Attractor emits a loud warning, writes `structured_output_unknown_keys.json`, retries once without `--output-schema`, and records fallback metadata in `cli_invocation.json`. - If codex emits known state-db discrepancy signatures, Attractor retries once with a fresh isolated state root and records state-db fallback metadata. 
diff --git a/internal/attractor/modeldb/catalog_test.go b/internal/attractor/modeldb/catalog_test.go index 98c32db2..bb530c30 100644 --- a/internal/attractor/modeldb/catalog_test.go +++ b/internal/attractor/modeldb/catalog_test.go @@ -149,3 +149,22 @@ func TestCatalogHasProviderModel_SparkEntry(t *testing.T) { t.Error("expected SupportsReasoning=true") } } + +func TestLoadEmbeddedCatalog_ContainsAnthropicSonnet46(t *testing.T) { + c, err := LoadEmbeddedCatalog() + if err != nil { + t.Fatalf("LoadEmbeddedCatalog: %v", err) + } + if !CatalogHasProviderModel(c, "anthropic", "claude-sonnet-4.6") { + t.Fatal("expected embedded catalog to contain anthropic/claude-sonnet-4.6") + } + if !CatalogHasProviderModel(c, "anthropic", "claude-sonnet-4-6") { + t.Fatal("expected dash-format anthropic model id to resolve for claude-sonnet-4.6") + } + if !CatalogHasProviderModel(c, "openai", "gpt-5.3-codex") { + t.Fatal("expected embedded catalog to contain openai/gpt-5.3-codex") + } + if !CatalogHasProviderModel(c, "google", "gemini-3.1-pro-preview") { + t.Fatal("expected embedded catalog to contain google/gemini-3.1-pro-preview") + } +} diff --git a/internal/attractor/modeldb/pinned/openrouter_models.json b/internal/attractor/modeldb/pinned/openrouter_models.json index a768f173..b8dffbfa 100644 --- a/internal/attractor/modeldb/pinned/openrouter_models.json +++ b/internal/attractor/modeldb/pinned/openrouter_models.json @@ -537,58 +537,6 @@ }, "expiration_date": null }, - { - "id": "allenai/olmo-3.1-32b-think", - "canonical_slug": "allenai/olmo-3.1-32b-think-20251215", - "hugging_face_id": "allenai/Olmo-3.1-32B-Think", - "name": "AllenAI: Olmo 3.1 32B Think", - "created": 1765907719, - "description": "Olmo 3.1 32B Think is a large-scale, 32-billion-parameter model designed for deep reasoning, complex multi-step logic, and advanced instruction following. 
Building on the Olmo 3 series, version 3.1 delivers refined reasoning behavior and stronger performance across demanding evaluations and nuanced conversational tasks. Developed by Ai2 under the Apache 2.0 license, Olmo 3.1 32B Think continues the Olmo initiative’s commitment to openness, providing full transparency across model weights, code, and training methodology.", - "context_length": 65536, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Other", - "instruct_type": null - }, - "pricing": { - "prompt": "0.00000015", - "completion": "0.0000005" - }, - "top_provider": { - "context_length": 65536, - "max_completion_tokens": 65536, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "include_reasoning", - "logit_bias", - "max_tokens", - "presence_penalty", - "reasoning", - "repetition_penalty", - "response_format", - "seed", - "stop", - "structured_outputs", - "temperature", - "top_k", - "top_p" - ], - "default_parameters": { - "temperature": 0.6, - "top_p": 0.95, - "frequency_penalty": null - }, - "expiration_date": null - }, { "id": "alpindale/goliath-120b", "canonical_slug": "alpindale/goliath-120b", @@ -1021,7 +969,9 @@ }, "pricing": { "prompt": "0.000006", - "completion": "0.00003" + "completion": "0.00003", + "input_cache_read": "0.0000006", + "input_cache_write": "0.0000075" }, "top_provider": { "context_length": 200000, @@ -1091,7 +1041,7 @@ "top_p": null, "frequency_penalty": null }, - "expiration_date": null + "expiration_date": "2026-05-05" }, { "id": "anthropic/claude-3.7-sonnet:thinking", @@ -1142,7 +1092,7 @@ "top_p": null, "frequency_penalty": null }, - "expiration_date": null + "expiration_date": "2026-05-05" }, { "id": "anthropic/claude-haiku-4.5", @@ -1174,14 +1124,16 @@ "top_provider": { "context_length": 200000, "max_completion_tokens": 64000, - "is_moderated": false + "is_moderated": true }, 
"per_request_limits": null, "supported_parameters": [ "include_reasoning", "max_tokens", "reasoning", + "response_format", "stop", + "structured_outputs", "temperature", "tool_choice", "tools", @@ -1669,7 +1621,11 @@ }, "pricing": { "prompt": "0", - "completion": "0" + "completion": "0", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" }, "top_provider": { "context_length": 131000, @@ -1723,21 +1679,15 @@ }, "per_request_limits": null, "supported_parameters": [ - "frequency_penalty", "include_reasoning", - "logit_bias", "max_tokens", - "min_p", - "presence_penalty", "reasoning", - "repetition_penalty", "response_format", "stop", "structured_outputs", "temperature", "tool_choice", "tools", - "top_k", "top_p" ], "default_parameters": { @@ -2520,9 +2470,8 @@ "instruct_type": null }, "pricing": { - "prompt": "0.0000003", - "completion": "0.0000012", - "input_cache_read": "0.00000015" + "prompt": "0.00000032", + "completion": "0.00000089" }, "top_provider": { "context_length": 163840, @@ -2539,7 +2488,6 @@ "response_format", "seed", "stop", - "structured_outputs", "temperature", "tool_choice", "tools", @@ -2569,13 +2517,13 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000019", - "completion": "0.00000087", - "input_cache_read": "0.000000095" + "prompt": "0.0000002", + "completion": "0.00000077", + "input_cache_read": "0.000000135" }, "top_provider": { "context_length": 163840, - "max_completion_tokens": 65536, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null, @@ -2722,9 +2670,9 @@ "instruct_type": "deepseek-r1" }, "pricing": { - "prompt": "0.0000004", - "completion": "0.00000175", - "input_cache_read": "0.0000002" + "prompt": "0.00000045", + "completion": "0.00000215", + "input_cache_read": "0.000000225" }, "top_provider": { "context_length": 163840, @@ -2760,51 +2708,6 @@ }, "expiration_date": null }, - { - "id": "deepseek/deepseek-r1-0528:free", - "canonical_slug": 
"deepseek/deepseek-r1-0528", - "hugging_face_id": "deepseek-ai/DeepSeek-R1-0528", - "name": "DeepSeek: R1 0528 (free)", - "created": 1748455170, - "description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass.\n\nFully open-source model.", - "context_length": 163840, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "DeepSeek", - "instruct_type": "deepseek-r1" - }, - "pricing": { - "prompt": "0", - "completion": "0" - }, - "top_provider": { - "context_length": 163840, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "include_reasoning", - "max_tokens", - "presence_penalty", - "reasoning", - "repetition_penalty", - "temperature" - ], - "default_parameters": { - "temperature": null, - "top_p": null, - "frequency_penalty": null - }, - "expiration_date": null - }, { "id": "deepseek/deepseek-r1-distill-llama-70b", "canonical_slug": "deepseek/deepseek-r1-distill-llama-70b", @@ -2825,13 +2728,12 @@ "instruct_type": "deepseek-r1" }, "pricing": { - "prompt": "0.00000003", - "completion": "0.00000011", - "input_cache_read": "0.000000015" + "prompt": "0.0000007", + "completion": "0.0000008" }, "top_provider": { "context_length": 131072, - "max_completion_tokens": 131072, + "max_completion_tokens": 16384, "is_moderated": false }, "per_request_limits": null, @@ -2887,6 +2789,7 @@ "supported_parameters": [ "frequency_penalty", "include_reasoning", + "logprobs", "max_tokens", "presence_penalty", "reasoning", @@ -2897,6 +2800,7 @@ "structured_outputs", "temperature", "top_k", + "top_logprobs", "top_p" ], "default_parameters": {}, @@ -3033,12 +2937,11 @@ }, "pricing": { "prompt": "0.00000025", - "completion": 
"0.00000038", - "input_cache_read": "0.000000125" + "completion": "0.0000004" }, "top_provider": { "context_length": 163840, - "max_completion_tokens": 65536, + "max_completion_tokens": 163840, "is_moderated": false }, "per_request_limits": null, @@ -3102,7 +3005,9 @@ "supported_parameters": [ "frequency_penalty", "include_reasoning", + "logit_bias", "max_tokens", + "min_p", "presence_penalty", "reasoning", "repetition_penalty", @@ -3143,9 +3048,9 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000027", - "completion": "0.00000041", - "input_cache_read": "0.000000135" + "prompt": "0.0000004", + "completion": "0.0000012", + "input_cache_read": "0.0000002" }, "top_provider": { "context_length": 163840, @@ -3154,19 +3059,12 @@ }, "per_request_limits": null, "supported_parameters": [ - "frequency_penalty", "include_reasoning", - "logit_bias", "max_tokens", - "presence_penalty", "reasoning", - "repetition_penalty", "response_format", - "seed", - "stop", "structured_outputs", "temperature", - "top_k", "top_p" ], "default_parameters": { @@ -3442,7 +3340,7 @@ "id": "google/gemini-2.5-flash-image", "canonical_slug": "google/gemini-2.5-flash-image", "hugging_face_id": "", - "name": "Google: Gemini 2.5 Flash Image (Nano Banana)", + "name": "Google: Nano Banana (Gemini 2.5 Flash Image)", "created": 1759870431, "description": "Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation, edits, and multi-turn conversations. 
Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)", "context_length": 32768, @@ -3478,6 +3376,7 @@ "max_tokens", "response_format", "seed", + "stop", "structured_outputs", "temperature", "top_p" @@ -3606,66 +3505,8 @@ "expiration_date": null }, { - "id": "google/gemini-2.5-flash-preview-09-2025", - "canonical_slug": "google/gemini-2.5-flash-preview-09-2025", - "hugging_face_id": "", - "name": "Google: Gemini 2.5 Flash Preview 09-2025", - "created": 1758820178, - "description": "Gemini 2.5 Flash Preview September 2025 Checkpoint is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater accuracy and nuanced context handling. \n\nAdditionally, Gemini 2.5 Flash is configurable through the \"max tokens for reasoning\" parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning).", - "context_length": 1048576, - "architecture": { - "modality": "text+image+file+audio+video->text", - "input_modalities": [ - "image", - "file", - "text", - "audio", - "video" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Gemini", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000003", - "completion": "0.0000025", - "image": "0.0000003", - "audio": "0.000001", - "internal_reasoning": "0.0000025", - "input_cache_read": "0.00000003", - "input_cache_write": "0.00000008333333333333334" - }, - "top_provider": { - "context_length": 1048576, - "max_completion_tokens": 65536, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "include_reasoning", - "max_tokens", - "reasoning", - "response_format", - "seed", - "stop", - "structured_outputs", - "temperature", - "tool_choice", - "tools", - 
"top_p" - ], - "default_parameters": { - "temperature": null, - "top_p": null, - "frequency_penalty": null - }, - "expiration_date": "2026-02-17" - }, - { - "id": "google/gemini-3.1-pro-preview-06-05", - "canonical_slug": "google/gemini-3.1-pro-preview", + "id": "google/gemini-2.5-pro", + "canonical_slug": "google/gemini-2.5-pro", "hugging_face_id": "", "name": "Google: Gemini 2.5 Pro", "created": 1750169544, @@ -3722,8 +3563,8 @@ "expiration_date": null }, { - "id": "google/gemini-3.1-pro-preview", - "canonical_slug": "google/gemini-3.1-pro-preview-06-05", + "id": "google/gemini-2.5-pro-preview", + "canonical_slug": "google/gemini-2.5-pro-preview-06-05", "hugging_face_id": "", "name": "Google: Gemini 2.5 Pro Preview 06-05", "created": 1749137257, @@ -3775,8 +3616,8 @@ "expiration_date": null }, { - "id": "google/gemini-3.1-pro-preview-05-06", - "canonical_slug": "google/gemini-3.1-pro-preview-03-25", + "id": "google/gemini-2.5-pro-preview-05-06", + "canonical_slug": "google/gemini-2.5-pro-preview-03-25", "hugging_face_id": "", "name": "Google: Gemini 2.5 Pro Preview 05-06", "created": 1746578513, @@ -4000,7 +3841,7 @@ "top_p": null, "frequency_penalty": null }, - "expiration_date": null + "expiration_date": "2026-03-09" }, { "id": "google/gemma-2-27b-it", @@ -4106,13 +3947,12 @@ "instruct_type": "gemma" }, "pricing": { - "prompt": "0.00000003", - "completion": "0.0000001", - "input_cache_read": "0.000000015" + "prompt": "0.00000004", + "completion": "0.00000013" }, "top_provider": { "context_length": 131072, - "max_completion_tokens": 131072, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null, @@ -4283,7 +4123,7 @@ "name": "Google: Gemma 3 4B", "created": 1741905510, "description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. 
It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities, including structured outputs and function calling.", - "context_length": 96000, + "context_length": 131072, "architecture": { "modality": "text+image->text", "input_modalities": [ @@ -4297,11 +4137,11 @@ "instruct_type": "gemma" }, "pricing": { - "prompt": "0.00000001703012", - "completion": "0.0000000681536" + "prompt": "0.00000004", + "completion": "0.00000008" }, "top_provider": { - "context_length": 96000, + "context_length": 131072, "max_completion_tokens": null, "is_moderated": false }, @@ -5088,7 +4928,11 @@ "per_request_limits": null, "supported_parameters": [ "max_tokens", + "response_format", + "structured_outputs", "temperature", + "tool_choice", + "tools", "top_p" ], "default_parameters": {}, @@ -5412,7 +5256,6 @@ "per_request_limits": null, "supported_parameters": [ "frequency_penalty", - "logit_bias", "max_tokens", "min_p", "presence_penalty", @@ -5792,7 +5635,7 @@ "top_p" ], "default_parameters": {}, - "expiration_date": null + "expiration_date": "2026-02-25" }, { "id": "meta-llama/llama-guard-3-8b", @@ -5936,7 +5779,7 @@ "name": "WizardLM-2 8x22B", "created": 1713225600, "description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. 
It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models.\n\nIt is an instruct finetune of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b).\n\nTo read more about the model release, [click here](https://wizardlm.github.io/WizardLM2/).\n\n#moe", - "context_length": 65536, + "context_length": 65535, "architecture": { "modality": "text->text", "input_modalities": [ @@ -5949,22 +5792,20 @@ "instruct_type": "vicuna" }, "pricing": { - "prompt": "0.00000048", - "completion": "0.00000048" + "prompt": "0.00000062", + "completion": "0.00000062" }, "top_provider": { - "context_length": 65536, - "max_completion_tokens": 16384, + "context_length": 65535, + "max_completion_tokens": 8000, "is_moderated": false }, "per_request_limits": null, "supported_parameters": [ "frequency_penalty", "max_tokens", - "min_p", "presence_penalty", "repetition_penalty", - "response_format", "seed", "stop", "temperature", @@ -6009,7 +5850,11 @@ "temperature", "top_p" ], - "default_parameters": {}, + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, "expiration_date": null }, { @@ -6056,7 +5901,11 @@ "top_k", "top_p" ], - "default_parameters": {}, + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, "expiration_date": null }, { @@ -6215,47 +6064,73 @@ }, { "id": "minimax/minimax-m2.5", - "canonical_slug": "minimax/minimax-m2.5", - "hugging_face_id": "", + "canonical_slug": "minimax/minimax-m2.5-20260211", + "hugging_face_id": "MiniMaxAI/MiniMax-M2.5", "name": "MiniMax: MiniMax M2.5", - "created": 1771200000, - "description": "MiniMax M2.5 - advanced reasoning and coding model with agentic capabilities", + "created": 1770908502, + "description": "MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. 
Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1 to extend into general office work, reaching fluency in generating and operating Word, Excel, and Powerpoint files, context switching between diverse software environments, and working across different agent and human teams. Scoring 80.2% on SWE-Bench Verified, 51.3% on Multi-SWE-Bench, and 76.3% on BrowseComp, M2.5 is also more token efficient than previous generations, having been trained to optimize its actions and output through planning.", "context_length": 196608, "architecture": { "modality": "text->text", - "input_modalities": ["text"], - "output_modalities": ["text"], + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], "tokenizer": "Other", "instruct_type": null }, "pricing": { - "prompt": "0.00000015", + "prompt": "0.000000295", "completion": "0.0000012", - "image": "0", - "request": "0", - "input_cache_read": "0", - "input_cache_write": "0", - "web_search": "0", - "internal_reasoning": "0" + "input_cache_read": "0.00000003" }, "top_provider": { "context_length": 196608, - "max_completion_tokens": 16384, + "max_completion_tokens": 196608, "is_moderated": false }, - "supported_parameters": ["tools", "temperature", "top_p", "max_tokens", "stream", "stop"], "per_request_limits": null, - "expiration_date": null - }, - { - "id": "mistralai/codestral-2508", - "canonical_slug": "mistralai/codestral-2508", - "hugging_face_id": "", - "name": "Mistral: Codestral 2508", - "created": 1754079630, - "description": "Mistral's cutting-edge language model for coding released end of July 2025. 
Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.\n\n[Blog Post](https://mistral.ai/news/codestral-25-08)", - "context_length": 256000, - "architecture": { + "supported_parameters": [ + "frequency_penalty", + "include_reasoning", + "logit_bias", + "logprobs", + "max_tokens", + "min_p", + "parallel_tool_calls", + "presence_penalty", + "reasoning", + "reasoning_effort", + "repetition_penalty", + "response_format", + "seed", + "stop", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_k", + "top_logprobs", + "top_p" + ], + "default_parameters": { + "temperature": 1, + "top_p": 0.95, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "mistralai/codestral-2508", + "canonical_slug": "mistralai/codestral-2508", + "hugging_face_id": "", + "name": "Mistral: Codestral 2508", + "created": 1754079630, + "description": "Mistral's cutting-edge language model for coding released end of July 2025. 
Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.\n\n[Blog Post](https://mistral.ai/news/codestral-25-08)", + "context_length": 256000, + "architecture": { "modality": "text->text", "input_modalities": [ "text" @@ -6314,13 +6189,12 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000005", - "completion": "0.00000022", - "input_cache_read": "0.000000025" + "prompt": "0.0000004", + "completion": "0.000002" }, "top_provider": { "context_length": 262144, - "max_completion_tokens": 65536, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null, @@ -6328,7 +6202,6 @@ "frequency_penalty", "max_tokens", "presence_penalty", - "repetition_penalty", "response_format", "seed", "stop", @@ -6336,7 +6209,6 @@ "temperature", "tool_choice", "tools", - "top_k", "top_p" ], "default_parameters": { @@ -6472,11 +6344,8 @@ "per_request_limits": null, "supported_parameters": [ "frequency_penalty", - "logit_bias", "max_tokens", - "min_p", "presence_penalty", - "repetition_penalty", "response_format", "seed", "stop", @@ -6484,7 +6353,6 @@ "temperature", "tool_choice", "tools", - "top_k", "top_p" ], "default_parameters": { @@ -6494,53 +6362,6 @@ }, "expiration_date": null }, - { - "id": "mistralai/ministral-3b", - "canonical_slug": "mistralai/ministral-3b", - "hugging_face_id": null, - "name": "Mistral: Ministral 3B", - "created": 1729123200, - "description": "Ministral 3B is a 3B parameter model optimized for on-device and edge computing. It excels in knowledge, commonsense reasoning, and function-calling, outperforming larger models like Mistral 7B on most benchmarks. 
Supporting up to 128k context length, it’s ideal for orchestrating agentic workflows and specialist tasks with efficient inference.", - "context_length": 131072, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Mistral", - "instruct_type": null - }, - "pricing": { - "prompt": "0.00000004", - "completion": "0.00000004" - }, - "top_provider": { - "context_length": 131072, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "max_tokens", - "presence_penalty", - "response_format", - "seed", - "stop", - "structured_outputs", - "temperature", - "tool_choice", - "tools", - "top_p" - ], - "default_parameters": { - "temperature": 0.3 - }, - "expiration_date": null - }, { "id": "mistralai/ministral-3b-2512", "canonical_slug": "mistralai/ministral-3b-2512", @@ -6591,53 +6412,6 @@ }, "expiration_date": null }, - { - "id": "mistralai/ministral-8b", - "canonical_slug": "mistralai/ministral-8b", - "hugging_face_id": null, - "name": "Mistral: Ministral 8B", - "created": 1729123200, - "description": "Ministral 8B is an 8B parameter model featuring a unique interleaved sliding-window attention pattern for faster, memory-efficient inference. Designed for edge use cases, it supports up to 128k context length and excels in knowledge and reasoning tasks. 
It outperforms peers in the sub-10B category, making it perfect for low-latency, privacy-first applications.", - "context_length": 131072, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Mistral", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000001", - "completion": "0.0000001" - }, - "top_provider": { - "context_length": 131072, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "max_tokens", - "presence_penalty", - "response_format", - "seed", - "stop", - "structured_outputs", - "temperature", - "tool_choice", - "tools", - "top_p" - ], - "default_parameters": { - "temperature": 0.3 - }, - "expiration_date": null - }, { "id": "mistralai/ministral-8b-2512", "canonical_slug": "mistralai/ministral-8b-2512", @@ -6778,52 +6552,6 @@ }, "expiration_date": null }, - { - "id": "mistralai/mistral-7b-instruct-v0.2", - "canonical_slug": "mistralai/mistral-7b-instruct-v0.2", - "hugging_face_id": "mistralai/Mistral-7B-Instruct-v0.2", - "name": "Mistral: Mistral 7B Instruct v0.2", - "created": 1703721600, - "description": "A high-performing, industry-standard 7.3B parameter model, with optimizations for speed and context length.\n\nAn improved version of [Mistral 7B Instruct](/modelsmistralai/mistral-7b-instruct-v0.1), with the following changes:\n\n- 32k context window (vs 8k context in v0.1)\n- Rope-theta = 1e6\n- No Sliding-Window Attention", - "context_length": 32768, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Mistral", - "instruct_type": "mistral" - }, - "pricing": { - "prompt": "0.0000002", - "completion": "0.0000002" - }, - "top_provider": { - "context_length": 32768, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null, - 
"supported_parameters": [ - "frequency_penalty", - "logit_bias", - "max_tokens", - "min_p", - "presence_penalty", - "repetition_penalty", - "stop", - "temperature", - "top_k", - "top_p" - ], - "default_parameters": { - "temperature": 0.3 - }, - "expiration_date": null - }, { "id": "mistralai/mistral-7b-instruct-v0.3", "canonical_slug": "mistralai/mistral-7b-instruct-v0.3", @@ -7293,7 +7021,6 @@ "response_format", "seed", "stop", - "structured_outputs", "temperature", "tool_choice", "tools", @@ -7314,7 +7041,7 @@ "name": "Mistral: Mistral Small 3.1 24B", "created": 1742238937, "description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and vision tasks, including image analysis, programming, mathematical reasoning, and multilingual support across dozens of languages. Equipped with an extensive 128k token context window and optimized for efficient local inference, it supports use cases such as conversational agents, function calling, long-document comprehension, and privacy-sensitive deployments. 
The updated version is [Mistral Small 3.2](mistralai/mistral-small-3.2-24b-instruct)", - "context_length": 131072, + "context_length": 128000, "architecture": { "modality": "text+image->text", "input_modalities": [ @@ -7328,13 +7055,12 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000003", - "completion": "0.00000011", - "input_cache_read": "0.000000015" + "prompt": "0.00000035", + "completion": "0.00000056" }, "top_provider": { - "context_length": 131072, - "max_completion_tokens": 131072, + "context_length": 128000, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null, @@ -7343,13 +7069,8 @@ "max_tokens", "presence_penalty", "repetition_penalty", - "response_format", "seed", - "stop", - "structured_outputs", "temperature", - "tool_choice", - "tools", "top_k", "top_p" ], @@ -7499,53 +7220,6 @@ }, "expiration_date": null }, - { - "id": "mistralai/mistral-tiny", - "canonical_slug": "mistralai/mistral-tiny", - "hugging_face_id": null, - "name": "Mistral Tiny", - "created": 1704844800, - "description": "Note: This model is being deprecated. Recommended replacement is the newer [Ministral 8B](/mistral/ministral-8b)\n\nThis model is currently powered by Mistral-7B-v0.2, and incorporates a \"better\" fine-tuning than [Mistral 7B](/models/mistralai/mistral-7b-instruct-v0.1), inspired by community work. 
It's best used for large batch processing tasks where cost is a significant factor but reasoning capabilities are not crucial.", - "context_length": 32768, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Mistral", - "instruct_type": null - }, - "pricing": { - "prompt": "0.00000025", - "completion": "0.00000025" - }, - "top_provider": { - "context_length": 32768, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "max_tokens", - "presence_penalty", - "response_format", - "seed", - "stop", - "structured_outputs", - "temperature", - "tool_choice", - "tools", - "top_p" - ], - "default_parameters": { - "temperature": 0.3 - }, - "expiration_date": null - }, { "id": "mistralai/mixtral-8x22b-instruct", "canonical_slug": "mistralai/mixtral-8x22b-instruct", @@ -7644,13 +7318,13 @@ "expiration_date": null }, { - "id": "mistralai/pixtral-12b", - "canonical_slug": "mistralai/pixtral-12b", - "hugging_face_id": "mistralai/Pixtral-12B-2409", - "name": "Mistral: Pixtral 12B", - "created": 1725926400, - "description": "The first multi-modal, text+image-to-text model from Mistral AI. Its weights were launched via torrent: https://x.com/mistralai/status/1833758285167722836.", - "context_length": 32768, + "id": "mistralai/pixtral-large-2411", + "canonical_slug": "mistralai/pixtral-large-2411", + "hugging_face_id": "", + "name": "Mistral: Pixtral Large 2411", + "created": 1731977388, + "description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411). 
The model is able to understand documents, charts and natural images.\n\nThe model is available under the Mistral Research License (MRL) for research and educational use, and the Mistral Commercial License for experimentation, testing, and production for commercial purposes.\n\n", + "context_length": 131072, "architecture": { "modality": "text+image->text", "input_modalities": [ @@ -7664,63 +7338,11 @@ "instruct_type": null }, "pricing": { - "prompt": "0.0000001", - "completion": "0.0000001" + "prompt": "0.000002", + "completion": "0.000006" }, "top_provider": { - "context_length": 32768, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "logit_bias", - "max_tokens", - "min_p", - "presence_penalty", - "repetition_penalty", - "response_format", - "seed", - "stop", - "structured_outputs", - "temperature", - "tool_choice", - "tools", - "top_k", - "top_p" - ], - "default_parameters": { - "temperature": 0.3 - }, - "expiration_date": null - }, - { - "id": "mistralai/pixtral-large-2411", - "canonical_slug": "mistralai/pixtral-large-2411", - "hugging_face_id": "", - "name": "Mistral: Pixtral Large 2411", - "created": 1731977388, - "description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411). 
The model is able to understand documents, charts and natural images.\n\nThe model is available under the Mistral Research License (MRL) for research and educational use, and the Mistral Commercial License for experimentation, testing, and production for commercial purposes.\n\n", - "context_length": 131072, - "architecture": { - "modality": "text+image->text", - "input_modalities": [ - "text", - "image" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Mistral", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000002", - "completion": "0.000006" - }, - "top_provider": { - "context_length": 131072, + "context_length": 131072, "max_completion_tokens": null, "is_moderated": false }, @@ -7801,7 +7423,7 @@ "name": "MoonshotAI: Kimi K2 0711", "created": 1752263252, "description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. Kimi K2 excels across a broad range of benchmarks, particularly in coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) tasks. 
It supports long-context inference up to 128K tokens and is designed with a novel training stack that includes the MuonClip optimizer for stable large-scale MoE training.", - "context_length": 131072, + "context_length": 131000, "architecture": { "modality": "text->text", "input_modalities": [ @@ -7814,11 +7436,11 @@ "instruct_type": null }, "pricing": { - "prompt": "0.0000005", - "completion": "0.0000024" + "prompt": "0.00000055", + "completion": "0.0000022" }, "top_provider": { - "context_length": 131072, + "context_length": 131000, "max_completion_tokens": null, "is_moderated": false }, @@ -7851,7 +7473,7 @@ "name": "MoonshotAI: Kimi K2 0905", "created": 1757021147, "description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It supports long-context inference up to 256k tokens, extended from the previous 128k.\n\nThis update improves agentic coding with higher accuracy and better generalization across scaffolds, and enhances frontend coding with more aesthetic and functional outputs for web, 3D, and related tasks. Kimi K2 is optimized for agentic capabilities, including advanced tool use, reasoning, and code synthesis. It excels across coding (LiveCodeBench, SWE-bench), reasoning (ZebraLogic, GPQA), and tool-use (Tau2, AceBench) benchmarks. 
The model is trained with a novel stack incorporating the MuonClip optimizer for stable large-scale MoE training.", - "context_length": 262144, + "context_length": 131072, "architecture": { "modality": "text->text", "input_modalities": [ @@ -7864,13 +7486,13 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000039", - "completion": "0.0000019", - "input_cache_read": "0.000000195" + "prompt": "0.0000004", + "completion": "0.000002", + "input_cache_read": "0.00000015" }, "top_provider": { - "context_length": 262144, - "max_completion_tokens": 262144, + "context_length": 131072, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null, @@ -7948,7 +7570,7 @@ "name": "MoonshotAI: Kimi K2 Thinking", "created": 1762440622, "description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in Kimi K2, it activates 32 billion parameters per forward pass and supports 256 k-token context windows. The model is optimized for persistent step-by-step thought, dynamic tool invocation, and complex reasoning workflows that span hundreds of turns. It interleaves step-by-step reasoning with tool use, enabling autonomous research, coding, and writing that can persist for hundreds of sequential actions without drift.\n\nIt sets new open-source benchmarks on HLE, BrowseComp, SWE-Multilingual, and LiveCodeBench, while maintaining stable multi-agent behavior through 200–300 tool calls. 
Built on a large-scale MoE architecture with MuonClip optimization, it combines strong reasoning depth with high inference efficiency for demanding agentic and analytical tasks.", - "context_length": 262144, + "context_length": 131072, "architecture": { "modality": "text->text", "input_modalities": [ @@ -7961,13 +7583,13 @@ "instruct_type": null }, "pricing": { - "prompt": "0.0000004", - "completion": "0.00000175", - "input_cache_read": "0.0000002" + "prompt": "0.00000047", + "completion": "0.000002", + "input_cache_read": "0.000000141" }, "top_provider": { - "context_length": 262144, - "max_completion_tokens": 65535, + "context_length": 131072, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null, @@ -7975,7 +7597,6 @@ "frequency_penalty", "include_reasoning", "logit_bias", - "logprobs", "max_tokens", "min_p", "presence_penalty", @@ -7989,7 +7610,6 @@ "tool_choice", "tools", "top_k", - "top_logprobs", "top_p" ], "default_parameters": { @@ -8021,12 +7641,12 @@ }, "pricing": { "prompt": "0.00000045", - "completion": "0.00000225", - "input_cache_read": "0.000000070000002" + "completion": "0.0000022", + "input_cache_read": "0.000000225" }, "top_provider": { "context_length": 262144, - "max_completion_tokens": null, + "max_completion_tokens": 65535, "is_moderated": false }, "per_request_limits": null, @@ -8037,8 +7657,10 @@ "logprobs", "max_tokens", "min_p", + "parallel_tool_calls", "presence_penalty", "reasoning", + "reasoning_effort", "repetition_penalty", "response_format", "seed", @@ -8270,56 +7892,6 @@ }, "expiration_date": null }, - { - "id": "nousresearch/deephermes-3-mistral-24b-preview", - "canonical_slug": "nousresearch/deephermes-3-mistral-24b-preview", - "hugging_face_id": "NousResearch/DeepHermes-3-Mistral-24B-Preview", - "name": "Nous: DeepHermes 3 Mistral 24B Preview", - "created": 1746830904, - "description": "DeepHermes 3 (Mistral 24B Preview) is an instruction-tuned language model by Nous Research based on 
Mistral-Small-24B, designed for chat, function calling, and advanced multi-turn reasoning. It introduces a dual-mode system that toggles between intuitive chat responses and structured “deep reasoning” mode using special system prompts. Fine-tuned via distillation from R1, it supports structured output (JSON mode) and function call syntax for agent-based applications.\n\nDeepHermes 3 supports a **reasoning toggle via system prompt**, allowing users to switch between fast, intuitive responses and deliberate, multi-step reasoning. When activated with the following specific system instruction, the model enters a *\"deep thinking\"* mode—generating extended chains of thought wrapped in `` tags before delivering a final answer. \n\nSystem Prompt: You are a deep thinking AI, you may use extremely long chains of thought to deeply consider the problem and deliberate with yourself via systematic reasoning processes to help come to a correct solution prior to answering. You should enclose your thoughts and internal monologue inside tags, and then provide your solution or response to the problem.\n", - "context_length": 32768, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Other", - "instruct_type": null - }, - "pricing": { - "prompt": "0.00000002", - "completion": "0.0000001", - "input_cache_read": "0.00000001" - }, - "top_provider": { - "context_length": 32768, - "max_completion_tokens": 32768, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "include_reasoning", - "max_tokens", - "presence_penalty", - "reasoning", - "repetition_penalty", - "response_format", - "seed", - "stop", - "structured_outputs", - "temperature", - "tool_choice", - "tools", - "top_k", - "top_p" - ], - "default_parameters": {}, - "expiration_date": null - }, { "id": "nousresearch/hermes-2-pro-llama-3-8b", "canonical_slug": 
"nousresearch/hermes-2-pro-llama-3-8b", @@ -8561,13 +8133,12 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000011", - "completion": "0.00000038", - "input_cache_read": "0.000000055" + "prompt": "0.00000013", + "completion": "0.0000004" }, "top_provider": { "context_length": 131072, - "max_completion_tokens": 131072, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null, @@ -8579,12 +8150,7 @@ "reasoning", "repetition_penalty", "response_format", - "seed", - "stop", - "structured_outputs", "temperature", - "tool_choice", - "tools", "top_k", "top_p" ], @@ -8638,51 +8204,6 @@ "default_parameters": {}, "expiration_date": null }, - { - "id": "nvidia/llama-3.1-nemotron-ultra-253b-v1", - "canonical_slug": "nvidia/llama-3.1-nemotron-ultra-253b-v1", - "hugging_face_id": "nvidia/Llama-3_1-Nemotron-Ultra-253B-v1", - "name": "NVIDIA: Llama 3.1 Nemotron Ultra 253B v1", - "created": 1744115059, - "description": "Llama-3.1-Nemotron-Ultra-253B-v1 is a large language model (LLM) optimized for advanced reasoning, human-interactive chat, retrieval-augmented generation (RAG), and tool-calling tasks. Derived from Meta’s Llama-3.1-405B-Instruct, it has been significantly customized using Neural Architecture Search (NAS), resulting in enhanced efficiency, reduced memory usage, and improved inference latency. The model supports a context length of up to 128K tokens and can operate efficiently on an 8x NVIDIA H100 node.\n\nNote: you must include `detailed thinking on` in the system prompt to enable reasoning. 
Please see [Usage Recommendations](https://huggingface.co/nvidia/Llama-3_1-Nemotron-Ultra-253B-v1#quick-start-and-usage-recommendations) for more.", - "context_length": 131072, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Llama3", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000006", - "completion": "0.0000018" - }, - "top_provider": { - "context_length": 131072, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "include_reasoning", - "max_tokens", - "presence_penalty", - "reasoning", - "repetition_penalty", - "response_format", - "structured_outputs", - "temperature", - "top_k", - "top_p" - ], - "default_parameters": {}, - "expiration_date": null - }, { "id": "nvidia/llama-3.3-nemotron-super-49b-v1.5", "canonical_slug": "nvidia/llama-3.3-nemotron-super-49b-v1.5", @@ -8738,7 +8259,7 @@ "hugging_face_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "name": "NVIDIA: Nemotron 3 Nano 30B A3B", "created": 1765731275, - "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.\n\nNote: For the free endpoint, all prompts and output are logged to improve the provider's model and its product and services. Please do not upload any personal, confidential, or otherwise sensitive information. This is a trial use only. 
Do not use for production or business-critical systems.", + "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.", "context_length": 262144, "architecture": { "modality": "text->text", @@ -8772,7 +8293,6 @@ "response_format", "seed", "stop", - "structured_outputs", "temperature", "tool_choice", "tools", @@ -8792,7 +8312,7 @@ "hugging_face_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", "name": "NVIDIA: Nemotron 3 Nano 30B A3B (free)", "created": 1765731275, - "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.\n\nNote: For the free endpoint, all prompts and output are logged to improve the provider's model and its product and services. Please do not upload any personal, confidential, or otherwise sensitive information. This is a trial use only. 
Do not use for production or business-critical systems.", + "description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems.\n\nThe model is fully open with open-weights, datasets and recipes so developers can easily\ncustomize, optimize, and deploy the model on their infrastructure for maximum privacy and\nsecurity.", "context_length": 256000, "architecture": { "modality": "text->text", @@ -8980,7 +8500,11 @@ "top_k", "top_p" ], - "default_parameters": {}, + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, "expiration_date": null }, { @@ -9024,55 +8548,12 @@ "tools", "top_p" ], - "default_parameters": {}, - "expiration_date": null - }, - { - "id": "openai/chatgpt-4o-latest", - "canonical_slug": "openai/chatgpt-4o-latest", - "hugging_face_id": null, - "name": "OpenAI: ChatGPT-4o", - "created": 1723593600, - "description": "OpenAI ChatGPT 4o is continually updated by OpenAI to point to the current version of GPT-4o used by ChatGPT. It therefore differs slightly from the API version of [GPT-4o](/models/openai/gpt-4o) in that it has additional RLHF. 
It is intended for research and evaluation.\n\nOpenAI notes that this model is not suited for production use-cases as it may be removed or redirected to another model in the future.", - "context_length": 128000, - "architecture": { - "modality": "text+image->text", - "input_modalities": [ - "text", - "image" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "GPT", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000005", - "completion": "0.000015" - }, - "top_provider": { - "context_length": 128000, - "max_completion_tokens": 16384, - "is_moderated": true + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "logit_bias", - "logprobs", - "max_tokens", - "presence_penalty", - "response_format", - "seed", - "stop", - "structured_outputs", - "temperature", - "top_logprobs", - "top_p" - ], - "default_parameters": {}, - "expiration_date": "2026-02-17" + "expiration_date": null }, { "id": "openai/gpt-3.5-turbo", @@ -10911,20 +10392,14 @@ }, "per_request_limits": null, "supported_parameters": [ - "frequency_penalty", "include_reasoning", - "logit_bias", - "logprobs", "max_tokens", - "presence_penalty", "reasoning", "response_format", "seed", - "stop", "structured_outputs", "tool_choice", - "tools", - "top_logprobs" + "tools" ], "default_parameters": { "temperature": null, @@ -10934,17 +10409,19 @@ "expiration_date": null }, { - "id": "openai/gpt-5.3-codex-spark", - "canonical_slug": "openai/gpt-5.3-codex-spark-20260212", + "id": "openai/gpt-5.2-pro", + "canonical_slug": "openai/gpt-5.2-pro-20251211", "hugging_face_id": "", - "name": "OpenAI: GPT-5.3-Codex-Spark", - "created": 1739491200, - "description": "GPT-5.3-Codex-Spark is a smaller, faster distillation of GPT-5.3-Codex optimized for real-time coding on Cerebras WSE-3 hardware. CLI-only (no API). 
128k context, text-only, 1000+ tok/s.", - "context_length": 128000, + "name": "OpenAI: GPT-5.2 Pro", + "created": 1765389780, + "description": "GPT-5.2 Pro is OpenAI’s most advanced model, offering major improvements in agentic coding and long context performance over GPT-5 Pro. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", + "context_length": 400000, "architecture": { - "modality": "text->text", + "modality": "text+image+file->text", "input_modalities": [ - "text" + "image", + "text", + "file" ], "output_modalities": [ "text" @@ -10953,63 +10430,23 @@ "instruct_type": null }, "pricing": { - "prompt": "0", - "completion": "0" + "prompt": "0.000021", + "completion": "0.000168", + "web_search": "0.01" }, "top_provider": { - "context_length": 128000, - "max_completion_tokens": 16384, - "is_moderated": false + "context_length": 400000, + "max_completion_tokens": 128000, + "is_moderated": true }, "per_request_limits": null, "supported_parameters": [ + "include_reasoning", "max_tokens", "reasoning", - "tool_choice", - "tools" - ], - "default_parameters": {}, - "expiration_date": null - }, - { - "id": "openai/gpt-5.2-pro", - "canonical_slug": "openai/gpt-5.2-pro-20251211", - "hugging_face_id": "", - "name": "OpenAI: GPT-5.2 Pro", - "created": 1765389780, - "description": "GPT-5.2 Pro is OpenAI’s most advanced model, offering major improvements in agentic coding and long context performance over GPT-5 Pro. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy in high-stakes use cases. 
It supports test-time routing features and advanced prompt understanding, including user-specified intent like \"think hard about this.\" Improvements include reductions in hallucination, sycophancy, and better performance in coding, writing, and health-related tasks.", - "context_length": 400000, - "architecture": { - "modality": "text+image+file->text", - "input_modalities": [ - "image", - "text", - "file" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "GPT", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000021", - "completion": "0.000168", - "web_search": "0.01" - }, - "top_provider": { - "context_length": 400000, - "max_completion_tokens": 128000, - "is_moderated": true - }, - "per_request_limits": null, - "supported_parameters": [ - "include_reasoning", - "max_tokens", - "reasoning", - "response_format", - "seed", - "structured_outputs", + "response_format", + "seed", + "structured_outputs", "tool_choice", "tools" ], @@ -11915,60 +11352,13 @@ }, "expiration_date": null }, - { - "id": "opengvlab/internvl3-78b", - "canonical_slug": "opengvlab/internvl3-78b", - "hugging_face_id": "OpenGVLab/InternVL3-78B", - "name": "OpenGVLab: InternVL3 78B", - "created": 1757962555, - "description": "The InternVL3 series is an advanced multimodal large language model (MLLM). Compared to InternVL 2.5, InternVL3 demonstrates stronger multimodal perception and reasoning capabilities. \n\nIn addition, InternVL3 is benchmarked against the Qwen2.5 Chat models, whose pre-trained base models serve as the initialization for its language component. 
Benefiting from Native Multimodal Pre-Training, the InternVL3 series surpasses the Qwen2.5 series in overall text performance.", - "context_length": 32768, - "architecture": { - "modality": "text+image->text", - "input_modalities": [ - "image", - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Other", - "instruct_type": null - }, - "pricing": { - "prompt": "0.00000015", - "completion": "0.0000006", - "input_cache_read": "0.000000075" - }, - "top_provider": { - "context_length": 32768, - "max_completion_tokens": 32768, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "max_tokens", - "presence_penalty", - "repetition_penalty", - "response_format", - "seed", - "stop", - "structured_outputs", - "temperature", - "top_k", - "top_p" - ], - "default_parameters": {}, - "expiration_date": null - }, { "id": "openrouter/auto", "canonical_slug": "openrouter/auto", "hugging_face_id": null, "name": "Auto Router", "created": 1699401600, - "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit [Activity](/activity), or read the `model` attribute of the response. 
Your response will be priced at the same rate as the routed model.\n\nLearn more, including how to customize the models for routing, in our [docs](/docs/guides/routing/routers/auto-router).\n\nRequests will be routed to the following models:\n- [anthropic/claude-3-haiku](/anthropic/claude-3-haiku)\n- [anthropic/claude-3.5-haiku](/anthropic/claude-3.5-haiku)\n- [anthropic/claude-3.7-sonnet](/anthropic/claude-3.7-sonnet)\n- [anthropic/claude-haiku-4.5](/anthropic/claude-haiku-4.5)\n- [anthropic/claude-opus-4](/anthropic/claude-opus-4)\n- [anthropic/claude-opus-4.1](/anthropic/claude-opus-4.1)\n- [anthropic/claude-opus-4.5](/anthropic/claude-opus-4.5)\n- [anthropic/claude-sonnet-4](/anthropic/claude-sonnet-4)\n- [anthropic/claude-sonnet-4.5](/anthropic/claude-sonnet-4.5)\n- [cohere/command-r-08-2024](/cohere/command-r-08-2024)\n- [cohere/command-r-plus-08-2024](/cohere/command-r-plus-08-2024)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [google/gemini-2.0-flash-001](/google/gemini-2.0-flash-001)\n- [google/gemini-2.5-flash](/google/gemini-2.5-flash)\n- [google/gemini-3.1-pro-preview](/google/gemini-3.1-pro-preview)\n- [google/gemini-3-flash-preview](/google/gemini-3-flash-preview)\n- [google/gemini-3-pro-preview](/google/gemini-3-pro-preview)\n- [meta-llama/llama-3-70b-instruct](/meta-llama/llama-3-70b-instruct)\n- [meta-llama/llama-3-8b-instruct](/meta-llama/llama-3-8b-instruct)\n- [meta-llama/llama-3.1-405b-instruct](/meta-llama/llama-3.1-405b-instruct)\n- [meta-llama/llama-3.1-70b-instruct](/meta-llama/llama-3.1-70b-instruct)\n- [meta-llama/llama-3.1-8b-instruct](/meta-llama/llama-3.1-8b-instruct)\n- [meta-llama/llama-3.3-70b-instruct](/meta-llama/llama-3.3-70b-instruct)\n- [mistralai/codestral-2508](/mistralai/codestral-2508)\n- [mistralai/mistral-7b-instruct](/mistralai/mistral-7b-instruct)\n- [mistralai/mistral-large](/mistralai/mistral-large)\n- [mistralai/mistral-large-2407](/mistralai/mistral-large-2407)\n- 
[mistralai/mistral-large-2411](/mistralai/mistral-large-2411)\n- [mistralai/mistral-medium-3.1](/mistralai/mistral-medium-3.1)\n- [mistralai/mistral-nemo](/mistralai/mistral-nemo)\n- [mistralai/mistral-small-3.2-24b-instruct-2506](/mistralai/mistral-small-3.2-24b-instruct-2506)\n- [mistralai/mixtral-8x22b-instruct](/mistralai/mixtral-8x22b-instruct)\n- [mistralai/mixtral-8x7b-instruct](/mistralai/mixtral-8x7b-instruct)\n- [moonshotai/kimi-k2-thinking](/moonshotai/kimi-k2-thinking)\n- [openai/chatgpt-4o-latest](/openai/chatgpt-4o-latest)\n- [openai/gpt-3.5-turbo](/openai/gpt-3.5-turbo)\n- [openai/gpt-4](/openai/gpt-4)\n- [openai/gpt-4-1106-preview](/openai/gpt-4-1106-preview)\n- [openai/gpt-4-turbo](/openai/gpt-4-turbo)\n- [openai/gpt-4-turbo-preview](/openai/gpt-4-turbo-preview)\n- [openai/gpt-4.1](/openai/gpt-4.1)\n- [openai/gpt-4.1-mini](/openai/gpt-4.1-mini)\n- [openai/gpt-4.1-nano](/openai/gpt-4.1-nano)\n- [openai/gpt-4o](/openai/gpt-4o)\n- [openai/gpt-4o-2024-05-13](/openai/gpt-4o-2024-05-13)\n- [openai/gpt-4o-2024-08-06](/openai/gpt-4o-2024-08-06)\n- [openai/gpt-4o-2024-11-20](/openai/gpt-4o-2024-11-20)\n- [openai/gpt-4o-mini](/openai/gpt-4o-mini)\n- [openai/gpt-4o-mini-2024-07-18](/openai/gpt-4o-mini-2024-07-18)\n- [openai/gpt-5](/openai/gpt-5)\n- [openai/gpt-5-mini](/openai/gpt-5-mini)\n- [openai/gpt-5-nano](/openai/gpt-5-nano)\n- [openai/gpt-5.1](/openai/gpt-5.1)\n- [openai/gpt-5.2](/openai/gpt-5.2)\n- [openai/gpt-5.2-pro](/openai/gpt-5.2-pro)\n- [openai/gpt-oss-120b](/openai/gpt-oss-120b)\n- [perplexity/sonar](/perplexity/sonar)\n- [qwen/qwen3-14b](/qwen/qwen3-14b)\n- [qwen/qwen3-235b-a22b](/qwen/qwen3-235b-a22b)\n- [qwen/qwen3-32b](/qwen/qwen3-32b)\n- [x-ai/grok-3](/x-ai/grok-3)\n- [x-ai/grok-3-mini](/x-ai/grok-3-mini)\n- [x-ai/grok-4](/x-ai/grok-4)", + "description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output.\n\nTo see which model was used, visit 
[Activity](/activity), or read the `model` attribute of the response. Your response will be priced at the same rate as the routed model.\n\nLearn more, including how to customize the models for routing, in our [docs](/docs/guides/routing/routers/auto-router).\n\nRequests will be routed to the following models:\n- [anthropic/claude-haiku-4.5](/anthropic/claude-haiku-4.5)\n- [anthropic/claude-opus-4.6](/anthropic/claude-opus-4.6)\n- [anthropic/claude-sonnet-4.5](/anthropic/claude-sonnet-4.5)\n- [deepseek/deepseek-r1](/deepseek/deepseek-r1)\n- [google/gemini-2.5-flash-lite](/google/gemini-2.5-flash-lite)\n- [google/gemini-3-flash-preview](/google/gemini-3-flash-preview)\n- [google/gemini-3-pro-preview](/google/gemini-3-pro-preview)\n- [meta-llama/llama-3.3-70b-instruct](/meta-llama/llama-3.3-70b-instruct)\n- [mistralai/codestral-2508](/mistralai/codestral-2508)\n- [mistralai/mistral-large](/mistralai/mistral-large)\n- [mistralai/mistral-medium-3.1](/mistralai/mistral-medium-3.1)\n- [mistralai/mistral-small-3.2-24b-instruct-2506](/mistralai/mistral-small-3.2-24b-instruct-2506)\n- [moonshotai/kimi-k2-thinking](/moonshotai/kimi-k2-thinking)\n- [moonshotai/kimi-k2.5](/moonshotai/kimi-k2.5)\n- [openai/gpt-5](/openai/gpt-5)\n- [openai/gpt-5-mini](/openai/gpt-5-mini)\n- [openai/gpt-5-nano](/openai/gpt-5-nano)\n- [openai/gpt-5.1](/openai/gpt-5.1)\n- [openai/gpt-5.2](/openai/gpt-5.2)\n- [openai/gpt-5.2-pro](/openai/gpt-5.2-pro)\n- [openai/gpt-oss-120b](/openai/gpt-oss-120b)\n- [perplexity/sonar](/perplexity/sonar)\n- [qwen/qwen3-235b-a22b](/qwen/qwen3-235b-a22b)\n- [x-ai/grok-3](/x-ai/grok-3)\n- [x-ai/grok-3-mini](/x-ai/grok-3-mini)\n- [x-ai/grok-4](/x-ai/grok-4)", "context_length": 2000000, "architecture": { "modality": "text+image+file+audio+video->text+image", @@ -12118,56 +11508,6 @@ }, "expiration_date": null }, - { - "id": "openrouter/pony-alpha", - "canonical_slug": "openrouter/pony-alpha", - "hugging_face_id": "", - "name": "Pony Alpha", - "created": 1770393855, - 
"description": "Pony is a cutting-edge foundation model with strong performance in coding, agentic workflows, reasoning, and roleplay, making it well suited for hands-on coding and real-world use.\n\n**Note:** All prompts and completions for this model are logged by the provider and may be used to improve the model.", - "context_length": 200000, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Other", - "instruct_type": null - }, - "pricing": { - "prompt": "0", - "completion": "0", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" - }, - "top_provider": { - "context_length": 200000, - "max_completion_tokens": 131000, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "include_reasoning", - "max_tokens", - "reasoning", - "response_format", - "structured_outputs", - "temperature", - "tools", - "top_p" - ], - "default_parameters": { - "temperature": 1, - "top_p": 0.95, - "frequency_penalty": null - }, - "expiration_date": null - }, { "id": "perplexity/sonar", "canonical_slug": "perplexity/sonar", @@ -12191,10 +11531,7 @@ "pricing": { "prompt": "0.000001", "completion": "0.000001", - "request": "0.005", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" + "web_search": "0.005" }, "top_provider": { "context_length": 127072, @@ -12236,8 +11573,6 @@ "pricing": { "prompt": "0.000002", "completion": "0.000008", - "request": "0", - "image": "0", "web_search": "0.005", "internal_reasoning": "0.000003" }, @@ -12284,10 +11619,7 @@ "pricing": { "prompt": "0.000003", "completion": "0.000015", - "request": "0", - "image": "0", - "web_search": "0.005", - "internal_reasoning": "0" + "web_search": "0.005" }, "top_provider": { "context_length": 200000, @@ -12330,10 +11662,7 @@ "pricing": { "prompt": "0.000003", "completion": "0.000015", - "request": "0.018", - "image": "0", - "web_search": "0", - 
"internal_reasoning": "0" + "web_search": "0.018" }, "top_provider": { "context_length": 200000, @@ -12383,10 +11712,7 @@ "pricing": { "prompt": "0.000002", "completion": "0.000008", - "request": "0", - "image": "0", - "web_search": "0.005", - "internal_reasoning": "0" + "web_search": "0.005" }, "top_provider": { "context_length": 128000, @@ -12493,7 +11819,6 @@ "per_request_limits": null, "supported_parameters": [ "frequency_penalty", - "logit_bias", "max_tokens", "min_p", "presence_penalty", @@ -12547,6 +11872,7 @@ "min_p", "presence_penalty", "repetition_penalty", + "response_format", "seed", "stop", "temperature", @@ -12582,13 +11908,12 @@ "instruct_type": "chatml" }, "pricing": { - "prompt": "0.00000003", - "completion": "0.00000011", - "input_cache_read": "0.000000015" + "prompt": "0.00000020000000000000002", + "completion": "0.00000020000000000000002" }, "top_provider": { "context_length": 32768, - "max_completion_tokens": 32768, + "max_completion_tokens": 8192, "is_moderated": false }, "per_request_limits": null, @@ -12599,10 +11924,8 @@ "min_p", "presence_penalty", "repetition_penalty", - "response_format", "seed", "stop", - "structured_outputs", "temperature", "top_k", "top_p" @@ -12631,8 +11954,8 @@ "instruct_type": null }, "pricing": { - "prompt": "0.0000002", - "completion": "0.0000002" + "prompt": "0.00000020000000000000002", + "completion": "0.00000020000000000000002" }, "top_provider": { "context_length": 32768, @@ -12678,11 +12001,7 @@ "pricing": { "prompt": "0.0000016", "completion": "0.0000064", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0", - "input_cache_read": "0.00000064" + "input_cache_read": "0.00000032" }, "top_provider": { "context_length": 32768, @@ -12710,7 +12029,7 @@ "name": "Qwen: Qwen-Plus", "created": 1738409840, "description": "Qwen-Plus, based on the Qwen2.5 foundation model, is a 131K context model with a balanced performance, speed, and cost combination.", - "context_length": 131072, + 
"context_length": 1000000, "architecture": { "modality": "text->text", "input_modalities": [ @@ -12725,15 +12044,11 @@ "pricing": { "prompt": "0.0000004", "completion": "0.0000012", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0", - "input_cache_read": "0.00000016" + "input_cache_read": "0.00000008" }, "top_provider": { - "context_length": 131072, - "max_completion_tokens": 8192, + "context_length": 1000000, + "max_completion_tokens": 32768, "is_moderated": false }, "per_request_limits": null, @@ -12771,11 +12086,7 @@ }, "pricing": { "prompt": "0.0000004", - "completion": "0.0000012", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" + "completion": "0.0000012" }, "top_provider": { "context_length": 1000000, @@ -12822,11 +12133,7 @@ }, "pricing": { "prompt": "0.0000004", - "completion": "0.000004", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" + "completion": "0.0000012" }, "top_provider": { "context_length": 1000000, @@ -12861,7 +12168,7 @@ "name": "Qwen: Qwen-Turbo", "created": 1738410974, "description": "Qwen-Turbo, based on Qwen2.5, is a 1M context model that provides fast speed and low cost, suitable for simple tasks.", - "context_length": 1000000, + "context_length": 131072, "architecture": { "modality": "text->text", "input_modalities": [ @@ -12876,14 +12183,10 @@ "pricing": { "prompt": "0.00000005", "completion": "0.0000002", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0", - "input_cache_read": "0.00000002" + "input_cache_read": "0.00000001" }, "top_provider": { - "context_length": 1000000, + "context_length": 131072, "max_completion_tokens": 8192, "is_moderated": false }, @@ -12923,15 +12226,11 @@ }, "pricing": { "prompt": "0.0000008", - "completion": "0.0000032", - "request": "0", - "image": "0.001024", - "web_search": "0", - "internal_reasoning": "0" + "completion": "0.0000032" }, "top_provider": { "context_length": 
131072, - "max_completion_tokens": 8192, + "max_completion_tokens": 32768, "is_moderated": false }, "per_request_limits": null, @@ -12959,7 +12258,7 @@ "name": "Qwen: Qwen VL Plus", "created": 1738731255, "description": "Qwen's Enhanced Large Visual Language Model. Significantly upgraded for detailed recognition capabilities and text recognition abilities, supporting ultra-high pixel resolutions up to millions of pixels and extreme aspect ratios for image input. It delivers significant performance across a broad range of visual tasks.\n", - "context_length": 7500, + "context_length": 131072, "architecture": { "modality": "text+image->text", "input_modalities": [ @@ -12975,14 +12274,11 @@ "pricing": { "prompt": "0.00000021", "completion": "0.00000063", - "request": "0", - "image": "0.0002688", - "web_search": "0", - "internal_reasoning": "0" + "input_cache_read": "0.000000042" }, "top_provider": { - "context_length": 7500, - "max_completion_tokens": 1500, + "context_length": 131072, + "max_completion_tokens": 8192, "is_moderated": false }, "per_request_limits": null, @@ -13047,7 +12343,7 @@ "name": "Qwen: Qwen2.5 VL 32B Instruct", "created": 1742839838, "description": "Qwen2.5-VL-32B is a multimodal vision-language model fine-tuned through reinforcement learning for enhanced mathematical reasoning, structured outputs, and visual problem-solving capabilities. It excels at visual analysis tasks, including object recognition, textual interpretation within images, and precise event localization in extended videos. 
Qwen2.5-VL-32B demonstrates state-of-the-art performance across multimodal benchmarks such as MMMU, MathVista, and VideoMME, while maintaining strong reasoning and clarity in text-based tasks like MMLU, mathematical problem-solving, and code generation.", - "context_length": 16384, + "context_length": 128000, "architecture": { "modality": "text+image->text", "input_modalities": [ @@ -13061,20 +12357,17 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000005", - "completion": "0.00000022", - "input_cache_read": "0.000000025" + "prompt": "0.0000002", + "completion": "0.0000006" }, "top_provider": { - "context_length": 16384, - "max_completion_tokens": 16384, + "context_length": 128000, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null, "supported_parameters": [ "frequency_penalty", - "logit_bias", - "logprobs", "max_tokens", "min_p", "presence_penalty", @@ -13082,10 +12375,8 @@ "response_format", "seed", "stop", - "structured_outputs", "temperature", "top_k", - "top_logprobs", "top_p" ], "default_parameters": {}, @@ -13112,9 +12403,8 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000015", - "completion": "0.0000006", - "input_cache_read": "0.000000075" + "prompt": "0.0000008", + "completion": "0.0000008" }, "top_provider": { "context_length": 32768, @@ -13126,7 +12416,6 @@ "frequency_penalty", "logit_bias", "max_tokens", - "min_p", "presence_penalty", "repetition_penalty", "response_format", @@ -13138,7 +12427,7 @@ "top_p" ], "default_parameters": {}, - "expiration_date": "2026-02-16" + "expiration_date": null }, { "id": "qwen/qwen3-14b", @@ -13160,9 +12449,8 @@ "instruct_type": "qwen3" }, "pricing": { - "prompt": "0.00000005", - "completion": "0.00000022", - "input_cache_read": "0.000000025" + "prompt": "0.00000006", + "completion": "0.00000024" }, "top_provider": { "context_length": 40960, @@ -13198,7 +12486,7 @@ "name": "Qwen: Qwen3 235B A22B", "created": 1745875757, "description": "Qwen3-235B-A22B is a 235B 
parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and code tasks, and a \"non-thinking\" mode for general conversational efficiency. The model demonstrates strong reasoning ability, multilingual support (100+ languages and dialects), advanced instruction-following, and agent tool-calling capabilities. It natively handles a 32K token context window and extends up to 131K tokens using YaRN-based scaling.", - "context_length": 40960, + "context_length": 131072, "architecture": { "modality": "text->text", "input_modalities": [ @@ -13211,34 +12499,25 @@ "instruct_type": "qwen3" }, "pricing": { - "prompt": "0.0000002", - "completion": "0.0000006" + "prompt": "0.000000455", + "completion": "0.00000182" }, "top_provider": { - "context_length": 40960, - "max_completion_tokens": null, + "context_length": 131072, + "max_completion_tokens": 8192, "is_moderated": false }, "per_request_limits": null, "supported_parameters": [ - "frequency_penalty", "include_reasoning", - "logit_bias", - "logprobs", "max_tokens", - "min_p", "presence_penalty", "reasoning", - "repetition_penalty", "response_format", "seed", - "stop", - "structured_outputs", "temperature", "tool_choice", "tools", - "top_k", - "top_logprobs", "top_p" ], "default_parameters": {}, @@ -13305,7 +12584,7 @@ "name": "Qwen: Qwen3 235B A22B Thinking 2507", "created": 1753449557, "description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144 tokens of context. This \"thinking-only\" variant enhances structured logical reasoning, mathematics, science, and long-form generation, showing strong benchmark performance across AIME, SuperGPQA, LiveCodeBench, and MMLU-Redux. 
It enforces a special reasoning mode () and is designed for high-token outputs (up to 81,920 tokens) in challenging domains.\n\nThe model is instruction-tuned and excels at step-by-step reasoning, tool use, agentic workflows, and multilingual tasks. This release represents the most capable open-source variant in the Qwen3-235B series, surpassing many closed models in structured reasoning use cases.", - "context_length": 262144, + "context_length": 131072, "architecture": { "modality": "text->text", "input_modalities": [ @@ -13318,13 +12597,16 @@ "instruct_type": "qwen3" }, "pricing": { - "prompt": "0.00000011", - "completion": "0.0000006", - "input_cache_read": "0.000000055" + "prompt": "0", + "completion": "0", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" }, "top_provider": { - "context_length": 262144, - "max_completion_tokens": 262144, + "context_length": 131072, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null, @@ -13374,9 +12656,8 @@ "instruct_type": "qwen3" }, "pricing": { - "prompt": "0.00000006", - "completion": "0.00000022", - "input_cache_read": "0.00000003" + "prompt": "0.00000008", + "completion": "0.00000028" }, "top_provider": { "context_length": 40960, @@ -13429,9 +12710,8 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000008", - "completion": "0.00000033", - "input_cache_read": "0.00000004" + "prompt": "0.00000009", + "completion": "0.0000003" }, "top_provider": { "context_length": 262144, @@ -13446,7 +12726,6 @@ "repetition_penalty", "response_format", "seed", - "stop", "structured_outputs", "temperature", "tool_choice", @@ -13538,7 +12817,6 @@ "supported_parameters": [ "frequency_penalty", "include_reasoning", - "logprobs", "max_tokens", "min_p", "presence_penalty", @@ -13552,7 +12830,6 @@ "tool_choice", "tools", "top_k", - "top_logprobs", "top_p" ], "default_parameters": {}, @@ -13612,7 +12889,7 @@ "name": "Qwen: Qwen3 8B", "created": 1745876632, "description": 
"Qwen3-8B is a dense 8.2B parameter causal language model from the Qwen3 series, designed for both reasoning-heavy tasks and efficient dialogue. It supports seamless switching between \"thinking\" mode for math, coding, and logical inference, and \"non-thinking\" mode for general conversation. The model is fine-tuned for instruction-following, agent integration, creative writing, and multilingual use across 100+ languages and dialects. It natively supports a 32K token context window and can extend to 131K tokens with YaRN scaling.", - "context_length": 32000, + "context_length": 40960, "architecture": { "modality": "text->text", "input_modalities": [ @@ -13630,7 +12907,7 @@ "input_cache_read": "0.00000005" }, "top_provider": { - "context_length": 32000, + "context_length": 40960, "max_completion_tokens": 8192, "is_moderated": false }, @@ -13638,8 +12915,10 @@ "supported_parameters": [ "include_reasoning", "max_tokens", + "presence_penalty", "reasoning", "response_format", + "seed", "structured_outputs", "temperature", "tool_choice", @@ -13686,11 +12965,9 @@ "supported_parameters": [ "frequency_penalty", "logit_bias", - "logprobs", "max_tokens", "min_p", "presence_penalty", - "reasoning", "repetition_penalty", "response_format", "seed", @@ -13700,7 +12977,6 @@ "tool_choice", "tools", "top_k", - "top_logprobs", "top_p" ], "default_parameters": {}, @@ -13760,7 +13036,7 @@ "name": "Qwen: Qwen3 Coder Flash", "created": 1758115536, "description": "Qwen3 Coder Flash is Alibaba's fast and cost efficient version of their proprietary Qwen3 Coder Plus. 
It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.", - "context_length": 128000, + "context_length": 1000000, "architecture": { "modality": "text->text", "input_modalities": [ @@ -13775,14 +13051,10 @@ "pricing": { "prompt": "0.0000003", "completion": "0.0000015", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0", - "input_cache_read": "0.00000008" + "input_cache_read": "0.00000006" }, "top_provider": { - "context_length": 128000, + "context_length": 1000000, "max_completion_tokens": 65536, "is_moderated": false }, @@ -13824,9 +13096,9 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000007", - "completion": "0.0000003", - "input_cache_read": "0.000000035" + "prompt": "0.00000012", + "completion": "0.00000075", + "input_cache_read": "0.00000006" }, "top_provider": { "context_length": 262144, @@ -13865,7 +13137,7 @@ "name": "Qwen: Qwen3 Coder Plus", "created": 1758662707, "description": "Qwen3 Coder Plus is Alibaba's proprietary version of the Open Source Qwen3 Coder 480B A35B. 
It is a powerful coding agent model specializing in autonomous programming via tool calling and environment interaction, combining coding proficiency with versatile general-purpose abilities.", - "context_length": 128000, + "context_length": 1000000, "architecture": { "modality": "text->text", "input_modalities": [ @@ -13880,14 +13152,10 @@ "pricing": { "prompt": "0.000001", "completion": "0.000005", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0", - "input_cache_read": "0.0000001" + "input_cache_read": "0.0000002" }, "top_provider": { - "context_length": 128000, + "context_length": 1000000, "max_completion_tokens": 65536, "is_moderated": false }, @@ -14008,7 +13276,7 @@ "name": "Qwen: Qwen3 Max", "created": 1758662808, "description": "Qwen3-Max is an updated release built on the Qwen3 series, offering major improvements in reasoning, instruction following, multilingual support, and long-tail knowledge coverage compared to the January 2025 version. It delivers higher accuracy in math, coding, logic, and science tasks, follows complex instructions in Chinese and English more reliably, reduces hallucinations, and produces higher-quality responses for open-ended Q&A, writing, and conversation. 
The model supports over 100 languages with stronger translation and commonsense reasoning, and is optimized for retrieval-augmented generation (RAG) and tool calling, though it does not include a dedicated “thinking” mode.", - "context_length": 256000, + "context_length": 262144, "architecture": { "modality": "text->text", "input_modalities": [ @@ -14023,14 +13291,10 @@ "pricing": { "prompt": "0.0000012", "completion": "0.000006", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0", "input_cache_read": "0.00000024" }, "top_provider": { - "context_length": 256000, + "context_length": 262144, "max_completion_tokens": 32768, "is_moderated": false }, @@ -14235,7 +13499,6 @@ "supported_parameters": [ "frequency_penalty", "logit_bias", - "logprobs", "max_tokens", "min_p", "presence_penalty", @@ -14248,7 +13511,6 @@ "tool_choice", "tools", "top_k", - "top_logprobs", "top_p" ], "default_parameters": { @@ -14265,7 +13527,7 @@ "name": "Qwen: Qwen3 VL 235B A22B Thinking", "created": 1758668690, "description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math. The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows, turning sketches or mockups into code and assisting with UI debugging, while maintaining strong text-only performance comparable to the flagship Qwen3 language models. 
This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.", - "context_length": 262144, + "context_length": 131072, "architecture": { "modality": "text+image->text", "input_modalities": [ @@ -14279,12 +13541,16 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000045", - "completion": "0.0000035" + "prompt": "0", + "completion": "0", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" }, "top_provider": { - "context_length": 262144, - "max_completion_tokens": 262144, + "context_length": 131072, + "max_completion_tokens": 32768, "is_moderated": false }, "per_request_limits": null, @@ -14319,7 +13585,7 @@ "name": "Qwen: Qwen3 VL 30B A3B Instruct", "created": 1759794476, "description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception of real-world/synthetic categories, 2D/3D spatial grounding, and long-form visual comprehension, achieving competitive multimodal benchmark results. For agentic use, it handles multi-image multi-turn instructions, video timeline alignments, GUI automation, and visual coding from sketches to debugged UI. 
Text performance matches flagship Qwen3 models, suiting document AI, OCR, UI assistance, spatial tasks, and agent research.", - "context_length": 262144, + "context_length": 131072, "architecture": { "modality": "text+image->text", "input_modalities": [ @@ -14333,20 +13599,17 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000015", - "completion": "0.0000006", - "input_cache_read": "0.000000075" + "prompt": "0.00000013", + "completion": "0.00000052" }, "top_provider": { - "context_length": 262144, - "max_completion_tokens": null, + "context_length": 131072, + "max_completion_tokens": 32768, "is_moderated": false }, "per_request_limits": null, "supported_parameters": [ "frequency_penalty", - "logit_bias", - "logprobs", "max_tokens", "min_p", "presence_penalty", @@ -14359,7 +13622,6 @@ "tool_choice", "tools", "top_k", - "top_logprobs", "top_p" ], "default_parameters": { @@ -14390,8 +13652,12 @@ "instruct_type": null }, "pricing": { - "prompt": "0.0000002", - "completion": "0.000001" + "prompt": "0", + "completion": "0", + "request": "0", + "image": "0", + "web_search": "0", + "internal_reasoning": "0" }, "top_provider": { "context_length": 131072, @@ -14429,7 +13695,7 @@ "name": "Qwen: Qwen3 VL 32B Instruct", "created": 1761231332, "description": "Qwen3-VL-32B-Instruct is a large-scale multimodal vision-language model designed for high-precision understanding and reasoning across text, images, and video. With 32 billion parameters, it combines deep visual perception with advanced text comprehension, enabling fine-grained spatial reasoning, document and scene analysis, and long-horizon video understanding.Robust OCR in 32 languages, and enhanced multimodal fusion through Interleaved-MRoPE and DeepStack architectures. 
Optimized for agentic interaction and visual tool use, Qwen3-VL-32B delivers state-of-the-art performance for complex real-world multimodal tasks.", - "context_length": 262144, + "context_length": 131072, "architecture": { "modality": "text+image->text", "input_modalities": [ @@ -14443,12 +13709,12 @@ "instruct_type": null }, "pricing": { - "prompt": "0.0000005", - "completion": "0.0000015" + "prompt": "0.000000104", + "completion": "0.000000416" }, "top_provider": { - "context_length": 262144, - "max_completion_tokens": null, + "context_length": 131072, + "max_completion_tokens": 32768, "is_moderated": false }, "per_request_limits": null, @@ -14460,9 +13726,12 @@ "presence_penalty", "repetition_penalty", "response_format", + "seed", "stop", "structured_outputs", "temperature", + "tool_choice", + "tools", "top_k", "top_p" ], @@ -14471,7 +13740,7 @@ "top_p": null, "frequency_penalty": null }, - "expiration_date": null + "expiration_date": "2026-02-25" }, { "id": "qwen/qwen3-vl-8b-instruct", @@ -14534,7 +13803,7 @@ "name": "Qwen: Qwen3 VL 8B Thinking", "created": 1760463746, "description": "Qwen3-VL-8B-Thinking is the reasoning-optimized variant of the Qwen3-VL-8B multimodal model, designed for advanced visual and textual reasoning across complex scenes, documents, and temporal sequences. It integrates enhanced multimodal alignment and long-context processing (native 256K, expandable to 1M tokens) for tasks such as scientific visual analysis, causal inference, and mathematical reasoning over image or video inputs.\n\nCompared to the Instruct edition, the Thinking version introduces deeper visual-language fusion and deliberate reasoning pathways that improve performance on long-chain logic tasks, STEM problem-solving, and multi-step video understanding. 
It achieves stronger temporal grounding via Interleaved-MRoPE and timestamp-aware embeddings, while maintaining robust OCR, multilingual comprehension, and text generation on par with large text-only LLMs.", - "context_length": 256000, + "context_length": 131072, "architecture": { "modality": "text+image->text", "input_modalities": [ @@ -14548,15 +13817,11 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000018", - "completion": "0.0000021", - "request": "0", - "image": "0", - "web_search": "0", - "internal_reasoning": "0" + "prompt": "0.000000117", + "completion": "0.000001365" }, "top_provider": { - "context_length": 256000, + "context_length": 131072, "max_completion_tokens": 32768, "is_moderated": false }, @@ -14612,20 +13877,18 @@ "supported_parameters": [ "frequency_penalty", "include_reasoning", - "logit_bias", + "logprobs", "max_tokens", - "min_p", "presence_penalty", "reasoning", - "repetition_penalty", "response_format", - "seed", "stop", "structured_outputs", "temperature", "tool_choice", "tools", "top_k", + "top_logprobs", "top_p" ], "default_parameters": {}, @@ -14977,6 +14240,7 @@ "per_request_limits": null, "supported_parameters": [ "frequency_penalty", + "logprobs", "max_tokens", "min_p", "presence_penalty", @@ -14987,6 +14251,7 @@ "structured_outputs", "temperature", "top_k", + "top_logprobs", "top_p" ], "default_parameters": {}, @@ -15204,6 +14469,7 @@ "supported_parameters": [ "frequency_penalty", "logit_bias", + "logprobs", "max_tokens", "min_p", "presence_penalty", @@ -15216,6 +14482,7 @@ "tool_choice", "tools", "top_k", + "top_logprobs", "top_p" ], "default_parameters": {}, @@ -15296,6 +14563,7 @@ "per_request_limits": null, "supported_parameters": [ "frequency_penalty", + "logprobs", "max_tokens", "presence_penalty", "response_format", @@ -15304,18 +14572,19 @@ "temperature", "tool_choice", "tools", + "top_logprobs", "top_p" ], "default_parameters": {}, "expiration_date": null }, { - "id": "tngtech/deepseek-r1t-chimera", - 
"canonical_slug": "tngtech/deepseek-r1t-chimera", - "hugging_face_id": "tngtech/DeepSeek-R1T-Chimera", - "name": "TNG: DeepSeek R1T Chimera", - "created": 1745760875, - "description": "DeepSeek-R1T-Chimera is created by merging DeepSeek-R1 and DeepSeek-V3 (0324), combining the reasoning capabilities of R1 with the token efficiency improvements of V3. It is based on a DeepSeek-MoE Transformer architecture and is optimized for general text generation tasks.\n\nThe model merges pretrained weights from both source models to balance performance across reasoning, efficiency, and instruction-following tasks. It is released under the MIT license and intended for research and commercial use.", + "id": "tngtech/deepseek-r1t2-chimera", + "canonical_slug": "tngtech/deepseek-r1t2-chimera", + "hugging_face_id": "tngtech/DeepSeek-TNG-R1T2-Chimera", + "name": "TNG: DeepSeek R1T2 Chimera", + "created": 1751986985, + "description": "DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech. It is a 671 B-parameter mixture-of-experts text-generation model assembled from DeepSeek-AI’s R1-0528, R1, and V3-0324 checkpoints with an Assembly-of-Experts merge. The tri-parent design yields strong reasoning performance while running roughly 20 % faster than the original R1 and more than 2× faster than R1-0528 under vLLM, giving a favorable cost-to-intelligence trade-off. 
The checkpoint supports contexts up to 60 k tokens in standard use (tested to ~130 k) and maintains consistent token behaviour, making it suitable for long-context analysis, dialogue and other open-ended generation tasks.", "context_length": 163840, "architecture": { "modality": "text->text", @@ -15329,9 +14598,9 @@ "instruct_type": null }, "pricing": { - "prompt": "0.0000003", - "completion": "0.0000012", - "input_cache_read": "0.00000015" + "prompt": "0.00000025", + "completion": "0.00000085", + "input_cache_read": "0.000000125" }, "top_provider": { "context_length": 163840, @@ -15351,6 +14620,8 @@ "stop", "structured_outputs", "temperature", + "tool_choice", + "tools", "top_k", "top_p" ], @@ -15358,13 +14629,13 @@ "expiration_date": null }, { - "id": "tngtech/deepseek-r1t-chimera:free", - "canonical_slug": "tngtech/deepseek-r1t-chimera", - "hugging_face_id": "tngtech/DeepSeek-R1T-Chimera", - "name": "TNG: DeepSeek R1T Chimera (free)", - "created": 1745760875, - "description": "DeepSeek-R1T-Chimera is created by merging DeepSeek-R1 and DeepSeek-V3 (0324), combining the reasoning capabilities of R1 with the token efficiency improvements of V3. It is based on a DeepSeek-MoE Transformer architecture and is optimized for general text generation tasks.\n\nThe model merges pretrained weights from both source models to balance performance across reasoning, efficiency, and instruction-following tasks. It is released under the MIT license and intended for research and commercial use.", - "context_length": 163840, + "id": "undi95/remm-slerp-l2-13b", + "canonical_slug": "undi95/remm-slerp-l2-13b", + "hugging_face_id": "Undi95/ReMM-SLERP-L2-13B", + "name": "ReMM SLERP 13B", + "created": 1689984000, + "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. 
#merge", + "context_length": 6144, "architecture": { "modality": "text->text", "input_modalities": [ @@ -15373,43 +14644,48 @@ "output_modalities": [ "text" ], - "tokenizer": "DeepSeek", - "instruct_type": null + "tokenizer": "Llama2", + "instruct_type": "alpaca" }, "pricing": { - "prompt": "0", - "completion": "0" + "prompt": "0.00000045", + "completion": "0.00000065" }, "top_provider": { - "context_length": 163840, - "max_completion_tokens": null, + "context_length": 6144, + "max_completion_tokens": 4096, "is_moderated": false }, "per_request_limits": null, "supported_parameters": [ "frequency_penalty", - "include_reasoning", + "logit_bias", + "logprobs", "max_tokens", + "min_p", "presence_penalty", - "reasoning", "repetition_penalty", + "response_format", "seed", "stop", + "structured_outputs", "temperature", + "top_a", "top_k", + "top_logprobs", "top_p" ], "default_parameters": {}, "expiration_date": null }, { - "id": "tngtech/deepseek-r1t2-chimera", - "canonical_slug": "tngtech/deepseek-r1t2-chimera", - "hugging_face_id": "tngtech/DeepSeek-TNG-R1T2-Chimera", - "name": "TNG: DeepSeek R1T2 Chimera", - "created": 1751986985, - "description": "DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech. It is a 671 B-parameter mixture-of-experts text-generation model assembled from DeepSeek-AI’s R1-0528, R1, and V3-0324 checkpoints with an Assembly-of-Experts merge. The tri-parent design yields strong reasoning performance while running roughly 20 % faster than the original R1 and more than 2× faster than R1-0528 under vLLM, giving a favorable cost-to-intelligence trade-off. 
The checkpoint supports contexts up to 60 k tokens in standard use (tested to ~130 k) and maintains consistent token behaviour, making it suitable for long-context analysis, dialogue and other open-ended generation tasks.", - "context_length": 163840, + "id": "upstage/solar-pro-3:free", + "canonical_slug": "upstage/solar-pro-3", + "hugging_face_id": "", + "name": "Upstage: Solar Pro 3 (free)", + "created": 1769481200, + "description": "Solar Pro 3 is Upstage's powerful Mixture-of-Experts (MoE) language model. With 102B total parameters and 12B active parameters per forward pass, it delivers exceptional performance while maintaining computational efficiency. Optimized for Korean with English and Japanese support.", + "context_length": 128000, "architecture": { "modality": "text->text", "input_modalities": [ @@ -15418,48 +14694,44 @@ "output_modalities": [ "text" ], - "tokenizer": "DeepSeek", + "tokenizer": "Other", "instruct_type": null }, "pricing": { - "prompt": "0.00000025", - "completion": "0.00000085", - "input_cache_read": "0.000000125" + "prompt": "0", + "completion": "0" }, "top_provider": { - "context_length": 163840, - "max_completion_tokens": 163840, + "context_length": 128000, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null, "supported_parameters": [ - "frequency_penalty", "include_reasoning", "max_tokens", - "presence_penalty", "reasoning", - "repetition_penalty", "response_format", - "seed", - "stop", "structured_outputs", "temperature", "tool_choice", - "tools", - "top_k", - "top_p" + "tools" ], - "default_parameters": {}, - "expiration_date": null + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, + "expiration_date": "2026-03-22" }, { - "id": "tngtech/deepseek-r1t2-chimera:free", - "canonical_slug": "tngtech/deepseek-r1t2-chimera", - "hugging_face_id": "tngtech/DeepSeek-TNG-R1T2-Chimera", - "name": "TNG: DeepSeek R1T2 Chimera (free)", - "created": 1751986985, - 
"description": "DeepSeek-TNG-R1T2-Chimera is the second-generation Chimera model from TNG Tech. It is a 671 B-parameter mixture-of-experts text-generation model assembled from DeepSeek-AI’s R1-0528, R1, and V3-0324 checkpoints with an Assembly-of-Experts merge. The tri-parent design yields strong reasoning performance while running roughly 20 % faster than the original R1 and more than 2× faster than R1-0528 under vLLM, giving a favorable cost-to-intelligence trade-off. The checkpoint supports contexts up to 60 k tokens in standard use (tested to ~130 k) and maintains consistent token behaviour, making it suitable for long-context analysis, dialogue and other open-ended generation tasks.", - "context_length": 163840, + "id": "writer/palmyra-x5", + "canonical_slug": "writer/palmyra-x5-20250428", + "hugging_face_id": "", + "name": "Writer: Palmyra X5", + "created": 1769003823, + "description": "Palmyra X5 is Writer's most advanced model, purpose-built for building and scaling AI agents across the enterprise. It delivers industry-leading speed and efficiency on context windows up to 1 million tokens, powered by a novel transformer architecture and hybrid attention mechanisms. 
This enables faster inference and expanded memory for processing large volumes of enterprise data, critical for scaling AI agents.", + "context_length": 1040000, "architecture": { "modality": "text->text", "input_modalities": [ @@ -15468,43 +14740,41 @@ "output_modalities": [ "text" ], - "tokenizer": "DeepSeek", + "tokenizer": "Other", "instruct_type": null }, "pricing": { - "prompt": "0", - "completion": "0" + "prompt": "0.0000006", + "completion": "0.000006" }, "top_provider": { - "context_length": 163840, - "max_completion_tokens": null, - "is_moderated": false + "context_length": 1040000, + "max_completion_tokens": 8192, + "is_moderated": true }, "per_request_limits": null, "supported_parameters": [ - "frequency_penalty", - "include_reasoning", "max_tokens", - "presence_penalty", - "reasoning", - "repetition_penalty", - "seed", "stop", "temperature", "top_k", "top_p" ], - "default_parameters": {}, + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, "expiration_date": null }, { - "id": "tngtech/tng-r1t-chimera", - "canonical_slug": "tngtech/tng-r1t-chimera", - "hugging_face_id": null, - "name": "TNG: R1T Chimera", - "created": 1764184161, - "description": "TNG-R1T-Chimera is an experimental LLM with a faible for creative storytelling and character interaction. It is a derivate of the original TNG/DeepSeek-R1T-Chimera released in April 2025 and is available exclusively via Chutes and OpenRouter.\n\nCharacteristics and improvements include:\n\nWe think that it has a creative and pleasant personality.\nIt has a preliminary EQ-Bench3 value of about 1305.\nIt is quite a bit more intelligent than the original, albeit a slightly slower.\nIt is much more think-token consistent, i.e. reasoning and answer blocks are properly delineated.\nTool calling is much improved.\n\nTNG Tech, the model authors, ask that users follow the careful guidelines that Microsoft has created for their \"MAI-DS-R1\" DeepSeek-based model. 
These guidelines are available on Hugging Face (https://huggingface.co/microsoft/MAI-DS-R1).", - "context_length": 163840, + "id": "x-ai/grok-3", + "canonical_slug": "x-ai/grok-3", + "hugging_face_id": "", + "name": "xAI: Grok 3", + "created": 1749582908, + "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in finance, healthcare, law, and science.\n\n", + "context_length": 131072, "architecture": { "modality": "text->text", "input_modalities": [ @@ -15513,260 +14783,14 @@ "output_modalities": [ "text" ], - "tokenizer": "Other", + "tokenizer": "Grok", "instruct_type": null }, "pricing": { - "prompt": "0.00000025", - "completion": "0.00000085", - "input_cache_read": "0.000000125" - }, - "top_provider": { - "context_length": 163840, - "max_completion_tokens": 65536, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "include_reasoning", - "max_tokens", - "presence_penalty", - "reasoning", - "repetition_penalty", - "response_format", - "seed", - "stop", - "structured_outputs", - "temperature", - "tool_choice", - "tools", - "top_k", - "top_p" - ], - "default_parameters": { - "temperature": null, - "top_p": null, - "frequency_penalty": null - }, - "expiration_date": null - }, - { - "id": "tngtech/tng-r1t-chimera:free", - "canonical_slug": "tngtech/tng-r1t-chimera", - "hugging_face_id": null, - "name": "TNG: R1T Chimera (free)", - "created": 1764184161, - "description": "TNG-R1T-Chimera is an experimental LLM with a faible for creative storytelling and character interaction. 
It is a derivate of the original TNG/DeepSeek-R1T-Chimera released in April 2025 and is available exclusively via Chutes and OpenRouter.\n\nCharacteristics and improvements include:\n\nWe think that it has a creative and pleasant personality.\nIt has a preliminary EQ-Bench3 value of about 1305.\nIt is quite a bit more intelligent than the original, albeit a slightly slower.\nIt is much more think-token consistent, i.e. reasoning and answer blocks are properly delineated.\nTool calling is much improved.\n\nTNG Tech, the model authors, ask that users follow the careful guidelines that Microsoft has created for their \"MAI-DS-R1\" DeepSeek-based model. These guidelines are available on Hugging Face (https://huggingface.co/microsoft/MAI-DS-R1).", - "context_length": 163840, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Other", - "instruct_type": null - }, - "pricing": { - "prompt": "0", - "completion": "0" - }, - "top_provider": { - "context_length": 163840, - "max_completion_tokens": 65536, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "include_reasoning", - "max_tokens", - "presence_penalty", - "reasoning", - "repetition_penalty", - "response_format", - "seed", - "stop", - "structured_outputs", - "temperature", - "tool_choice", - "tools", - "top_k", - "top_p" - ], - "default_parameters": { - "temperature": null, - "top_p": null, - "frequency_penalty": null - }, - "expiration_date": null - }, - { - "id": "undi95/remm-slerp-l2-13b", - "canonical_slug": "undi95/remm-slerp-l2-13b", - "hugging_face_id": "Undi95/ReMM-SLERP-L2-13B", - "name": "ReMM SLERP 13B", - "created": 1689984000, - "description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. 
#merge", - "context_length": 6144, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Llama2", - "instruct_type": "alpaca" - }, - "pricing": { - "prompt": "0.00000045", - "completion": "0.00000065" - }, - "top_provider": { - "context_length": 6144, - "max_completion_tokens": 4096, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "frequency_penalty", - "logit_bias", - "logprobs", - "max_tokens", - "min_p", - "presence_penalty", - "repetition_penalty", - "response_format", - "seed", - "stop", - "structured_outputs", - "temperature", - "top_a", - "top_k", - "top_logprobs", - "top_p" - ], - "default_parameters": {}, - "expiration_date": null - }, - { - "id": "upstage/solar-pro-3:free", - "canonical_slug": "upstage/solar-pro-3", - "hugging_face_id": "", - "name": "Upstage: Solar Pro 3 (free)", - "created": 1769481200, - "description": "Solar Pro 3 is Upstage's powerful Mixture-of-Experts (MoE) language model. With 102B total parameters and 12B active parameters per forward pass, it delivers exceptional performance while maintaining computational efficiency. 
Optimized for Korean with English and Japanese support.", - "context_length": 128000, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Other", - "instruct_type": null - }, - "pricing": { - "prompt": "0", - "completion": "0" - }, - "top_provider": { - "context_length": 128000, - "max_completion_tokens": null, - "is_moderated": false - }, - "per_request_limits": null, - "supported_parameters": [ - "include_reasoning", - "max_tokens", - "reasoning", - "response_format", - "structured_outputs", - "temperature", - "tool_choice", - "tools" - ], - "default_parameters": { - "temperature": null, - "top_p": null, - "frequency_penalty": null - }, - "expiration_date": "2026-03-02" - }, - { - "id": "writer/palmyra-x5", - "canonical_slug": "writer/palmyra-x5-20250428", - "hugging_face_id": "", - "name": "Writer: Palmyra X5", - "created": 1769003823, - "description": "Palmyra X5 is Writer's most advanced model, purpose-built for building and scaling AI agents across the enterprise. It delivers industry-leading speed and efficiency on context windows up to 1 million tokens, powered by a novel transformer architecture and hybrid attention mechanisms. 
This enables faster inference and expanded memory for processing large volumes of enterprise data, critical for scaling AI agents.", - "context_length": 1040000, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Other", - "instruct_type": null - }, - "pricing": { - "prompt": "0.0000006", - "completion": "0.000006" - }, - "top_provider": { - "context_length": 1040000, - "max_completion_tokens": 8192, - "is_moderated": true - }, - "per_request_limits": null, - "supported_parameters": [ - "max_tokens", - "stop", - "temperature", - "top_k", - "top_p" - ], - "default_parameters": { - "temperature": null, - "top_p": null, - "frequency_penalty": null - }, - "expiration_date": null - }, - { - "id": "x-ai/grok-3", - "canonical_slug": "x-ai/grok-3", - "hugging_face_id": "", - "name": "xAI: Grok 3", - "created": 1749582908, - "description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. 
Possesses deep domain knowledge in finance, healthcare, law, and science.\n\n", - "context_length": 131072, - "architecture": { - "modality": "text->text", - "input_modalities": [ - "text" - ], - "output_modalities": [ - "text" - ], - "tokenizer": "Grok", - "instruct_type": null - }, - "pricing": { - "prompt": "0.000003", - "completion": "0.000015", - "web_search": "0.005", - "input_cache_read": "0.00000075" + "prompt": "0.000003", + "completion": "0.000015", + "web_search": "0.005", + "input_cache_read": "0.00000075" }, "top_provider": { "context_length": 131072, @@ -16175,7 +15199,7 @@ }, "top_provider": { "context_length": 262144, - "max_completion_tokens": null, + "max_completion_tokens": 65536, "is_moderated": false }, "per_request_limits": null, @@ -16207,7 +15231,7 @@ "id": "z-ai/glm-4-32b", "canonical_slug": "z-ai/glm-4-32b-0414", "hugging_face_id": "", - "name": "Z.AI: GLM 4 32B ", + "name": "Z.ai: GLM 4 32B ", "created": 1753376617, "description": "GLM 4 32B is a cost-effective foundation language model.\n\nIt can efficiently perform complex tasks and has significantly enhanced capabilities in tool use, online search, and code-related intelligent tasks.\n\nIt is made by the same lab behind the thudm models.", "context_length": 128000, @@ -16250,10 +15274,10 @@ "id": "z-ai/glm-4.5", "canonical_slug": "z-ai/glm-4.5", "hugging_face_id": "zai-org/GLM-4.5", - "name": "Z.AI: GLM 4.5", + "name": "Z.ai: GLM 4.5", "created": 1753471347, "description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly enhanced capabilities in reasoning, code generation, and agent alignment. It supports a hybrid inference mode with two options, a \"thinking mode\" designed for complex reasoning and tool use, and a \"non-thinking mode\" optimized for instant responses. 
Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", - "context_length": 131072, + "context_length": 131000, "architecture": { "modality": "text->text", "input_modalities": [ @@ -16266,13 +15290,12 @@ "instruct_type": null }, "pricing": { - "prompt": "0.00000035", - "completion": "0.00000155", - "input_cache_read": "0.000000175" + "prompt": "0.00000055", + "completion": "0.000002" }, "top_provider": { - "context_length": 131072, - "max_completion_tokens": 65536, + "context_length": 131000, + "max_completion_tokens": 131000, "is_moderated": false }, "per_request_limits": null, @@ -16298,13 +15321,13 @@ "top_p": null, "frequency_penalty": null }, - "expiration_date": null + "expiration_date": "2026-02-12" }, { "id": "z-ai/glm-4.5-air", "canonical_slug": "z-ai/glm-4.5-air", "hugging_face_id": "zai-org/GLM-4.5-Air", - "name": "Z.AI: GLM 4.5 Air", + "name": "Z.ai: GLM 4.5 Air", "created": 1753471258, "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. 
[Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "context_length": 131072, @@ -16358,7 +15381,7 @@ "id": "z-ai/glm-4.5-air:free", "canonical_slug": "z-ai/glm-4.5-air", "hugging_face_id": "zai-org/GLM-4.5-Air", - "name": "Z.AI: GLM 4.5 Air (free)", + "name": "Z.ai: GLM 4.5 Air (free)", "created": 1753471258, "description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter size. GLM-4.5-Air also supports hybrid inference modes, offering a \"thinking mode\" for advanced reasoning and tool use, and a \"non-thinking mode\" for real-time interaction. Users can control the reasoning behaviour with the `reasoning` `enabled` boolean. [Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "context_length": 131072, @@ -16403,7 +15426,7 @@ "id": "z-ai/glm-4.5v", "canonical_slug": "z-ai/glm-4.5v", "hugging_face_id": "zai-org/GLM-4.5V", - "name": "Z.AI: GLM 4.5V", + "name": "Z.ai: GLM 4.5V", "created": 1754922288, "description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding, image Q&A, OCR, and document parsing, with strong gains in front-end web coding, grounding, and spatial reasoning. It offers a hybrid inference mode: a \"thinking mode\" for deep reasoning and a \"non-thinking mode\" for fast responses. Reasoning behavior can be toggled via the `reasoning` `enabled` boolean. 
[Learn more in our docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#enable-reasoning-with-default-config)", "context_length": 65536, @@ -16458,7 +15481,7 @@ "id": "z-ai/glm-4.6", "canonical_slug": "z-ai/glm-4.6", "hugging_face_id": "", - "name": "Z.AI: GLM 4.6", + "name": "Z.ai: GLM 4.6", "created": 1759235576, "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "context_length": 202752, @@ -16475,12 +15498,11 @@ }, "pricing": { "prompt": "0.00000035", - "completion": "0.0000015", - "input_cache_read": "0.000000175" + "completion": "0.00000171" }, "top_provider": { "context_length": 202752, - "max_completion_tokens": 65536, + "max_completion_tokens": 131072, "is_moderated": false }, "per_request_limits": null, @@ -16488,7 +15510,6 @@ "frequency_penalty", "include_reasoning", "logit_bias", - "logprobs", "max_tokens", "min_p", "presence_penalty", @@ -16501,9 +15522,7 @@ "temperature", "tool_choice", "tools", - "top_a", "top_k", - "top_logprobs", "top_p" ], "default_parameters": { @@ -16517,7 +15536,7 @@ "id": "z-ai/glm-4.6:exacto", "canonical_slug": "z-ai/glm-4.6", 
"hugging_face_id": "", - "name": "Z.AI: GLM 4.6 (exacto)", + "name": "Z.ai: GLM 4.6 (exacto)", "created": 1759235576, "description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.", "context_length": 204800, @@ -16571,7 +15590,7 @@ "id": "z-ai/glm-4.6v", "canonical_slug": "z-ai/glm-4.6-20251208", "hugging_face_id": "zai-org/GLM-4.6V", - "name": "Z.AI: GLM 4.6V", + "name": "Z.ai: GLM 4.6V", "created": 1765207462, "description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts and charts directly as visual inputs, and integrates native multimodal function calling to connect perception with downstream tool execution. 
The model also enables interleaved image-text generation and UI reconstruction workflows, including screenshot-to-HTML synthesis and iterative visual editing.", "context_length": 131072, @@ -16601,7 +15620,6 @@ "supported_parameters": [ "frequency_penalty", "include_reasoning", - "logit_bias", "max_tokens", "min_p", "presence_penalty", @@ -16628,9 +15646,9 @@ "id": "z-ai/glm-4.7", "canonical_slug": "z-ai/glm-4.7-20251222", "hugging_face_id": "zai-org/GLM-4.7", - "name": "Z.AI: GLM 4.7", + "name": "Z.ai: GLM 4.7", "created": 1766378014, - "description": "GLM-4.7 is Z.AI’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", + "description": "GLM-4.7 is Z.ai’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. 
It demonstrates significant improvements in executing complex agent tasks while delivering more natural conversational experiences and superior front-end aesthetics.", "context_length": 202752, "architecture": { "modality": "text->text", @@ -16644,13 +15662,13 @@ "instruct_type": null }, "pricing": { - "prompt": "0.0000004", - "completion": "0.0000015", - "input_cache_read": "0.0000002" + "prompt": "0.0000003", + "completion": "0.0000014", + "input_cache_read": "0.00000015" }, "top_provider": { "context_length": 202752, - "max_completion_tokens": 65535, + "max_completion_tokens": null, "is_moderated": false }, "per_request_limits": null, @@ -16658,13 +15676,10 @@ "frequency_penalty", "include_reasoning", "logit_bias", - "logprobs", "max_tokens", "min_p", - "parallel_tool_calls", "presence_penalty", "reasoning", - "reasoning_effort", "repetition_penalty", "response_format", "seed", @@ -16674,7 +15689,6 @@ "tool_choice", "tools", "top_k", - "top_logprobs", "top_p" ], "default_parameters": { @@ -16688,7 +15702,7 @@ "id": "z-ai/glm-4.7-flash", "canonical_slug": "z-ai/glm-4.7-flash-20260119", "hugging_face_id": "zai-org/GLM-4.7-Flash", - "name": "Z.AI: GLM 4.7 Flash", + "name": "Z.ai: GLM 4.7 Flash", "created": 1768833913, "description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. 
It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning, and tool collaboration, and has achieved leading performance among open-source models of the same size on several current public benchmark leaderboards.", "context_length": 202752, @@ -16738,6 +15752,893 @@ "frequency_penalty": null }, "expiration_date": null + }, + { + "id": "anthropic/claude-sonnet-4.6", + "canonical_slug": "anthropic/claude-4.6-sonnet-20260217", + "hugging_face_id": "", + "name": "Anthropic: Claude Sonnet 4.6", + "created": 1771342990, + "description": "Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It excels at iterative development, complex codebase navigation, end-to-end project management with memory, polished document creation, and confident computer use for web QA and workflow automation.", + "context_length": 1000000, + "architecture": { + "modality": "text+image->text", + "input_modalities": [ + "text", + "image" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Claude", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000003", + "completion": "0.000015", + "web_search": "0.01", + "input_cache_read": "0.0000003", + "input_cache_write": "0.00000375" + }, + "top_provider": { + "context_length": 1000000, + "max_completion_tokens": 128000, + "is_moderated": true + }, + "per_request_limits": null, + "supported_parameters": [ + "include_reasoning", + "max_tokens", + "reasoning", + "response_format", + "stop", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_k", + "top_p", + "verbosity" + ], + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "google/gemini-3.1-flash-image-preview", + "canonical_slug": "google/gemini-3.1-flash-image-preview-20260226", + "hugging_face_id": "", + "name": "Google: Nano Banana 2 
(Gemini 3.1 Flash Image Preview)", + "created": 1772119558, + "description": "Gemini 3.1 Flash Image Preview, a.k.a. \"Nano Banana 2,\" is Google’s latest state of the art image generation and editing model, delivering Pro-level visual quality at Flash speed. It combines advanced contextual understanding with fast, cost-efficient inference, making complex image generation and iterative edits significantly more accessible. Aspect ratios can be controlled with the [image_config API Parameter](https://openrouter.ai/docs/features/multimodal/image-generation#image-aspect-ratio-configuration)", + "context_length": 65536, + "architecture": { + "modality": "text+image->text+image", + "input_modalities": [ + "image", + "text" + ], + "output_modalities": [ + "image", + "text" + ], + "tokenizer": "Gemini", + "instruct_type": null + }, + "pricing": { + "prompt": "0.00000025", + "completion": "0.0000015" + }, + "top_provider": { + "context_length": 65536, + "max_completion_tokens": 65536, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "include_reasoning", + "max_tokens", + "reasoning", + "response_format", + "seed", + "stop", + "structured_outputs", + "temperature", + "top_p" + ], + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "google/gemini-3.1-pro-preview", + "canonical_slug": "google/gemini-3.1-pro-preview-20260219", + "hugging_face_id": "", + "name": "Google: Gemini 3.1 Pro Preview", + "created": 1771509627, + "description": "Gemini 3.1 Pro Preview is Google’s frontier reasoning model, delivering enhanced software engineering performance, improved agentic reliability, and more efficient token usage across complex workflows. Building on the multimodal foundation of the Gemini 3 series, it combines high-precision reasoning across text, image, video, audio, and code with a 1M-token context window. 
Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning. The 3.1 update introduces measurable gains in SWE benchmarks and real-world coding environments, along with stronger autonomous task execution in structured domains such as finance and spreadsheet-based workflows.\n\nDesigned for advanced development and agentic systems, Gemini 3.1 Pro Preview improves long-horizon stability and tool orchestration while increasing token efficiency. It introduces a new medium thinking level to better balance cost, speed, and performance. The model excels in agentic coding, structured planning, multimodal analysis, and workflow automation, making it well-suited for autonomous agents, financial modeling, spreadsheet automation, and high-context enterprise tasks.", + "context_length": 1048576, + "architecture": { + "modality": "text+image+file+audio+video->text", + "input_modalities": [ + "audio", + "file", + "image", + "text", + "video" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Gemini", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000002", + "completion": "0.000012", + "image": "0.000002", + "audio": "0.000002", + "internal_reasoning": "0.000012", + "input_cache_read": "0.0000002", + "input_cache_write": "0.000000375" + }, + "top_provider": { + "context_length": 1048576, + "max_completion_tokens": 65536, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "include_reasoning", + "max_tokens", + "reasoning", + "response_format", + "seed", + "stop", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_p" + ], + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "google/gemini-3.1-pro-preview-customtools", + "canonical_slug": "google/gemini-3.1-pro-preview-customtools-20260219", + 
"hugging_face_id": null, + "name": "Google: Gemini 3.1 Pro Preview Custom Tools", + "created": 1772045923, + "description": "Gemini 3.1 Pro Preview Custom Tools is a variant of Gemini 3.1 Pro that improves tool selection behavior by preventing overuse of a general bash tool when more efficient third-party or user-defined functions are available. This specialized preview endpoint significantly increases function calling reliability and ensures the model selects the most appropriate tool in coding agents and complex, multi-tool workflows.\n\nIt retains the core strengths of Gemini 3.1 Pro, including multimodal reasoning across text, image, video, audio, and code, a 1M-token context window, and strong software engineering performance.", + "context_length": 1048576, + "architecture": { + "modality": "text+image+file+audio+video->text", + "input_modalities": [ + "text", + "audio", + "image", + "video", + "file" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Gemini", + "instruct_type": null + }, + "pricing": { + "prompt": "0.000002", + "completion": "0.000012", + "image": "0.000002", + "audio": "0.000002", + "internal_reasoning": "0.000012", + "input_cache_read": "0.0000002", + "input_cache_write": "0.000000375" + }, + "top_provider": { + "context_length": 1048576, + "max_completion_tokens": 65536, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "include_reasoning", + "max_tokens", + "reasoning", + "response_format", + "seed", + "stop", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_p" + ], + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "openai/gpt-5.3-codex", + "canonical_slug": "openai/gpt-5.3-codex-20260224", + "hugging_face_id": "", + "name": "OpenAI: GPT-5.3-Codex", + "created": 1771959164, + "description": "GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier 
software engineering performance of GPT-5.2-Codex with the broader reasoning and professional knowledge capabilities of GPT-5.2. It achieves state-of-the-art results on SWE-Bench Pro and strong performance on Terminal-Bench 2.0 and OSWorld-Verified, reflecting improved multi-language coding, terminal proficiency, and real-world computer-use skills. The model is optimized for long-running, tool-using workflows and supports interactive steering during execution, making it suitable for complex development tasks, debugging, deployment, and iterative product work.\n\nBeyond coding, GPT-5.3-Codex performs strongly on structured knowledge-work benchmarks such as GDPval, supporting tasks like document drafting, spreadsheet analysis, slide creation, and operational research across domains. It is trained with enhanced cybersecurity awareness, including vulnerability identification capabilities, and deployed with additional safeguards for high-risk use cases. Compared to prior Codex models, it is more token-efficient and approximately 25% faster, targeting professional end-to-end workflows that span reasoning, execution, and computer interaction.", + "context_length": 400000, + "architecture": { + "modality": "text+image->text", + "input_modalities": [ + "text", + "image" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "GPT", + "instruct_type": null + }, + "pricing": { + "prompt": "0.00000175", + "completion": "0.000014", + "web_search": "0.01", + "input_cache_read": "0.000000175" + }, + "top_provider": { + "context_length": 400000, + "max_completion_tokens": 128000, + "is_moderated": true + }, + "per_request_limits": null, + "supported_parameters": [ + "include_reasoning", + "max_tokens", + "reasoning", + "response_format", + "seed", + "structured_outputs", + "tool_choice", + "tools" + ], + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "aion-labs/aion-2.0", + 
"canonical_slug": "aion-labs/aion-2.0-20260223", + "hugging_face_id": null, + "name": "AionLabs: Aion-2.0", + "created": 1771881306, + "description": "Aion-2.0 is a variant of DeepSeek V3.2 optimized for immersive roleplaying and storytelling. It is particularly strong at introducing tension, crises, and conflict into stories, making narratives feel more engaging. It also handles mature and darker themes with more nuance and depth.", + "context_length": 131072, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Other", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000008", + "completion": "0.0000016", + "input_cache_read": "0.0000002" + }, + "top_provider": { + "context_length": 131072, + "max_completion_tokens": 32768, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "include_reasoning", + "max_tokens", + "reasoning", + "temperature", + "top_p" + ], + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "bytedance-seed/seed-2.0-mini", + "canonical_slug": "bytedance-seed/seed-2.0-mini-20260224", + "hugging_face_id": "", + "name": "ByteDance Seed: Seed-2.0-Mini", + "created": 1772131107, + "description": "Seed-2.0-mini targets latency-sensitive, high-concurrency, and cost-sensitive scenarios, emphasizing fast response and flexible inference deployment. 
It delivers performance comparable to ByteDance-Seed-1.6, supports 256k context, four reasoning effort modes (minimal/low/medium/high), multimodal understanding, and is optimized for lightweight tasks where cost and speed take priority.", + "context_length": 262144, + "architecture": { + "modality": "text+image+video->text", + "input_modalities": [ + "text", + "image", + "video" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Other", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000001", + "completion": "0.0000004" + }, + "top_provider": { + "context_length": 262144, + "max_completion_tokens": 131072, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "frequency_penalty", + "include_reasoning", + "max_tokens", + "reasoning", + "response_format", + "stop", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_p" + ], + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "liquid/lfm-2-24b-a2b", + "canonical_slug": "liquid/lfm-2-24b-a2b-20260224", + "hugging_face_id": "LiquidAI/LFM2-24B-A2B", + "name": "LiquidAI: LFM2-24B-A2B", + "created": 1772048711, + "description": "LFM2-24B-A2B is the largest model in the LFM2 family of hybrid architectures designed for efficient on-device deployment. Built as a 24B parameter Mixture-of-Experts model with only 2B active parameters per token, it delivers high-quality generation while maintaining low inference costs. 
The model fits within 32 GB of RAM, making it practical to run on consumer laptops and desktops without sacrificing capability.", + "context_length": 32768, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Other", + "instruct_type": null + }, + "pricing": { + "prompt": "0.00000003", + "completion": "0.00000012" + }, + "top_provider": { + "context_length": 32768, + "max_completion_tokens": null, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "frequency_penalty", + "logit_bias", + "max_tokens", + "min_p", + "presence_penalty", + "repetition_penalty", + "stop", + "temperature", + "top_k", + "top_p" + ], + "default_parameters": { + "temperature": 0.1, + "top_p": null, + "top_k": 50, + "frequency_penalty": null, + "presence_penalty": null, + "repetition_penalty": 1.05 + }, + "expiration_date": null + }, + { + "id": "qwen/qwen3-max-thinking", + "canonical_slug": "qwen/qwen3-max-thinking-20260123", + "hugging_face_id": null, + "name": "Qwen: Qwen3 Max Thinking", + "created": 1770671901, + "description": "Qwen3-Max-Thinking is the flagship reasoning model in the Qwen3 series, designed for high-stakes cognitive tasks that require deep, multi-step reasoning. 
By significantly scaling model capacity and reinforcement learning compute, it delivers major gains in factual accuracy, complex reasoning, instruction following, alignment with human preferences, and agentic behavior.", + "context_length": 262144, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Qwen", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000012", + "completion": "0.000006" + }, + "top_provider": { + "context_length": 262144, + "max_completion_tokens": 32768, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "include_reasoning", + "max_tokens", + "presence_penalty", + "reasoning", + "response_format", + "seed", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_p" + ], + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "qwen/qwen3.5-122b-a10b", + "canonical_slug": "qwen/qwen3.5-122b-a10b-20260224", + "hugging_face_id": "Qwen/Qwen3.5-122B-A10B", + "name": "Qwen: Qwen3.5-122B-A10B", + "created": 1772053789, + "description": "The Qwen3.5 122B-A10B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. In terms of overall performance, this model is second only to Qwen3.5-397B-A17B. 
Its text capabilities significantly outperform those of Qwen3-235B-2507, and its visual capabilities surpass those of Qwen3-VL-235B.", + "context_length": 262144, + "architecture": { + "modality": "text+image+video->text", + "input_modalities": [ + "text", + "image", + "video" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Qwen3", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000004", + "completion": "0.0000032" + }, + "top_provider": { + "context_length": 262144, + "max_completion_tokens": 65536, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "include_reasoning", + "logprobs", + "max_tokens", + "presence_penalty", + "reasoning", + "response_format", + "seed", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_logprobs", + "top_p" + ], + "default_parameters": { + "temperature": 0.6, + "top_p": 0.95, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "qwen/qwen3.5-27b", + "canonical_slug": "qwen/qwen3.5-27b-20260224", + "hugging_face_id": "Qwen/Qwen3.5-27B", + "name": "Qwen: Qwen3.5-27B", + "created": 1772053810, + "description": "The Qwen3.5 27B native vision-language Dense model incorporates a linear attention mechanism, delivering fast response times while balancing inference speed and performance. 
Its overall capabilities are comparable to those of the Qwen3.5-122B-A10B.", + "context_length": 262144, + "architecture": { + "modality": "text+image+video->text", + "input_modalities": [ + "text", + "image", + "video" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Qwen3", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000003", + "completion": "0.0000024" + }, + "top_provider": { + "context_length": 262144, + "max_completion_tokens": 65536, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "include_reasoning", + "logprobs", + "max_tokens", + "presence_penalty", + "reasoning", + "response_format", + "seed", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_logprobs", + "top_p" + ], + "default_parameters": { + "temperature": 0.6, + "top_p": 0.95, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "qwen/qwen3.5-35b-a3b", + "canonical_slug": "qwen/qwen3.5-35b-a3b-20260224", + "hugging_face_id": "Qwen/Qwen3.5-35B-A3B", + "name": "Qwen: Qwen3.5-35B-A3B", + "created": 1772053822, + "description": "The Qwen3.5 Series 35B-A3B is a native vision-language model designed with a hybrid architecture that integrates linear attention mechanisms and a sparse mixture-of-experts model, achieving higher inference efficiency. 
Its overall performance is comparable to that of the Qwen3.5-27B.", + "context_length": 262144, + "architecture": { + "modality": "text+image+video->text", + "input_modalities": [ + "text", + "image", + "video" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Qwen3", + "instruct_type": null + }, + "pricing": { + "prompt": "0.00000025", + "completion": "0.000002" + }, + "top_provider": { + "context_length": 262144, + "max_completion_tokens": 65536, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "include_reasoning", + "logprobs", + "max_tokens", + "presence_penalty", + "reasoning", + "response_format", + "seed", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_logprobs", + "top_p" + ], + "default_parameters": { + "temperature": 1, + "top_p": 0.95, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "qwen/qwen3.5-397b-a17b", + "canonical_slug": "qwen/qwen3.5-397b-a17b-20260216", + "hugging_face_id": "Qwen/Qwen3.5-397B-A17B", + "name": "Qwen: Qwen3.5 397B A17B", + "created": 1771223018, + "description": "The Qwen3.5 series 397B-A17B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. It delivers state-of-the-art performance comparable to leading-edge models across a wide range of tasks, including language understanding, logical reasoning, code generation, agent-based tasks, image understanding, video understanding, and graphical user interface (GUI) interactions. 
With its robust code-generation and agent capabilities, the model exhibits strong generalization across diverse agent.", + "context_length": 262144, + "architecture": { + "modality": "text+image+video->text", + "input_modalities": [ + "text", + "image", + "video" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Qwen3", + "instruct_type": null + }, + "pricing": { + "prompt": "0.00000055", + "completion": "0.0000035", + "input_cache_read": "0.00000055" + }, + "top_provider": { + "context_length": 262144, + "max_completion_tokens": 65536, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "frequency_penalty", + "include_reasoning", + "logit_bias", + "max_tokens", + "min_p", + "presence_penalty", + "reasoning", + "repetition_penalty", + "response_format", + "seed", + "stop", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_k", + "top_p" + ], + "default_parameters": { + "temperature": 0.6, + "top_p": 0.95, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "qwen/qwen3.5-flash-02-23", + "canonical_slug": "qwen/qwen3.5-flash-20260224", + "hugging_face_id": null, + "name": "Qwen: Qwen3.5-Flash", + "created": 1772053776, + "description": "The Qwen3.5 native vision-language Flash models are built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. 
Compared to the 3 series, these models deliver a leap forward in performance for both pure text and multimodal tasks, offering fast response times while balancing inference speed and overall performance.", + "context_length": 1000000, + "architecture": { + "modality": "text+image+video->text", + "input_modalities": [ + "text", + "image", + "video" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Qwen3", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000001", + "completion": "0.0000004" + }, + "top_provider": { + "context_length": 1000000, + "max_completion_tokens": 65536, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "include_reasoning", + "max_tokens", + "presence_penalty", + "reasoning", + "response_format", + "seed", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_p" + ], + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "qwen/qwen3.5-plus-02-15", + "canonical_slug": "qwen/qwen3.5-plus-20260216", + "hugging_face_id": "", + "name": "Qwen: Qwen3.5 Plus 2026-02-15", + "created": 1771229416, + "description": "The Qwen3.5 native vision-language series Plus models are built on a hybrid architecture that integrates linear attention mechanisms with sparse mixture-of-experts models, achieving higher inference efficiency. In a variety of task evaluations, the 3.5 series consistently demonstrates performance on par with state-of-the-art leading models. 
Compared to the 3 series, these models show a leap forward in both pure-text and multimodal capabilities.", + "context_length": 1000000, + "architecture": { + "modality": "text+image+video->text", + "input_modalities": [ + "text", + "image", + "video" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Qwen3", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000004", + "completion": "0.0000024" + }, + "top_provider": { + "context_length": 1000000, + "max_completion_tokens": 65536, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "include_reasoning", + "max_tokens", + "presence_penalty", + "reasoning", + "response_format", + "seed", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_p" + ], + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "stepfun/step-3.5-flash", + "canonical_slug": "stepfun/step-3.5-flash", + "hugging_face_id": "stepfun-ai/Step-3.5-Flash", + "name": "StepFun: Step 3.5 Flash", + "created": 1769728337, + "description": "Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token. 
It is a reasoning model that is incredibly speed efficient even at long contexts.", + "context_length": 256000, + "architecture": { + "modality": "text->text", + "input_modalities": [ + "text" + ], + "output_modalities": [ + "text" + ], + "tokenizer": "Other", + "instruct_type": null + }, + "pricing": { + "prompt": "0.0000001", + "completion": "0.0000003", + "input_cache_read": "0.00000002" + }, + "top_provider": { + "context_length": 256000, + "max_completion_tokens": 256000, + "is_moderated": false + }, + "per_request_limits": null, + "supported_parameters": [ + "frequency_penalty", + "include_reasoning", + "max_tokens", + "reasoning", + "stop", + "temperature", + "tools", + "top_p" + ], + "default_parameters": { + "temperature": null, + "top_p": null, + "frequency_penalty": null + }, + "expiration_date": null + }, + { + "id": "z-ai/glm-5", + "canonical_slug": "z-ai/glm-5-20260211", + "hugging_face_id": "zai-org/GLM-5", + "name": "Z.ai: GLM 5", + "created": 1770829182, + "description": "GLM-5 is Z.ai’s flagship open-source foundation model engineered for complex systems design and long-horizon agent workflows. Built for expert developers, it delivers production-grade performance on large-scale programming tasks, rivaling leading closed-source models. 
With advanced agentic planning, deep backend reasoning, and iterative self-correction, GLM-5 moves beyond code generation to full-system construction and autonomous execution.",
+      "context_length": 204800,
+      "architecture": {
+        "modality": "text->text",
+        "input_modalities": [
+          "text"
+        ],
+        "output_modalities": [
+          "text"
+        ],
+        "tokenizer": "Other",
+        "instruct_type": null
+      },
+      "pricing": {
+        "prompt": "0.00000095",
+        "completion": "0.00000255",
+        "input_cache_read": "0.0000002"
+      },
+      "top_provider": {
+        "context_length": 204800,
+        "max_completion_tokens": 131072,
+        "is_moderated": false
+      },
+      "per_request_limits": null,
+      "supported_parameters": [
+        "frequency_penalty",
+        "include_reasoning",
+        "logit_bias",
+        "logprobs",
+        "max_tokens",
+        "min_p",
+        "presence_penalty",
+        "reasoning",
+        "repetition_penalty",
+        "response_format",
+        "seed",
+        "stop",
+        "structured_outputs",
+        "temperature",
+        "tool_choice",
+        "tools",
+        "top_k",
+        "top_logprobs",
+        "top_p"
+      ],
+      "default_parameters": {
+        "temperature": 1,
+        "top_p": 0.95,
+        "frequency_penalty": null
+      },
+      "expiration_date": null
     }
   ]
 }
diff --git a/scripts/refresh-openrouter-provider-catalog.sh b/scripts/refresh-openrouter-provider-catalog.sh
new file mode 100755
index 00000000..07947bdc
--- /dev/null
+++ b/scripts/refresh-openrouter-provider-catalog.sh
@@ -0,0 +1,186 @@
+#!/usr/bin/env bash
+#
+# Refreshes selected provider entries in the pinned OpenRouter model catalog
+# from the live OpenRouter models API. Pinned entries whose ID matches the
+# provider regex are replaced by their live version (or dropped when the ID is
+# gone upstream), live-only matching entries are appended, and every other
+# pinned entry is kept as-is. Requires curl and jq.
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+PINNED_PATH="${ROOT_DIR}/internal/attractor/modeldb/pinned/openrouter_models.json"
+PROVIDER_REGEX='^(openai|anthropic|google)/'
+LIVE_URL="https://openrouter.ai/api/v1/models"
+DRY_RUN=0
+
+# usage prints the command-line help text to stdout.
+usage() {
+  cat <<'USAGE'
+Usage:
+  scripts/refresh-openrouter-provider-catalog.sh [--dry-run] [--providers-regex REGEX] [--pinned PATH]
+
+Description:
+  Refreshes provider entries in the pinned OpenRouter catalog from live data.
+  By default, refreshes:
+    - openai/*
+    - anthropic/*
+    - google/*
+
+Options:
+  --dry-run                  Show planned changes, do not write file
+  --providers-regex REGEX    Provider ID regex (default: ^(openai|anthropic|google)/)
+  --pinned PATH              Path to pinned catalog JSON file
+USAGE
+}
+
+# die prints "error: MESSAGE" to stderr and exits with status 1.
+die() {
+  echo "error: $*" >&2
+  exit 1
+}
+
+# require_data_array LABEL FILE exits unless FILE is JSON with a top-level
+# "data" array. A bare "jq -e" fails without output, so name the bad input.
+require_data_array() {
+  jq -e '.data and (.data | type == "array")' "$2" >/dev/null \
+    || die "$1 has no top-level \"data\" array: $2"
+}
+
+# provider_ids FILE prints the model IDs in FILE that match PROVIDER_REGEX,
+# one per line and sorted so that comm can compare two such lists.
+provider_ids() {
+  jq -r --arg re "${PROVIDER_REGEX}" \
+    '.data[] | select(.id | test($re)) | .id' "$1" | sort
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --dry-run)
+      DRY_RUN=1
+      shift
+      ;;
+    --providers-regex)
+      PROVIDER_REGEX="${2:-}"
+      [[ -n "${PROVIDER_REGEX}" ]] || die "--providers-regex requires a value"
+      shift 2
+      ;;
+    --pinned)
+      PINNED_PATH="${2:-}"
+      [[ -n "${PINNED_PATH}" ]] || die "--pinned requires a value"
+      shift 2
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "error: unknown argument: $1" >&2
+      usage >&2
+      exit 1
+      ;;
+  esac
+done
+
+[[ -f "${PINNED_PATH}" ]] || die "pinned catalog not found: ${PINNED_PATH}"
+command -v jq >/dev/null 2>&1 || die "jq is required"
+command -v curl >/dev/null 2>&1 || die "curl is required"
+
+tmp_live="$(mktemp)"
+tmp_new="$(mktemp)"
+tmp_old_ids="$(mktemp)"
+tmp_live_ids="$(mktemp)"
+tmp_added="$(mktemp)"
+tmp_removed="$(mktemp)"
+trap 'rm -f "$tmp_live" "$tmp_new" "$tmp_old_ids" "$tmp_live_ids" "$tmp_added" "$tmp_removed"' EXIT
+
+# Bound the download so a stalled connection cannot hang the refresh.
+curl -fsSL --retry 3 --connect-timeout 15 --max-time 300 "${LIVE_URL}" > "${tmp_live}"
+
+require_data_array "live catalog" "${tmp_live}"
+require_data_array "pinned catalog" "${PINNED_PATH}"
+
+provider_ids "${PINNED_PATH}" > "${tmp_old_ids}"
+provider_ids "${tmp_live}" > "${tmp_live_ids}"
+
+# With no live matches the merge below would drop every pinned provider
+# entry, so treat that as a bad response or a bad regex and stop.
+if [[ ! -s "${tmp_live_ids}" ]]; then
+  die "live catalog has no model IDs matching: ${PROVIDER_REGEX}"
+fi
+
+comm -13 "${tmp_old_ids}" "${tmp_live_ids}" > "${tmp_added}"
+comm -23 "${tmp_old_ids}" "${tmp_live_ids}" > "${tmp_removed}"
+
+# Replace matching pinned entries in place with their live version, drop the
+# ones missing upstream, then append live entries the pinned file lacked.
+jq --slurpfile live "${tmp_live}" --arg re "${PROVIDER_REGEX}" '
+  ($live[0].data
+    | map(select(.id | test($re)))
+    | map({key: .id, value: .})
+    | from_entries) as $freshByID
+  | ($freshByID | keys) as $freshIDs
+  | (.data
+    | map(
+        if (.id | test($re)) then
+          ($freshByID[.id] // empty)
+        else
+          .
+        end
+      )) as $replaced
+  | ($replaced
+    | map(.id)
+    | map({key: ., value: true})
+    | from_entries) as $present
+  | ($freshIDs
+    | map(select($present[.] | not) | $freshByID[.])) as $missing
+  | .data = ($replaced + $missing)
+' "${PINNED_PATH}" > "${tmp_new}"
+
+dups="$(jq -r '.data[].id' "${tmp_new}" | sort | uniq -d)"
+if [[ -n "${dups}" ]]; then
+  echo "error: duplicate model IDs introduced:" >&2
+  echo "${dups}" >&2
+  exit 1
+fi
+
+old_total="$(jq '.data | length' "${PINNED_PATH}")"
+new_total="$(jq '.data | length' "${tmp_new}")"
+old_provider_total="$(wc -l < "${tmp_old_ids}" | tr -d ' ')"
+new_provider_total="$(wc -l < "${tmp_live_ids}" | tr -d ' ')"
+
+echo "Pinned total models: ${old_total} -> ${new_total}"
+echo "Target-provider models: ${old_provider_total} -> ${new_provider_total}"
+echo "Providers regex: ${PROVIDER_REGEX}"
+echo
+
+echo "Added IDs:"
+if [[ -s "${tmp_added}" ]]; then
+  cat "${tmp_added}"
+else
+  echo "(none)"
+fi
+echo
+
+echo "Removed IDs:"
+if [[ -s "${tmp_removed}" ]]; then
+  cat "${tmp_removed}"
+else
+  echo "(none)"
+fi
+echo
+
+if cmp -s "${PINNED_PATH}" "${tmp_new}"; then
+  echo "No changes detected."
+  exit 0
+fi
+
+if [[ "${DRY_RUN}" == "1" ]]; then
+  echo "Dry run only, no file was written."
+  exit 0
+fi
+
+# Write through the existing file rather than mv-ing the mktemp file over it:
+# mktemp creates owner-only (0600) files, and mv would replace the pinned
+# file's mode (and any symlink at that path) with the temporary file's.
+cat "${tmp_new}" > "${PINNED_PATH}"
+echo "Updated ${PINNED_PATH}"