From 3b06e29df8a8ab0a89335f63d6c84626d05b5c0d Mon Sep 17 00:00:00 2001 From: Taras Yemets Date: Tue, 29 Apr 2025 18:43:22 +0300 Subject: [PATCH 01/10] Update model configuration documentation --- .../cody/enterprise/model-config-examples.mdx | 233 ++++++++++++------ docs/cody/enterprise/model-configuration.mdx | 3 +- 2 files changed, 161 insertions(+), 75 deletions(-) diff --git a/docs/cody/enterprise/model-config-examples.mdx b/docs/cody/enterprise/model-config-examples.mdx index d6bf85a94..ac9f8ee89 100644 --- a/docs/cody/enterprise/model-config-examples.mdx +++ b/docs/cody/enterprise/model-config-examples.mdx @@ -133,22 +133,47 @@ Below are configuration examples for setting up various LLM providers using BYOK ], "modelOverrides": [ { - "modelRef": "anthropic::2024-10-22::claude-3.5-sonnet", - "displayName": "Claude 3.5 Sonnet", - "modelName": "claude-3-5-sonnet-latest", + "modelRef": "anthropic::2024-10-22::claude-3-7-sonnet-latest", + "displayName": "Claude 3.7 Sonnet", + "modelName": "claude-3-7-sonnet-latest", "capabilities": ["chat"], "category": "accuracy", "status": "stable", "contextWindow": { - "maxInputTokens": 45000, - "maxOutputTokens": 4000 - } + "maxInputTokens": 132000, + "maxOutputTokens": 8192 + } }, + { + "modelRef": "anthropic::2024-10-22::claude-3-7-sonnet-extended-thinking", + "displayName": "Claude 3.7 Sonnet Extended Thinking", + "modelName": "claude-3-7-sonnet-latest", + "capabilities": ["chat", "reasoning"], + "category": "accuracy", + "status": "stable", + "contextWindow": { + "maxInputTokens": 93000, + "maxOutputTokens": 64000 + }, + "reasoningEffort": "low" + }, + { + "modelRef": "anthropic::2024-10-22::claude-3-5-haiku-latest", + "displayName": "Claude 3.5 Haiku", + "modelName": "claude-3-5-haiku-latest", + "capabilities": ["autocomplete", "edit", "chat"], + "category": "speed", + "status": "stable", + "contextWindow": { + "maxInputTokens": 132000, + "maxOutputTokens": 8192 + } + } ], "defaultModels": { - "chat": 
"anthropic::2024-10-22::claude-3.5-sonnet", - "fastChat": "anthropic::2023-06-01::claude-3-haiku", - "codeCompletion": "fireworks::v1::deepseek-coder-v2-lite-base" + "chat": "anthropic::2024-10-22::claude-3-7-sonnet-latest", + "fastChat": "anthropic::2024-10-22::claude-3-5-haiku-latest", + "codeCompletion": "anthropic::2024-10-22::claude-3-5-haiku-latest" } } ``` @@ -157,8 +182,9 @@ In the configuration above, - Set up a provider override for Anthropic, routing requests for this provider directly to the specified Anthropic endpoint (bypassing Cody Gateway) - Add three Anthropic models: - - Two models with chat capabilities (`"anthropic::2024-10-22::claude-3.5-sonnet"` and `"anthropic::2023-06-01::claude-3-haiku"`), providing options for chat users - - One model with autocomplete capability (`"fireworks::v1::deepseek-coder-v2-lite-base"`) + - `"anthropic::2024-10-22::claude-3-7-sonnet-latest"` with chat, vision, and tools capabilities + - `"anthropic::2024-10-22::claude-3-7-sonnet-extended-thinking"` with chat and reasoning capabilities (note: to enable [Claude's extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) model override should include "reasoning" capability and have "reasoningEffort" defined) + - `"anthropic::2024-10-22::claude-3-5-haiku-latest"` with autocomplete, edit, chat, and tools capabilities - Set the configured models as default models for Cody features in the `"defaultModels"` field @@ -239,45 +265,61 @@ In the configuration above, } ], "modelOverrides": [ - { - "modelRef": "openai::2024-02-01::gpt-4o", - "displayName": "GPT-4o", - "modelName": "gpt-4o", - "capabilities": ["chat"], - "category": "accuracy", - "status": "stable", - "contextWindow": { + { + "modelRef": "openai::unknown::gpt-4o", + "displayName": "GPT-4o", + "modelName": "gpt-4o", + "capabilities": ["chat"], + "category": "accuracy", + "status": "stable", + "contextWindow": { "maxInputTokens": 45000, "maxOutputTokens": 4000 + } + }, + { + 
"modelRef": "openai::unknown::gpt-4.1-nano", + "displayName": "GPT-4.1-nano", + "modelName": "gpt-4.1-nano", + "capabilities": ["edit", "chat", "autocomplete"], + "category": "speed", + "status": "stable", + "tier": "free", + "contextWindow": { + "maxInputTokens": 77000, + "maxOutputTokens": 16000 + } + }, + { + "modelRef": "openai::unknown::o3", + "displayName": "o3", + "modelName": "o3", + "capabilities": ["chat", "reasoning"], + "category": "accuracy", + "status": "stable", + "tier": "pro", + "contextWindow": { + "maxInputTokens": 68000, + "maxOutputTokens": 100000 + }, + "reasoningEffort": "medium" } - }, - { - "modelRef": "openai::unknown::gpt-3.5-turbo-instruct", - "displayName": "GPT-3.5 Turbo Instruct", - "modelName": "gpt-3.5-turbo-instruct", - "capabilities": ["autocomplete"], - "category": "speed", - "status": "stable", - "contextWindow": { - "maxInputTokens": 7000, - "maxOutputTokens": 4000 - } + ], + "defaultModels": { + "chat": "openai::unknown::gpt-4o", + "fastChat": "openai::unknown::gpt-4.1-nano", + "codeCompletion": "openai::unknown::gpt-4.1-nano" } -], - "defaultModels": { - "chat": "openai::2024-02-01::gpt-4o", - "fastChat": "openai::2024-02-01::gpt-4o", - "codeCompletion": "openai::unknown::gpt-3.5-turbo-instruct" - } } ``` In the configuration above, - Set up a provider override for OpenAI, routing requests for this provider directly to the specified OpenAI endpoint (bypassing Cody Gateway) -- Add two OpenAI models: - - `"openai::2024-02-01::gpt-4o"` with "chat" capabilities - used for "chat" and "fastChat" - - `"openai::unknown::gpt-3.5-turbo-instruct"` with "autocomplete" capability - used for "autocomplete" +- Add three OpenAI models: + - `"openai::2024-02-01::gpt-4o"` with chat capability - used as a default model for chat + - `"openai::unknown::gpt-4.1-nano"` with chat, edit and autocomplete capabilities - used as a default model for fast chat and autocomplete + - `"openai::unknown::o3"` with chat and reasoning capabilities - o-series 
model that supports thinking, can be used for chat (note: to enable thinking, model override should include "reasoning" capability and have "reasoningEffort" defined). @@ -313,6 +355,33 @@ In the configuration above, "maxOutputTokens": 4000 } }, + { + "modelRef": "azure-openai::unknown::gpt-4.1-nano", + "displayName": "GPT-4.1-nano", + "modelName": "gpt-4.1-nano", + "capabilities": ["edit", "chat", "autocomplete"], + "category": "speed", + "status": "stable", + "tier": "free", + "contextWindow": { + "maxInputTokens": 77000, + "maxOutputTokens": 16000 + } + }, + { + "modelRef": "azure-openai::unknown::o3-mini", + "displayName": "o3-mini", + "modelName": "o3-mini", + "capabilities": ["chat", "reasoning"], + "category": "accuracy", + "status": "stable", + "tier": "pro", + "contextWindow": { + "maxInputTokens": 68000, + "maxOutputTokens": 100000 + }, + "reasoningEffort": "medium" + }, { "modelRef": "azure-openai::unknown::gpt-35-turbo-instruct-test", "displayName": "GPT-3.5 Turbo Instruct", @@ -328,8 +397,8 @@ In the configuration above, ], "defaultModels": { "chat": "azure-openai::unknown::gpt-4o", - "fastChat": "azure-openai::unknown::gpt-4o", - "codeCompletion": "azure-openai::unknown::gpt-35-turbo-instruct-test" + "fastChat": "azure-openai::unknown::gpt-4.1-nano", + "codeCompletion": "azure-openai::unknown::gpt-4.1-nano" } } ``` @@ -338,9 +407,11 @@ In the configuration above, - Set up a provider override for Azure OpenAI, routing requests for this provider directly to the specified Azure OpenAI endpoint (bypassing Cody Gateway). **Note:** For Azure OpenAI, ensure that the `modelName` matches the name defined in your Azure portal configuration for the model. 
-- Add two OpenAI models: - - `"azure-openai::unknown::gpt-4o"` with "chat" capability - used for "chat" and "fastChat" - - `"azure-openai::unknown::gpt-35-turbo-instruct-test"` with "autocomplete" capability - used for "autocomplete" +- Add four OpenAI models: + - `"azure-openai::unknown::gpt-4o"` with chat capability - used as a default model for chat + - `"azure-openai::unknown::gpt-4.1-nano"` with chat, edit and autocomplete capabilities - used as a default model for fast chat and autocomplete + - `"azure-openai::unknown::o3-mini"` with chat and reasoning capabilities - o-series model that supports thinking, can be used for chat (note: to enable thinking, model override should include "reasoning" capability and have "reasoningEffort" defined) + - `"azure-openai::unknown::gpt-35-turbo-instruct-test"` with "autocomplete" capability - included as an alternative model - Since `"azure-openai::unknown::gpt-35-turbo-instruct-test"` is not supported on the newer OpenAI `"v1/chat/completions"` endpoint, we set `"useDeprecatedCompletionsAPI"` to `true` to route requests to the legacy `"v1/completions"` endpoint. This setting is unnecessary if you are using a model supported on the `"v1/chat/completions"` endpoint. 
@@ -499,44 +570,58 @@ In the configuration above, ], "modelOverrides": [ { - "modelRef": "google::unknown::claude-3-5-sonnet", - "displayName": "Claude 3.5 Sonnet (via Google/Vertex)", - "modelName": "claude-3-5-sonnet@20240620", - "contextWindow": { - "maxInputTokens": 45000, - "maxOutputTokens": 4000 - }, - "capabilities": ["chat"], - "category": "accuracy", - "status": "stable" + "modelRef": "google::unknown::claude-3-7-sonnet", + "displayName": "Claude 3.7 Sonnet", + "modelName": "claude-3-7-sonnet", + "capabilities": ["chat", "vision", "tools"], + "category": "accuracy", + "status": "stable", + "contextWindow": { + "maxInputTokens": 132000, + "maxOutputTokens": 8192 + } }, { - "modelRef": "google::unknown::claude-3-haiku", - "displayName": "Claude 3 Haiku", - "modelName": "claude-3-haiku@20240307", - "capabilities": ["autocomplete", "chat"], - "category": "speed", - "status": "stable", - "contextWindow": { - "maxInputTokens": 7000, - "maxOutputTokens": 4000 - } + "modelRef": "google::unknown::claude-3-7-sonnet-extended-thinking", + "displayName": "Claude 3.7 Sonnet Extended Thinking", + "modelName": "claude-3-7-sonnet", + "capabilities": ["chat"], + "category": "accuracy", + "status": "stable", + "contextWindow": { + "maxInputTokens": 93000, + "maxOutputTokens": 64000 + } }, - ], - "defaultModels": { - "chat": "google::unknown::claude-3-5-sonnet", - "fastChat": "google::unknown::claude-3-5-sonnet", - "codeCompletion": "google::unknown::claude-3-haiku" - } + { + "modelRef": "google::unknown::claude-3-5-haiku", + "displayName": "Claude 3.5 Haiku", + "modelName": "claude-3-5-haiku-latest", + "capabilities": ["autocomplete", "edit", "chat", "tools"], + "category": "speed", + "status": "stable", + "contextWindow": { + "maxInputTokens": 132000, + "maxOutputTokens": 8192 + } + } + ], + "defaultModels": { + "chat": "google::unknown::claude-3.5-sonnet", + "fastChat": "google::unknown::claude-3-5-haiku", + "codeCompletion": "google::unknown::claude-3-5-haiku" + } } ``` 
In the configuration above, - Set up a provider override for Google Anthropic, routing requests for this provider directly to the specified endpoint (bypassing Cody Gateway) -- Add two Anthropic models: - - `"google::unknown::claude-3-5-sonnet"` with "chat" capabiity - used for "chat" and "fastChat" - - `"google::unknown::claude-3-haiku"` with "autocomplete" capability - used for "autocomplete" +- Add three Anthropic models: + - `"google::unknown::claude-3-7-sonnet"` with chat, vision, and tools capabilities + - `"google::unknown::claude-3-7-sonnet-extended-thinking"` with chat and reasoning capabilities (note: to enable [Claude's extended thinking](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking) model override should include "reasoning" capability and have "reasoningEffort" defined) + - `"google::unknown::claude-3-5-haiku"` with autocomplete, edit, chat, and tools capabilities +- Set the configured models as default models for Cody features in the `"defaultModels"` field diff --git a/docs/cody/enterprise/model-configuration.mdx b/docs/cody/enterprise/model-configuration.mdx index ad8717491..a5c1559c0 100644 --- a/docs/cody/enterprise/model-configuration.mdx +++ b/docs/cody/enterprise/model-configuration.mdx @@ -215,7 +215,7 @@ This field is an array of items, each with the following fields: - `${apiVersionId}` specifies the API version, which helps detect compatibility issues between models and Sourcegraph instances. For example, `"2023-06-01"` can indicate that the model uses that version of the Anthropic API. If unsure, you may set this to `"unknown"` when defining custom models - `displayName`: An optional, user-friendly name for the model. If not set, clients should display the `ModelID` part of the `modelRef` instead (not the `modelName`) - `modelName`: A unique identifier the API provider uses to specify which model is being invoked. 
This is the identifier that the LLM provider recognizes to determine the model you are calling -- `capabilities`: A list of capabilities that the model supports. Supported values: **autocomplete** and **chat** +- `capabilities`: A list of capabilities that the model supports. Supported values: `autocomplete`, `chat`, `vision`, `reasoning`, `edit`, `tools`. - `category`: Specifies the model's category with the following options: - `"balanced"`: Typically the best default choice for most users. This category is suited for models like Sonnet 3.5 (as of October 2024) - `"speed"`: Ideal for low-parameter models that may not suit general-purpose chat but are beneficial for specialized tasks, such as query rewriting @@ -225,6 +225,7 @@ This field is an array of items, each with the following fields: - `contextWindow`: An object that defines the **number of tokens** (units of text) that can be sent to the LLM. This setting influences response time and request cost and may vary according to the limits set by each LLM model or provider. It includes two fields: - `maxInputTokens`: Specifies the maximum number of tokens for the contextual data in the prompt (e.g., question, relevant snippets) - `maxOutputTokens`: Specifies the maximum number of tokens allowed in the response +- `reasoningEffort`: Specifies the effort on reasoning for reasoning models (having `reasoning` capability). Supported values: `high`, `medium`, `low`. - `serverSideConfig`: Additional configuration for the model. 
It can be one of the following: - `awsBedrockProvisionedThroughput`: Specifies provisioned throughput settings for AWS Bedrock models with the following fields: From 296aaf5a521ae63277e3a46f351bec1e00328203 Mon Sep 17 00:00:00 2001 From: Taras Yemets Date: Tue, 29 Apr 2025 22:38:35 +0300 Subject: [PATCH 02/10] add reasoning fields --- docs/cody/enterprise/model-config-examples.mdx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/cody/enterprise/model-config-examples.mdx b/docs/cody/enterprise/model-config-examples.mdx index ac9f8ee89..e24df0ae7 100644 --- a/docs/cody/enterprise/model-config-examples.mdx +++ b/docs/cody/enterprise/model-config-examples.mdx @@ -585,9 +585,10 @@ In the configuration above, "modelRef": "google::unknown::claude-3-7-sonnet-extended-thinking", "displayName": "Claude 3.7 Sonnet Extended Thinking", "modelName": "claude-3-7-sonnet", - "capabilities": ["chat"], + "capabilities": ["chat", "reasoning"], "category": "accuracy", "status": "stable", + "reasoningEffort": "medium", "contextWindow": { "maxInputTokens": 93000, "maxOutputTokens": 64000 From 05cc4d020ba1f6db151bd0197c7b8f883c05d634 Mon Sep 17 00:00:00 2001 From: Taras Yemets Date: Tue, 29 Apr 2025 22:47:38 +0300 Subject: [PATCH 03/10] Update docs/cody/enterprise/model-config-examples.mdx Co-authored-by: Louis Jarvis <31862633+loujar@users.noreply.github.com> --- docs/cody/enterprise/model-config-examples.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cody/enterprise/model-config-examples.mdx b/docs/cody/enterprise/model-config-examples.mdx index e24df0ae7..b5e7c0d86 100644 --- a/docs/cody/enterprise/model-config-examples.mdx +++ b/docs/cody/enterprise/model-config-examples.mdx @@ -572,7 +572,7 @@ In the configuration above, { "modelRef": "google::unknown::claude-3-7-sonnet", "displayName": "Claude 3.7 Sonnet", - "modelName": "claude-3-7-sonnet", + "modelName": "claude-3-7-sonnet@20250219", "capabilities": ["chat", "vision", "tools"], 
"category": "accuracy", "status": "stable", From 5b66b19c86f79a3b6e696962acab2f03e398f55b Mon Sep 17 00:00:00 2001 From: Taras Yemets Date: Tue, 29 Apr 2025 22:48:13 +0300 Subject: [PATCH 04/10] Update docs/cody/enterprise/model-config-examples.mdx Co-authored-by: Louis Jarvis <31862633+loujar@users.noreply.github.com> --- docs/cody/enterprise/model-config-examples.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cody/enterprise/model-config-examples.mdx b/docs/cody/enterprise/model-config-examples.mdx index b5e7c0d86..45a0ceffc 100644 --- a/docs/cody/enterprise/model-config-examples.mdx +++ b/docs/cody/enterprise/model-config-examples.mdx @@ -584,7 +584,7 @@ In the configuration above, { "modelRef": "google::unknown::claude-3-7-sonnet-extended-thinking", "displayName": "Claude 3.7 Sonnet Extended Thinking", - "modelName": "claude-3-7-sonnet", + "modelName": "claude-3-7-sonnet@20250219", "capabilities": ["chat", "reasoning"], "category": "accuracy", "status": "stable", From b9c9914835d8bcc49e8b90fdee3f42c5ba8e1d8d Mon Sep 17 00:00:00 2001 From: Taras Yemets Date: Tue, 29 Apr 2025 22:50:23 +0300 Subject: [PATCH 05/10] fix section name --- docs/cody/enterprise/model-config-examples.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/cody/enterprise/model-config-examples.mdx b/docs/cody/enterprise/model-config-examples.mdx index e24df0ae7..2074c3126 100644 --- a/docs/cody/enterprise/model-config-examples.mdx +++ b/docs/cody/enterprise/model-config-examples.mdx @@ -626,7 +626,7 @@ In the configuration above, - + ```json "modelConfiguration": { @@ -645,7 +645,7 @@ In the configuration above, "modelOverrides": [ { "modelRef": "google::unknown::claude-3-5-sonnet", - "displayName": "Claude 3.5 Sonnet (via Google/Vertex)", + "displayName": "Claude 3.5 Sonnet (via Google Vertex)", "modelName": "claude-3-5-sonnet@20240620", "contextWindow": { "maxInputTokens": 45000, From a25c15581c8bee160028459a6b3162dd70df8fa6 Mon Sep 17 
00:00:00 2001 From: Taras Yemets Date: Tue, 29 Apr 2025 22:54:18 +0300 Subject: [PATCH 06/10] Update docs/cody/enterprise/model-config-examples.mdx Co-authored-by: Louis Jarvis <31862633+loujar@users.noreply.github.com> --- docs/cody/enterprise/model-config-examples.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cody/enterprise/model-config-examples.mdx b/docs/cody/enterprise/model-config-examples.mdx index 83bfd9085..d12d30902 100644 --- a/docs/cody/enterprise/model-config-examples.mdx +++ b/docs/cody/enterprise/model-config-examples.mdx @@ -597,7 +597,7 @@ In the configuration above, { "modelRef": "google::unknown::claude-3-5-haiku", "displayName": "Claude 3.5 Haiku", - "modelName": "claude-3-5-haiku-latest", + "modelName": "claude-3-5-haiku@20241022", "capabilities": ["autocomplete", "edit", "chat", "tools"], "category": "speed", "status": "stable", From 7a4ce13d46b2f8795517f9a67f5ccc31a28f3543 Mon Sep 17 00:00:00 2001 From: Taras Yemets Date: Tue, 29 Apr 2025 23:10:23 +0300 Subject: [PATCH 07/10] docs: update reasoning effort documentation in model configuration --- docs/cody/enterprise/model-configuration.mdx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/cody/enterprise/model-configuration.mdx b/docs/cody/enterprise/model-configuration.mdx index a5c1559c0..c71ae2f54 100644 --- a/docs/cody/enterprise/model-configuration.mdx +++ b/docs/cody/enterprise/model-configuration.mdx @@ -226,6 +226,9 @@ This field is an array of items, each with the following fields: - `maxInputTokens`: Specifies the maximum number of tokens for the contextual data in the prompt (e.g., question, relevant snippets) - `maxOutputTokens`: Specifies the maximum number of tokens allowed in the response - `reasoningEffort`: Specifies the effort on reasoning for reasoning models (having `reasoning` capability). Supported values: `high`, `medium`, `low`. +How this value is treated depends on the specific provider. 
+For example, for Anthropic models supporting thinking, `low` effort means that the minimum [`thinking.budget_tokens`](https://docs.anthropic.com/en/api/messages#body-thinking) value (1024) will be used. For other `reasoningEffort` values, the `contextWindow.maxOutputTokens / 2` value will be used. +For OpenAI reasoning models, the `reasoningEffort` field value corresponds to the [`reasoning_effort`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-reasoning_effort) request body value. - `serverSideConfig`: Additional configuration for the model. It can be one of the following: - `awsBedrockProvisionedThroughput`: Specifies provisioned throughput settings for AWS Bedrock models with the following fields: @@ -327,7 +330,7 @@ In this modelOverrides config example: - The model is configured to use the `"chat"` and `"reasoning"` capabilities - The `reasoningEffort` can be set to 3 different options in the Model Config. These options are `high`, `medium` and `low` - The default `reasoningEffort` is set to `low` -- When the reasoning effort is `low`, 1024 tokens is used as the thinking budget. With `medium` and `high` the thinking budget is set via `max_tokens_to_sample/2` +- For Anthropic models supporting thinking, when the reasoning effort is `low`, 1024 tokens is used as the thinking budget. With `medium` and `high` the thinking budget is set to half of the maxOutputTokens value Refer to the [examples page](/cody/enterprise/model-config-examples) for additional examples. 
From 1d4e8e0bcfb490849e7483fb1144895c1e6ea15c Mon Sep 17 00:00:00 2001 From: Taras Yemets Date: Tue, 29 Apr 2025 23:16:22 +0300 Subject: [PATCH 08/10] maybe improve spacing --- docs/cody/enterprise/model-configuration.mdx | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/cody/enterprise/model-configuration.mdx b/docs/cody/enterprise/model-configuration.mdx index c71ae2f54..8756b3c7e 100644 --- a/docs/cody/enterprise/model-configuration.mdx +++ b/docs/cody/enterprise/model-configuration.mdx @@ -226,6 +226,7 @@ This field is an array of items, each with the following fields: - `maxInputTokens`: Specifies the maximum number of tokens for the contextual data in the prompt (e.g., question, relevant snippets) - `maxOutputTokens`: Specifies the maximum number of tokens allowed in the response - `reasoningEffort`: Specifies the effort on reasoning for reasoning models (having `reasoning` capability). Supported values: `high`, `medium`, `low`. + How this value is treated depends on the specific provider. For example, for Anthropic models supporting thinking, `low` effort means that the minimum [`thinking.budget_tokens`](https://docs.anthropic.com/en/api/messages#body-thinking) value (1024) will be used. For other `reasoningEffort` values, the `contextWindow.maxOutputTokens / 2` value will be used. For OpenAI reasoning models, the `reasoningEffort` field value corresponds to the [`reasoning_effort`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-reasoning_effort) request body value. 
From d860ad736813b0f83c45e46066dea26f7da495b7 Mon Sep 17 00:00:00 2001 From: Taras Yemets Date: Wed, 30 Apr 2025 13:59:28 +0300 Subject: [PATCH 09/10] use actual versionID in modelRefs --- docs/cody/enterprise/model-config-examples.mdx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/cody/enterprise/model-config-examples.mdx b/docs/cody/enterprise/model-config-examples.mdx index d12d30902..6ab838dd5 100644 --- a/docs/cody/enterprise/model-config-examples.mdx +++ b/docs/cody/enterprise/model-config-examples.mdx @@ -570,7 +570,7 @@ In the configuration above, ], "modelOverrides": [ { - "modelRef": "google::unknown::claude-3-7-sonnet", + "modelRef": "google::20250219::claude-3-7-sonnet", "displayName": "Claude 3.7 Sonnet", "modelName": "claude-3-7-sonnet@20250219", "capabilities": ["chat", "vision", "tools"], @@ -582,7 +582,7 @@ In the configuration above, } }, { - "modelRef": "google::unknown::claude-3-7-sonnet-extended-thinking", + "modelRef": "google::20250219::claude-3-7-sonnet-extended-thinking", "displayName": "Claude 3.7 Sonnet Extended Thinking", "modelName": "claude-3-7-sonnet@20250219", "capabilities": ["chat", "reasoning"], @@ -595,7 +595,7 @@ In the configuration above, } }, { - "modelRef": "google::unknown::claude-3-5-haiku", + "modelRef": "google::20250219::claude-3-5-haiku", "displayName": "Claude 3.5 Haiku", "modelName": "claude-3-5-haiku@20241022", "capabilities": ["autocomplete", "edit", "chat", "tools"], @@ -608,9 +608,9 @@ In the configuration above, } ], "defaultModels": { - "chat": "google::unknown::claude-3.5-sonnet", - "fastChat": "google::unknown::claude-3-5-haiku", - "codeCompletion": "google::unknown::claude-3-5-haiku" + "chat": "google::20250219::claude-3-7-sonnet", + "fastChat": "google::20250219::claude-3-5-haiku", + "codeCompletion": "google::20250219::claude-3-5-haiku" } } ``` From 504c85fac14266b83fbd8e6871c5e3b7b9d04b5e Mon Sep 17 00:00:00 2001 From: Taras Yemets Date: Wed, 30 Apr 2025 14:00:52 +0300 
Subject: [PATCH 10/10] fix formatting --- docs/cody/enterprise/model-configuration.mdx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/cody/enterprise/model-configuration.mdx b/docs/cody/enterprise/model-configuration.mdx index 8756b3c7e..78eacca90 100644 --- a/docs/cody/enterprise/model-configuration.mdx +++ b/docs/cody/enterprise/model-configuration.mdx @@ -225,9 +225,7 @@ This field is an array of items, each with the following fields: - `contextWindow`: An object that defines the **number of tokens** (units of text) that can be sent to the LLM. This setting influences response time and request cost and may vary according to the limits set by each LLM model or provider. It includes two fields: - `maxInputTokens`: Specifies the maximum number of tokens for the contextual data in the prompt (e.g., question, relevant snippets) - `maxOutputTokens`: Specifies the maximum number of tokens allowed in the response -- `reasoningEffort`: Specifies the effort on reasoning for reasoning models (having `reasoning` capability). Supported values: `high`, `medium`, `low`. - -How this value is treated depends on the specific provider. +- `reasoningEffort`: Specifies the effort on reasoning for reasoning models (having `reasoning` capability). Supported values: `high`, `medium`, `low`. How this value is treated depends on the specific provider. For example, for Anthropic models supporting thinking, `low` effort means that the minimum [`thinking.budget_tokens`](https://docs.anthropic.com/en/api/messages#body-thinking) value (1024) will be used. For other `reasoningEffort` values, the `contextWindow.maxOutputTokens / 2` value will be used. For OpenAI reasoning models, the `reasoningEffort` field value corresponds to the [`reasoning_effort`](https://platform.openai.com/docs/api-reference/chat/create#chat-create-reasoning_effort) request body value. - `serverSideConfig`: Additional configuration for the model. It can be one of the following: