diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
index a6e79d7a07..42889fbb40 100644
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -532,6 +532,22 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
             # Support for custom OpenAI body fields (e.g., Flex Processing)
             kwargs = _process_litellm_extra_body(kwargs)
 
+            # Support for Anthropic prompt caching via LiteLLM's cache_control_injection_points
+            # (https://docs.litellm.ai/docs/tutorials/prompt_caching). Configurable in the
+            # [litellm] section of configuration.toml or .pr_agent.toml, either as a JSON
+            # array string or as an already-parsed (native) array.
+            cache_points = get_settings().get("LITELLM.CACHE_CONTROL_INJECTION_POINTS", None)
+            if cache_points:
+                if isinstance(cache_points, str):
+                    # json.loads only accepts str/bytes input, so decode the string form only.
+                    try:
+                        cache_points = json.loads(cache_points)
+                    except json.JSONDecodeError as e:
+                        raise ValueError(f"LITELLM.CACHE_CONTROL_INJECTION_POINTS contains invalid JSON: {str(e)}") from e
+                if not isinstance(cache_points, list):
+                    raise ValueError("LITELLM.CACHE_CONTROL_INJECTION_POINTS must be a JSON array")
+                kwargs["cache_control_injection_points"] = cache_points
+
             # Support for Bedrock custom inference profile via model_id
             model_id = get_settings().get("litellm.model_id")
             if model_id and 'bedrock/' in model:
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index f4d63a73f2..f780720168 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -326,6 +326,9 @@ success_callback = []
 failure_callback = []
 service_callback = []
 # model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock
+# cache_control_injection_points = "" # Optional: JSON array enabling Anthropic prompt caching via LiteLLM
+# Example: cache_control_injection_points = '[{"location": "message", "role": "system"}]'
+# See https://docs.litellm.ai/docs/tutorials/prompt_caching
 
 [pr_similar_issue]
 skip_comments = false