lordzaharum · lordzaharum · May 19, 2026 · May 19, 2026
diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@@ -532,6 +532,18 @@ async def chat_completion(self, model: str, system: str, user: str, temperature:
                 # Support for custom OpenAI body fields (e.g., Flex Processing)
                 kwargs = _process_litellm_extra_body(kwargs)
 
+                # Support for Anthropic prompt caching via LiteLLM's cache_control_injection_points
+                # (https://docs.litellm.ai/docs/tutorials/prompt_caching). Configured as a string
+                # holding a JSON array in the [litellm] section of configuration.toml or .pr_agent.toml.
+                if get_settings().get("LITELLM.CACHE_CONTROL_INJECTION_POINTS", None):
+                    try:
+                        cache_points = json.loads(get_settings().litellm.cache_control_injection_points)
+                        if not isinstance(cache_points, list):
+                            raise ValueError("LITELLM.CACHE_CONTROL_INJECTION_POINTS must be a JSON array")
+                        kwargs["cache_control_injection_points"] = cache_points
+                    except json.JSONDecodeError as e:
+                        raise ValueError(f"LITELLM.CACHE_CONTROL_INJECTION_POINTS contains invalid JSON: {str(e)}")
+
                 # Support for Bedrock custom inference profile via model_id
                 model_id = get_settings().get("litellm.model_id")
                 if model_id and 'bedrock/' in model:

diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
@@ -326,6 +326,9 @@ success_callback = []
 failure_callback = []
 service_callback = []
 # model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock
+# cache_control_injection_points = "" # Optional: string holding a JSON array; enables Anthropic prompt caching via LiteLLM
+# Example: cache_control_injection_points = '[{"location": "message", "role": "system"}]'
+# See https://docs.litellm.ai/docs/tutorials/prompt_caching
 
 [pr_similar_issue]
 skip_comments = false