-
Notifications
You must be signed in to change notification settings - Fork 15
Expand file tree
/
Copy pathsettings.json
More file actions
61 lines (45 loc) · 2.25 KB
/
settings.json
File metadata and controls
61 lines (45 loc) · 2.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
{
"session": {
"affinity_enabled": true,
"id_header": "X-Session-ID",
"__ttl_hours_comment": "How long to keep session affinity (in hours)",
"ttl_hours": 24,
"__max_sessions_comment": "Max sessions to keep in memory before pruning",
"max_sessions": 1000
},
"context": {
"__management_mode_options": "Available modes: static, dynamic, reservoir, adaptive, disabled",
"management_mode": "reservoir",
"__static_recent_keep_comment": "Number of recent messages to keep verbatim",
"static_recent_keep": 10,
"__dynamic_utilization_target_comment": "Target % of context tokens to aim for",
"dynamic_utilization_target": 0.8,
"__dynamic_min_utilization_comment": "Minimum % of context tokens to use",
"dynamic_min_utilization": 0.3,
"__dynamic_max_boost_comment": "Maximum multiplier for dynamic boost",
"dynamic_max_boost": 1.5,
"__reservoir_recent_keep_comment": "Recent messages to keep verbatim in reservoir mode",
"reservoir_recent_keep": 15,
"__reservoir_summary_budget_comment": "Token budget for summarization in reservoir mode",
"reservoir_summary_budget": 400
},
"summarization": {
"enabled": true,
"mode": "extractive",
"__max_tokens_comment": "Max tokens for generated summary",
"max_tokens": 200,
"__timeout_ms_comment": "Timeout for summarization in milliseconds",
"timeout_ms": 2000,
"fallback_to_extractive": true
},
"routing": {
"global_provider_lock": true,
"__comment": "Ensures only one request per provider at a time, eliminates 429 errors",
"__use_server_side_system_prompt_comment": "When true (default), the server always injects its STANDARD_SYSTEM_PROMPT + style_directive and ignores the client's system message. When false, the client's messages array is forwarded verbatim, the client's own system message (if any) is respected, and no server-side prompts are added. Set to false for agent frameworks (LangChain, AutoGen, CrewAI) that need full control over their system prompt and message ordering. Can be overridden per-request via the X-Use-ServerSide-System-Prompt header.",
"use_server_side_system_prompt": true
},
"http": {
"timeout_seconds": 60,
"__timeout_seconds_comment": "HTTP request timeout in seconds"
}
}