forked from nesquena/hermes-webui
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.py
More file actions
518 lines (456 loc) · 22 KB
/
server.py
File metadata and controls
518 lines (456 loc) · 22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
"""
Hermes Web UI -- Main server entry point.
Thin routing shell: imports Handler, delegates to api/routes.py, runs server.
All business logic lives in api/*.
"""
import logging
import os
import re
import socket
import sys
import time
import traceback
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
# ── Test-mode network isolation ─────────────────────────────────────────────
# When `HERMES_WEBUI_TEST_NETWORK_BLOCK=1` is set in the environment, refuse
# outbound socket connections to anything that is not loopback / RFC1918 /
# link-local / reserved-TLD. This catches accidental real outbound (forgotten
# mocks, leaked credentials triggering SDK init, new code paths bypassing an
# existing mock) so the test suite stays hermetic and fast.
#
# tests/conftest.py sets this env var on every test_server subprocess so the
# server.py-side network isolation matches the pytest-process-side isolation
# already installed there.
#
# A test that legitimately needs real outbound spawns the server with the env
# var unset (no current callers — every test_server-using test should be
# mockable).
if os.environ.get("HERMES_WEBUI_TEST_NETWORK_BLOCK", "").strip() in ("1", "true", "yes"):
    # Keep the real implementations so permitted connections can be delegated
    # to them once the destination has been classified as local.
    _REAL_CREATE_CONN = socket.create_connection
    _REAL_SOCK_CONNECT = socket.socket.connect
    # AF_UNIX does not exist on every platform; None means "never matches".
    _AF_UNIX = getattr(socket, "AF_UNIX", None)
    import re as _re

    def _re_match_unique_local_ipv6(h):
        """Match IPv6 fc00::/7 (canonical syntax). Tighter than startswith('fc')
        so we don't mistakenly classify hostnames like 'food.example.com' as local."""
        return bool(_re.match(r"^f[cd][0-9a-f]{0,2}:", h))

    def _addr_is_local(host):
        """Return True when *host* is a destination the test suite may reach.

        Accepted destinations:
          * IPv6 loopback, link-local (fe80:) and unique-local (fc00::/7)
          * ``localhost`` and ``*.localhost``
          * the ``.local``, ``.test``, ``.invalid`` and ``.example`` suffixes
          * ``example.com`` / ``example.net`` / ``example.org`` and subdomains
          * IPv4 127/8, 10/8, 192.168/16, 172.16/12, 169.254/16, 203.0.113/24

        Anything that is not a non-empty string is treated as non-local.
        """
        if not isinstance(host, str):
            return False
        h = host.strip().lower()
        if not h:
            return False
        # IPv6 unique-local fc00::/7: require hex pair + colon to avoid
        # matching hostnames like "food.example.com" or "fdsa.test".
        if h in ("::1", "0:0:0:0:0:0:0:1") or h.startswith("fe80:") or _re_match_unique_local_ipv6(h):
            return True
        if h == "localhost" or h.endswith(".localhost"):
            return True
        if h.endswith((".local", ".test", ".invalid")):
            return True
        if h in ("example.com", "example.net", "example.org"):
            return True
        if h.endswith((".example.com", ".example.net", ".example.org", ".example")):
            return True
        # Dotted-quad IPv4 literal: classify by its leading octets.
        if h[0].isdigit() and h.count(".") == 3:
            try:
                o1, o2, o3, _o4 = [int(p) for p in h.split(".")]
            except ValueError:
                return False
            return (
                o1 in (127, 10)
                or (o1 == 192 and o2 == 168)
                or (o1 == 172 and 16 <= o2 <= 31)
                or (o1 == 169 and o2 == 254)
                or (o1 == 203 and o2 == 0 and o3 == 113)
            )
        return False

    def _blocked_create_connection(address, *a, **kw):
        """Replacement for ``socket.create_connection`` that refuses non-local hosts.

        Raises:
            OSError: when the host part of *address* is not local.
        """
        try:
            host = address[0]
        except (TypeError, IndexError):
            host = ""
        if _addr_is_local(host):
            return _REAL_CREATE_CONN(address, *a, **kw)
        raise OSError(
            f"hermes test network isolation (server.py): outbound to {address!r} blocked"
        )

    def _blocked_socket_connect(self, address):
        """Replacement for ``socket.socket.connect`` that refuses non-local hosts.

        Raises:
            OSError: when the host part of *address* is not local.
        """
        # A Unix-domain socket cannot leave the machine. Its address is a
        # path (str/bytes), so indexing it would yield the first character
        # and the connect would always be refused as "outbound".
        if _AF_UNIX is not None and self.family == _AF_UNIX:
            return _REAL_SOCK_CONNECT(self, address)
        try:
            host = address[0]
        except (TypeError, IndexError):
            host = ""
        if _addr_is_local(host):
            return _REAL_SOCK_CONNECT(self, address)
        raise OSError(
            f"hermes test network isolation (server.py): socket.connect to {address!r} blocked"
        )

    socket.create_connection = _blocked_create_connection
    socket.socket.connect = _blocked_socket_connect
try:
import resource
except ImportError: # pragma: no cover - resource is Unix-only
resource = None
from urllib.parse import urlparse
logger = logging.getLogger(__name__)
# Baseline CSP connect-src sources: the page's own origin plus any port on
# the loopback names, over both HTTP and WebSocket.
_CSP_CONNECT_BASE = (
    "'self' http://127.0.0.1:* http://localhost:* "
    "ws://127.0.0.1:* ws://localhost:*"
)
# One extra connect-src entry: scheme://[*.]host[:port] where the port is
# either 1-5 digits or "*". The port uses an explicit [0-9] class instead of
# \d because \d in a str pattern also matches non-ASCII decimal digits;
# int() accepts those, but they cannot be encoded into an HTTP header.
_CSP_EXTRA_CONNECT_RE = re.compile(
    r"^(?:https?|wss?)://(?:\*\.)?[A-Za-z0-9._~-]+(?::(?P<port>[0-9]{1,5}|\*))?$"
)


def _valid_csp_extra_connect_source(source: str) -> bool:
    """Return True when *source* is an acceptable extra connect-src entry.

    The entry must be an http(s)/ws(s) origin with an optional leading
    ``*.`` wildcard label and no path. A port, when present, must be ``*``
    or a number in the range 1-65535.
    """
    match = _CSP_EXTRA_CONNECT_RE.fullmatch(source)
    if match is None:
        return False
    port = match.group("port")
    if port is None or port == "*":
        return True
    # The pattern guarantees 1-5 ASCII digits, so int() cannot fail here.
    return 1 <= int(port) <= 65535
def _csp_extra_connect_src() -> str:
    """Return the operator-supplied connect-src additions, space-prefixed.

    Reads HERMES_WEBUI_CSP_CONNECT_EXTRA as a whitespace-separated list.
    Returns "" when the variable is unset or blank. If any entry fails
    validation the whole value is ignored, a warning is logged, and "" is
    returned.
    """
    configured = os.getenv("HERMES_WEBUI_CSP_CONNECT_EXTRA", "").strip()
    if configured == "":
        return ""
    entries = configured.split()
    if entries and all(_valid_csp_extra_connect_source(entry) for entry in entries):
        # Leading space lets the caller append this directly to the base list.
        return " " + " ".join(entries)
    logger.warning("Ignoring invalid HERMES_WEBUI_CSP_CONNECT_EXTRA value")
    return ""
def _build_csp_report_only_policy() -> str:
    """Assemble the Content-Security-Policy-Report-Only header value.

    The connect-src directive is the base source list followed by any
    validated extras from the environment; every other directive is fixed.
    """
    connect_src = _CSP_CONNECT_BASE + _csp_extra_connect_src()
    directives = (
        "default-src 'self'; ",
        "base-uri 'self'; ",
        "object-src 'none'; ",
        "frame-ancestors 'self'; ",
        "script-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; ",
        "style-src 'self' 'unsafe-inline' https://cdn.jsdelivr.net; ",
        "img-src 'self' data: blob:; ",
        "font-src 'self' data:; ",
        "media-src 'self' data: blob:; ",
        f"connect-src {connect_src}; ",
        "report-uri /api/csp-report; report-to csp-endpoint",
    )
    return "".join(directives)
from api.auth import check_auth
from api.config import HOST, PORT, STATE_DIR, SESSION_DIR, DEFAULT_WORKSPACE
from api.helpers import j, get_profile_cookie
from api.profiles import set_request_profile, clear_request_profile
from api.routes import handle_delete, handle_get, handle_patch, handle_post, handle_put
from api.startup import auto_install_agent_deps, fix_credential_permissions
from api.updates import WEBUI_VERSION
class QuietHTTPServer(ThreadingHTTPServer):
    """Custom HTTP server that silently handles common network errors.

    It also selects the IPv6 address family when given an IPv6 bind address
    and keeps simple accept-loop progress counters on the instance.
    """

    daemon_threads = True
    request_queue_size = 64

    # Exception types that indicate the client went away mid-request.
    _CLIENT_DISCONNECT_TYPES = (
        ConnectionResetError,
        BrokenPipeError,
        ConnectionAbortedError,
        TimeoutError,
    )
    # errno 32 is EPIPE
    # errno 54 is Connection reset by peer on macOS/BSD
    # errno 104 is Connection reset by peer on Linux
    # errno 110 is ETIMEDOUT on Linux
    _CLIENT_DISCONNECT_ERRNOS = frozenset({32, 54, 104, 110})

    def __init__(self, *args, **kwargs):
        """Create the server, switching to AF_INET6 for a ':'-containing host."""
        server_address = args[0] if args else kwargs.get('server_address', None)
        # Must be set before the base class creates and binds the socket.
        if server_address and ':' in server_address[0]:
            self.address_family = socket.AF_INET6
        super().__init__(*args, **kwargs)
        self.accept_loop_requests_total = 0
        self.accept_loop_last_request_at = 0.0

    def _handle_request_noblock(self):
        """Record accept-loop progress before dispatching a request handler.

        A process can be alive and still stop accepting/dispatching requests.
        Exposing this heartbeat on /health gives supervisors and watchdogs a
        cheap signal that the accept loop is still moving.

        Note: this method is called only from the single ``serve_forever()``
        thread in CPython socketserver, so the un-locked ``+=`` increment is
        safe — there is no other thread mutating these counters. The /health
        readers may see a stale value momentarily but never an inconsistent
        one (Python int reads are atomic). Per Opus advisor on stage-297.
        """
        self.accept_loop_requests_total += 1
        self.accept_loop_last_request_at = time.time()
        return super()._handle_request_noblock()

    def handle_error(self, request, client_address):
        """Override to suppress logging for common client disconnect errors.

        Disconnect exceptions (including subclasses) and OSErrors carrying a
        disconnect errno are dropped silently. Everything else, including a
        call made while no exception is being handled, goes to the default
        socketserver logging.
        """
        exc_type, exc_value, _ = sys.exc_info()
        # exc_type is None when no exception is active; issubclass() would
        # raise TypeError on it, so only classify real exception types.
        if exc_type is not None:
            # Silently ignore common connection errors caused by client disconnects
            if issubclass(exc_type, self._CLIENT_DISCONNECT_TYPES):
                return
            # Also handle socket errors that indicate client disconnect
            if (
                issubclass(exc_type, OSError)
                and getattr(exc_value, 'errno', None) in self._CLIENT_DISCONNECT_ERRNOS
            ):
                return
        # For other errors, use default logging
        super().handle_error(request, client_address)
class Handler(BaseHTTPRequestHandler):
    """Request handler: auth check, per-request profile, delegate to routes.

    Each HTTP verb resolves the profile cookie, runs the auth check, hands
    the parsed URL to the matching ``handle_*`` route function, and maps a
    ``False`` result to a 404 and an unexpected exception to a 500.
    """

    # HTTP/1.1 enables keep-alive connection reuse — major latency win on
    # high-RTT links where every saved TCP handshake is 2×RTT. Each response
    # MUST declare framing (Content-Length, Transfer-Encoding: chunked, or
    # Connection: close) so the client knows where the message ends. Helpers
    # j()/t() emit Content-Length; SSE/streaming endpoints emit
    # Connection: close because the body has no terminator. See PR notes.
    protocol_version = "HTTP/1.1"
    timeout = 30  # seconds — kills idle/incomplete connections to prevent thread exhaustion

    def setup(self):
        """Set socket options for each accepted connection."""
        super().setup()
        # TCP_NODELAY — universal, disables Nagle for HTTP latency
        try:
            self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
        except OSError:
            pass
        # SO_KEEPALIVE — universal master switch (must be set before timing params)
        try:
            self.connection.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
        except OSError:
            pass
        # Per-platform timing parameters
        if hasattr(socket, 'TCP_KEEPIDLE'):  # Linux
            try:
                self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 10)
                self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 5)
                self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 3)
            except OSError:
                pass
        elif hasattr(socket, 'TCP_KEEPALIVE'):  # macOS
            try:
                self.connection.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPALIVE, 10)
            except OSError:
                pass

    # Server header: "HermesWebUI/<version>" unless the version is unknown.
    _ver_suffix = WEBUI_VERSION.removeprefix('v')
    server_version = ('HermesWebUI/' + _ver_suffix) if _ver_suffix != 'unknown' else 'HermesWebUI'
    _CSP_REPORT_TO = '{"group":"csp-endpoint","max_age":10886400,"endpoints":[{"url":"/api/csp-report"}]}'

    @classmethod
    def csp_report_only_policy(cls) -> str:
        """Return the Content-Security-Policy-Report-Only header value."""
        return _build_csp_report_only_policy()

    def end_headers(self) -> None:
        """Append the CSP report-only and Report-To headers to every response."""
        self.send_header("Content-Security-Policy-Report-Only", self.csp_report_only_policy())
        self.send_header("Report-To", self._CSP_REPORT_TO)
        super().end_headers()

    def log_message(self, fmt, *args): pass  # suppress default Apache-style log

    def log_request(self, code: str='-', size: str='-') -> None:
        """Structured JSON logs for each request."""
        import json as _json
        # _req_t0 is set by the do_* methods; fall back to "now" (0 ms) if absent.
        duration_ms = round((time.time() - getattr(self, '_req_t0', time.time())) * 1000, 1)
        record = _json.dumps({
            'ts': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime()),
            'method': getattr(self, 'command', None) or '-',
            'path': getattr(self, 'path', None) or '-',
            'status': int(code) if str(code).isdigit() else code,
            'ms': duration_ms,
        })
        print(f'[webui] {record}', flush=True)

    def do_GET(self) -> None:
        """Authenticate and route a GET request."""
        self._req_t0 = time.time()
        # Per-request profile context from cookie (issue #798)
        cookie_profile = get_profile_cookie(self)
        if cookie_profile:
            set_request_profile(cookie_profile)
        try:
            parsed = urlparse(self.path)
            if not check_auth(self, parsed): return
            result = handle_get(self, parsed)
            if result is False:
                return j(self, {'error': 'not found'}, status=404)
        except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError):
            # The browser/client closed the socket while we were writing the
            # response. This is expected for probes, tab closes, and SSE
            # reconnect races; do not convert it into a misleading server 500.
            return
        except Exception:
            print(f'[webui] ERROR {self.command} {self.path}\n' + traceback.format_exc(), flush=True)
            return j(self, {'error': 'Internal server error'}, status=500)
        finally:
            clear_request_profile()

    def _handle_write(self, route_func) -> None:
        """Authenticate and route a write request through *route_func*.

        *route_func* is called as ``route_func(handler, parsed_url)``; a
        ``False`` return value produces a 404.
        """
        self._req_t0 = time.time()
        # Per-request profile context from cookie (issue #798)
        cookie_profile = get_profile_cookie(self)
        if cookie_profile:
            set_request_profile(cookie_profile)
        try:
            parsed = urlparse(self.path)
            # Stage-346 Opus SHOULD-FIX defense-in-depth: scope the CSP-report
            # auth carve-out to POST only. The endpoint is intentionally
            # unauthenticated (browsers omit cookies on CSP reports), but the
            # carve-out should not extend to PATCH/DELETE on that path even
            # though they currently fail through CSRF/routing fallthrough.
            _is_csp_report_post = (
                parsed.path == "/api/csp-report" and self.command == "POST"
            )
            if not _is_csp_report_post and not check_auth(self, parsed): return
            result = route_func(self, parsed)
            if result is False:
                return j(self, {'error': 'not found'}, status=404)
        except (BrokenPipeError, ConnectionResetError, ConnectionAbortedError):
            # The browser/client closed the socket while we were writing the
            # response. This is expected for probes, tab closes, and SSE
            # reconnect races; do not convert it into a misleading server 500.
            return
        except Exception:
            print(f'[webui] ERROR {self.command} {self.path}\n' + traceback.format_exc(), flush=True)
            return j(self, {'error': 'Internal server error'}, status=500)
        finally:
            clear_request_profile()

    def do_POST(self) -> None:
        self._handle_write(handle_post)

    def do_PUT(self) -> None:
        self._handle_write(handle_put)

    def do_PATCH(self) -> None:
        self._handle_write(handle_patch)

    def do_OPTIONS(self) -> None:
        """Handle CORS preflight requests."""
        self._req_t0 = time.time()
        self.send_response(200)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, PUT, PATCH, DELETE, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type, Authorization")
        # The response has no body. Under HTTP/1.1 keep-alive it still has to
        # declare its framing, otherwise the client waits for the connection
        # to close before treating the preflight as complete.
        self.send_header("Content-Length", "0")
        self.end_headers()

    def do_DELETE(self) -> None:
        self._handle_write(handle_delete)
def _raise_fd_soft_limit(target: int = 4096) -> dict:
    """Best-effort raise of RLIMIT_NOFILE for persistent WebUI hosts.

    macOS launchd jobs often start with a 256 soft limit. If a future FD leak
    regresses, that low ceiling turns a leak into a hard HTTP wedge quickly.
    Raising the soft limit does not hide leaks; it buys enough headroom for
    diagnostics and watchdog recovery.

    Args:
        target: Desired soft limit; capped at the hard limit when that is finite.

    Returns:
        A dict whose ``status`` is one of ``"unsupported"`` (no ``resource``
        module), ``"error"`` (with ``error`` text), ``"unchanged"`` or
        ``"raised"`` (with ``previous_soft``). All but ``"unsupported"``
        include ``soft`` and ``hard`` when they could be read.
    """
    if resource is None:
        return {"status": "unsupported"}
    try:
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    except Exception as exc:
        return {"status": "error", "error": str(exc)}
    # On Unix, RLIM_INFINITY is commonly a large int; keep the logic explicit
    # so tests can use ordinary integers without depending on platform values.
    unlimited = (-1, getattr(resource, "RLIM_INFINITY", object()))
    desired = int(target)
    if hard not in unlimited:
        desired = min(desired, int(hard))
    # An unlimited soft limit can be reported as -1, which would compare as
    # smaller than any target; never "raise" it down to a finite value.
    if soft in unlimited or soft >= desired:
        return {"status": "unchanged", "soft": soft, "hard": hard}
    try:
        resource.setrlimit(resource.RLIMIT_NOFILE, (desired, hard))
    except Exception as exc:
        return {"status": "error", "soft": soft, "hard": hard, "error": str(exc)}
    return {"status": "raised", "soft": desired, "hard": hard, "previous_soft": soft}
def main() -> None:
    """Run startup checks, bind the HTTP(S) server and serve until stopped.

    Startup order: print config, raise the FD soft limit, fix credential
    file permissions, attempt session recovery, print security warnings,
    verify (and if needed install) agent dependencies, create state
    directories, start the gateway watcher, then bind and serve. On exit
    the watcher is stopped and pending lifecycle commits are drained.
    """
    from api.config import print_startup_config, verify_hermes_imports, _HERMES_FOUND
    print_startup_config()
    fd_limit = _raise_fd_soft_limit()
    if fd_limit.get("status") == "raised":
        print(
            f"[ok] Raised file descriptor soft limit "
            f"{fd_limit.get('previous_soft')} -> {fd_limit.get('soft')}",
            flush=True,
        )
    elif fd_limit.get("status") == "error":
        print(f"[!!] WARNING: Could not raise file descriptor limit: {fd_limit.get('error')}", flush=True)
    # Fix sensitive file permissions before doing anything else
    fix_credential_permissions()
    # ── #1558 startup self-heal ─────────────────────────────────────────
    # If a previous process wrote a session JSON with fewer messages than
    # its .bak (the data-loss shape #1558 produced), restore from the .bak.
    # Safe to run unconditionally — a clean install is a no-op.
    try:
        from api.models import _active_state_db_path
        from api.session_recovery import recover_all_sessions_on_startup
        result = recover_all_sessions_on_startup(
            SESSION_DIR,
            rebuild_index=True,
            state_db_path=_active_state_db_path(),
        )
        if result.get("restored"):
            print(f"[recovery] Restored {result['restored']}/{result['scanned']} sessions from .bak (see #1558).", flush=True)
    except Exception as exc:
        # Recovery is best-effort; never block server startup.
        print(f"[recovery] startup recovery failed: {exc}", flush=True)
    # The "/.within_container" marker file is created in the Dockerfile.
    # Only its presence matters, so test for existence instead of opening
    # it: an unreadable marker must not abort startup.
    within_container = os.path.exists('/.within_container')
    if within_container:
        print('[ok] Running within container.', flush=True)
    # Security: warn if binding non-loopback without authentication
    from api.auth import is_auth_enabled
    if HOST not in ('127.0.0.1', '::1', 'localhost') and not is_auth_enabled():
        print(f'[!!] WARNING: Binding to {HOST} with NO PASSWORD SET.', flush=True)
        print(f' Anyone on the network can access your filesystem and agent.', flush=True)
        print(f' Set a password via Settings or HERMES_WEBUI_PASSWORD env var.', flush=True)
        print(f' To suppress: bind to 127.0.0.1 or set a password.', flush=True)
        if within_container:
            print(f' Note: You are running within a container, must bind to 0.0.0.0 (IPv4) or :: (IPv6) to publish the port.', flush=True)
    elif not is_auth_enabled():
        print(f' [tip] No password set. Any process on this machine can read sessions', flush=True)
        print(f' and memory via the local API. Set HERMES_WEBUI_PASSWORD to', flush=True)
        print(f' enable authentication.', flush=True)
    ok, missing, errors = verify_hermes_imports()
    if not ok and _HERMES_FOUND:
        print(f'[!!] Warning: Hermes agent found but missing modules: {missing}', flush=True)
        for mod, err in errors.items():
            print(f' {mod}: {err}', flush=True)
        print(' Attempting to install missing dependencies from agent requirements.txt...', flush=True)
        auto_install_agent_deps()
        # Re-check so the outcome of the install attempt is reported.
        ok, missing, errors = verify_hermes_imports()
        if not ok:
            print(f'[!!] Still missing after install attempt: {missing}', flush=True)
            for mod, err in errors.items():
                print(f' {mod}: {err}', flush=True)
            print(' Agent features may not work correctly.', flush=True)
        else:
            print('[ok] Agent dependencies installed successfully.', flush=True)
    STATE_DIR.mkdir(parents=True, exist_ok=True)
    SESSION_DIR.mkdir(parents=True, exist_ok=True)
    DEFAULT_WORKSPACE.mkdir(parents=True, exist_ok=True)
    # Start the gateway session watcher for real-time SSE updates
    try:
        from api.gateway_watcher import start_watcher
        start_watcher()
    except Exception as e:
        print(f'[!!] WARNING: Gateway watcher failed to start: {e}', flush=True)
    httpd = QuietHTTPServer((HOST, PORT), Handler)
    # ── TLS/HTTPS setup (optional) ─────────────────────────────────────────
    from api.config import TLS_ENABLED, TLS_CERT, TLS_KEY
    scheme = 'https' if TLS_ENABLED else 'http'
    if TLS_ENABLED:
        try:
            import ssl
            ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
            ctx.minimum_version = ssl.TLSVersion.TLSv1_2
            ctx.load_cert_chain(TLS_CERT, TLS_KEY)
            httpd.socket = ctx.wrap_socket(httpd.socket, server_side=True)
            print(f' TLS enabled: cert={TLS_CERT}, key={TLS_KEY}', flush=True)
        except Exception as e:
            print(f'[!!] WARNING: TLS setup failed ({e}), falling back to HTTP', flush=True)
            scheme = 'http'
    # An IPv6 literal must be bracketed inside a URL, otherwise its colons
    # are indistinguishable from the port separator.
    url_host = f'[{HOST}]' if ':' in HOST else HOST
    print(f' Hermes Web UI listening on {scheme}://{url_host}:{PORT}', flush=True)
    if HOST in ('127.0.0.1', '::1') or within_container:
        print(f' Remote access: ssh -N -L {PORT}:127.0.0.1:{PORT} <user>@<your-server>', flush=True)
        print(f' Then open: {scheme}://localhost:{PORT}', flush=True)
    print('', flush=True)
    try:
        httpd.serve_forever()
    finally:
        # Stop the gateway watcher on shutdown
        try:
            from api.gateway_watcher import stop_watcher
            stop_watcher()
        except Exception:
            logger.debug("Failed to stop gateway watcher during shutdown", exc_info=True)
        # Drain pending memory-provider lifecycle commits before exit
        try:
            from api.session_lifecycle import drain_all_on_shutdown
            drain_all_on_shutdown()
        except Exception:
            logger.debug("Failed to drain lifecycle on shutdown", exc_info=True)


if __name__ == '__main__':
    main()