Project-Go-Forward/main.py at main · arigatoexpress/Project-Go-Forward · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
FastAPI Application — Config-Driven AI Agent Server

Serves the AI agent backend and static frontend.
All business-specific config is loaded from config.yaml.
Admin routes use `X-Admin-Token` or `Authorization: Bearer <token>`;
partner integrations live under `/api/v1/*` and authenticate with `THO_API_KEY`.
"""
# ruff: noqa: E402

import os

# Configure Vertex AI before importing any ADK modules
os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "TRUE"

import asyncio
import hashlib
import json
import logging
import re
import secrets
import threading
import time
import uuid
from collections import defaultdict, deque
from datetime import UTC, datetime, timedelta
from json import JSONDecodeError
from urllib.parse import urlsplit, urlunsplit

import uvicorn
from fastapi import Depends, FastAPI, File, HTTPException, Request, UploadFile
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse, RedirectResponse, Response
from fastapi.staticfiles import StaticFiles
from slowapi import Limiter
from slowapi.errors import RateLimitExceeded
from slowapi.middleware import SlowAPIMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.middleware.gzip import GZipMiddleware

import caching
from auth.routes import router as passkey_router
from auth.session import SESSION_COOKIE_NAME as PASSKEY_COOKIE_NAME
from auth.session import SessionManager
from config_loader import business_name, get_deployment_config

# Lazy initialization placeholders - will be loaded on first use
# _adk_app / _runner / _root_agent are populated together by _get_runner();
# _vertexai_initialized is flipped to True by _init_vertex_ai() once
# vertexai.init() has succeeded, so later calls return immediately.
_adk_app = None
_runner = None
_vertexai_initialized = False
_root_agent = None


def _init_vertex_ai():
    """Initialise the Vertex AI SDK the first time it is needed.

    The project and location come from ``GOOGLE_CLOUD_PROJECT`` /
    ``GOOGLE_CLOUD_LOCATION`` when set, otherwise from the deployment config
    (``project_id`` / ``region``), otherwise from hard-coded defaults.

    Any failure is logged as a warning and swallowed so the server can still
    start without AI; in that case the initialised flag stays False and the
    next call tries again.
    """
    global _vertexai_initialized
    if not _vertexai_initialized:
        try:
            import vertexai

            settings = get_deployment_config()
            project = os.environ.get(
                "GOOGLE_CLOUD_PROJECT", settings.get("project_id", "tho-ai-agent")
            )
            region = os.environ.get(
                "GOOGLE_CLOUD_LOCATION", settings.get("region", "us-central1")
            )
            vertexai.init(project=project, location=region)
            _vertexai_initialized = True
            logger.info("Vertex AI initialized successfully")
        except Exception as exc:
            # Deliberately not re-raised - allow server to start without AI.
            logger.warning(f"Vertex AI initialization failed: {exc}")


def _get_runner():
    """Return the shared ADK runner, building it on first use.

    The first successful call initialises Vertex AI, imports the ADK classes
    and the project's root agent, and caches the resulting agent, ``App`` and
    ``InMemoryRunner`` in module globals. Later calls return the cached
    runner.

    Returns:
        The cached ``InMemoryRunner`` instance.

    Raises:
        RuntimeError: if any step of the initialisation fails. The original
            exception is attached as ``__cause__`` so the real failure is
            visible in tracebacks and error trackers.
    """
    global _adk_app, _runner, _root_agent
    if _runner is not None:
        return _runner
    try:
        _init_vertex_ai()
        from google.adk.apps import App
        from google.adk.runners import InMemoryRunner

        from root_agent import root_agent

        # Build everything in locals first so a failure half-way through does
        # not leave the module globals partially populated.
        adk_app = App(name="root_agent", root_agent=root_agent)
        runner = InMemoryRunner(app=adk_app)
    except Exception as e:
        logger.error(f"Failed to initialize ADK runner: {e}")
        raise RuntimeError("AI services not available. Please try again later.") from e
    _root_agent = root_agent
    _adk_app = adk_app
    _runner = runner
    logger.info("ADK Runner initialized successfully")
    return _runner


from appointment_manager import Appointment, AppointmentManager
from audit_log import (
    ALLOWED_ACTIONS as AUDIT_ALLOWED_ACTIONS,
)
from audit_log import (
    ALLOWED_TARGET_TYPES as AUDIT_ALLOWED_TARGET_TYPES,
)
from audit_log import (
    log_admin_action,
    query_audit_log,
)
from chat_history import ChatHistory
from conversation_memory import ConversationMemory
from docuseal_service import (
    maybe_trigger_automated_signing as docuseal_auto_trigger,
)
from docuseal_service import (
    send_file_for_signature as docuseal_send_file_for_signature,
)
from docuseal_service import (
    send_for_signature as docuseal_send_for_signature,
)
from email_service import (
    get_email_log,
    notify_new_appointment,
    notify_new_lead,
    send_appointment_confirmation,
    send_custom_email,
    send_deal_status_update,
    send_document_email,
    send_lead_welcome,
)
from lead_management import Lead, LeadManager
from structured_logging import logger as struct_logger
from tools.input_sanitizer import sanitize_body
from tools.pii_guard import redact_pii_from_text, validate_no_pii_in_text


def _safe_audit(action: str, details: dict) -> None:
    """Wrap audit_log to never raise into the request hot path."""
    try:
        log_admin_action(
            actor="system",  # Background email actions are system-actor
            action=action,
            target_type="document",
            details=details,
            request=None,
        )
    except Exception as exc:  # noqa: BLE001
        try:
            struct_logger.warning("Audit log write failed", action=action, error=str(exc))
        except Exception:
            pass


def _maybe_email_document(
    *,
    customer_email,
    customer_name,
    doc_filename: str,
    doc_type: str,
    download_url: str,
    deal_id=None,
    audit_action: str = "document.email_delivery",
) -> None:
    """Best-effort document delivery email.

    Silently skips when the customer email is missing. Catches and warn-logs
    every failure so the surrounding doc-generation request never breaks
    because email is flaky.
    """
    if not customer_email:
        try:
            struct_logger.info(
                "Document email skipped — no customer email",
                doc_filename=doc_filename,
                deal_id=deal_id,
            )
        except Exception:
            pass
        return

    try:
        result = send_document_email(
            to=customer_email,
            customer_name=customer_name or "",
            doc_filename=doc_filename,
            doc_type=doc_type,
            download_url=download_url,
            deal_id=deal_id,
        )
    except Exception as exc:  # noqa: BLE001
        try:
            struct_logger.warning(
                "Document email send raised",
                error=str(exc),
                doc_filename=doc_filename,
                deal_id=deal_id,
            )
        except Exception:
            pass
        return

    _safe_audit(
        audit_action,
        {
            "to": customer_email,
            "doc_filename": doc_filename,
            "doc_type": doc_type,
            "deal_id": deal_id,
            "delivery": "ok" if result.get("success") else "failed",
            "error": result.get("error"),
        },
    )

    if not result.get("success"):
        try:
            struct_logger.warning(
                "Document email send returned non-success",
                error=result.get("error"),
                doc_filename=doc_filename,
                deal_id=deal_id,
            )
        except Exception:
            pass


# Configure logging
# Root logger at INFO; `logger` is this module's stdlib logger (the functions
# defined above reference it at call time, after this assignment has run).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Monotonic timestamp captured at import time.
APP_STARTED_AT = time.monotonic()

# Sentry error tracking — opt-in; no-op when SENTRY_DSN is absent or under pytest.
if os.environ.get("SENTRY_DSN") and not os.environ.get("PYTEST_CURRENT_TEST"):
    import re as _sentry_re

    import sentry_sdk

    _SENTRY_SSN_RE = _sentry_re.compile(r"\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b")
    _SENTRY_EMAIL_RE = _sentry_re.compile(r"\b[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}\b")
    # Conservative trace sampling rate used when the env override is absent
    # or unusable.
    _SENTRY_DEFAULT_TRACES_RATE = 0.05

    def _sentry_before_send(event, hint):
        """Drop health-probe events and scrub SSNs/emails from string bodies.

        Returns None to discard the event, otherwise the (possibly modified)
        event. Events without a request dict are passed through unchanged.
        """
        request_info = event.get("request")
        if not isinstance(request_info, dict):
            # No request context (or an explicit None): nothing to filter or
            # scrub, and no request dict is fabricated for the event.
            return event
        # Drop health-probe events — noisy and quota-wasting.
        url = request_info.get("url") or ""
        if isinstance(url, str) and (
            "/healthz" in url or url.rstrip("/").endswith("/health")
        ):
            return None
        # Scrub PII from request body (mirrors tools/pii_guard.py patterns).
        # Only string bodies are scrubbed; other shapes are left as-is.
        body = request_info.get("data")
        if isinstance(body, str):
            body = _SENTRY_SSN_RE.sub("[SSN-REDACTED]", body)
            body = _SENTRY_EMAIL_RE.sub("[EMAIL-REDACTED]", body)
            request_info["data"] = body
        return event

    def _sentry_traces_sampler(ctx):
        """Return the trace sampling rate for one transaction, in [0, 1]."""
        # Exclude health probes from performance tracing.
        path = (ctx.get("asgi_scope") or {}).get("path") or ""
        if isinstance(path, str) and path.startswith("/health"):
            return 0
        # Conservative 0.05 default; tunable via env without a redeploy (e.g.
        # raise during an incident, set 0.0 to disable). Clamped to [0,1]; a
        # malformed value falls back to the default. Only reached when
        # SENTRY_DSN is set, so the no-op-when-unconfigured contract holds.
        try:
            rate = float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.05"))
        except (TypeError, ValueError):
            return _SENTRY_DEFAULT_TRACES_RATE
        if rate != rate:
            # "nan" parses as a float but slips through min()/max() unchanged;
            # NaN is the only value unequal to itself, so treat it as malformed.
            return _SENTRY_DEFAULT_TRACES_RATE
        return min(max(rate, 0.0), 1.0)

    sentry_sdk.init(
        dsn=os.environ["SENTRY_DSN"],
        environment=os.environ.get("K_REVISION", "local"),
        release=os.environ.get("APP_VERSION", "local"),
        traces_sampler=_sentry_traces_sampler,
        profiles_sample_rate=0.0,
        send_default_pii=False,
        before_send=_sentry_before_send,
    )
    logger.info("Sentry initialized (environment=%s)", os.environ.get("K_REVISION", "local"))

# Disable FastAPI's auto-docs (/openapi.json, /docs, /redoc) in Cloud Run.
# These endpoints expose the full API surface — every admin route, every
# operation ID, every path parameter — to anonymous attackers, which makes
# enumeration and targeted abuse trivial. They remain available locally so
# developers can still spelunk the surface in dev. Override with
# `EXPOSE_API_DOCS=1` if you really need them on a deployed env.
# Only the exact string "1" enables the override.
_EXPOSE_API_DOCS = os.environ.get("EXPOSE_API_DOCS", "0") == "1"
# Docs are on when explicitly exposed, or when K_SERVICE is unset.
_DOCS_ENABLED = _EXPOSE_API_DOCS or os.environ.get("K_SERVICE") is None
app = FastAPI(
    title=f"{business_name()} AI Agent",
    openapi_url="/openapi.json" if _DOCS_ENABLED else None,
    docs_url="/docs" if _DOCS_ENABLED else None,
    redoc_url="/redoc" if _DOCS_ENABLED else None,
)
# llms.txt is expected in the same directory as this module.
LLMS_TXT_PATH = os.path.join(os.path.dirname(__file__), "llms.txt")

# Initialize services (these don't require Vertex AI)
# The project id comes from GOOGLE_CLOUD_PROJECT when set, else from the
# deployment config's "project_id", else the hard-coded default. All four
# service singletons below are constructed at import time with that id.
deploy_cfg = get_deployment_config()
project_id = os.environ.get("GOOGLE_CLOUD_PROJECT", deploy_cfg.get("project_id", "tho-ai-agent"))
conversation_memory = ConversationMemory(project_id=project_id)
chat_history = ChatHistory(project_id=project_id)
lead_manager = LeadManager(project_id=project_id)
appointment_manager = AppointmentManager(project_id=project_id)


# Security headers middleware
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
    """Stamp a fixed set of security headers, plus a CSP, on every response."""

    async def dispatch(self, request, call_next):
        """Run the downstream handler, then add the security headers.

        The Content-Security-Policy starts from a static baseline; sources
        returned by ``seo_routes.analytics_csp_sources()`` are appended to
        the matching directives, skipping any already present.
        """
        response = await call_next(request)
        headers = response.headers
        for header_name, header_value in (
            ("X-Content-Type-Options", "nosniff"),
            ("X-Frame-Options", "DENY"),
            ("X-XSS-Protection", "1; mode=block"),
            ("Referrer-Policy", "strict-origin-when-cross-origin"),
            ("Permissions-Policy", "camera=(), microphone=(), geolocation=()"),
        ):
            headers[header_name] = header_value

        # CSP: allow self, inline styles (Tailwind), Google Fonts, Matterport
        # iframes, CloudFront CDN images. Vendor analytics/pixel script+connect
        # hosts are appended ONLY for IDs that are validly set (see
        # seo_routes.analytics_csp_sources, same _clean_id gate as the snippet);
        # with none set this header is byte-identical to the static baseline.
        import seo_routes

        # Directive -> allowed sources; insertion order is the emitted order.
        directives = {
            "default-src": ["'self'"],
            "script-src": ["'self'", "'unsafe-inline'"],
            "style-src": ["'self'", "'unsafe-inline'", "https://fonts.googleapis.com"],
            "img-src": ["'self'", "https://d132mt2yijm03y.cloudfront.net", "https:", "data:"],
            "frame-src": ["https://my.matterport.com"],
            "connect-src": ["'self'"],
            "font-src": ["'self'", "https://fonts.gstatic.com", "data:"],
            "frame-ancestors": ["'none'"],
        }
        analytics_sources = seo_routes.analytics_csp_sources()
        if analytics_sources:
            for directive, sources in directives.items():
                for host in analytics_sources.get(directive, []):
                    if host in sources:
                        continue
                    sources.append(host)
        headers["Content-Security-Policy"] = "; ".join(
            f"{directive} {' '.join(sources)}" for directive, sources in directives.items()
        )
        # HSTS: enforce HTTPS for 1 year (only effective on HTTPS connections)
        headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"
        return response


def _get_client_ip(request: Request) -> str:
    """Get real client IP, checking X-Forwarded-For for reverse proxy (Cloud Run).

    Reused as the slowapi ``key_func`` so per-IP rate limiting and the
    Redis-backed brute-force counter key the same client identity.
    """
    forwarded = request.headers.get("x-forwarded-for", "")
    if forwarded:
        return forwarded.split(",")[0].strip()
    return request.client.host if request.client else "unknown"


# slowapi per-IP rate limiter — layered on top of the legacy
# RateLimitMiddleware below. Per-route caps are applied via @limiter.limit()
# decorators on individual endpoints (admin, partner /api/v1/*, marketing
# inventory-context). /health, /healthz, /healthz/ are exempted via
# @limiter.exempt so Cloud Run liveness probes are never throttled.
#
# headers_enabled is intentionally left FALSE: slowapi's _inject_headers
# raises when a route returns a dict (the common FastAPI shape) without an
# explicit ``response: Response`` parameter in the signature. The custom
# 429 handler below adds Retry-After by hand using the exception's limit
# metadata so callers still get the standard rate-limit signal.
limiter = Limiter(
    # Same client identity as the legacy middleware (see _get_client_ip).
    key_func=_get_client_ip,
    # Applied to routes that carry no explicit @limiter.limit decorator.
    default_limits=["100/minute"],
    headers_enabled=False,
    # Fail-open hardening. With slowapi's default in-process memory storage
    # (no RATELIMIT_STORAGE_URI set) these are a no-op and limiting behaves
    # exactly as before. They only matter if an operator later points slowapi
    # at an external backend (e.g. Redis): in_memory_fallback_enabled re-checks
    # against in-memory storage if that backend is unreachable, and
    # swallow_errors lets the request through rather than 500 if even that path
    # fails — a rate-limit backend outage can never take a public route down.
    swallow_errors=True,
    in_memory_fallback_enabled=True,
)
# Exposed on app.state; presumably where SlowAPIMiddleware looks the limiter
# up — confirm against the slowapi setup docs.
app.state.limiter = limiter


def _rate_limit_handler(request: Request, exc: RateLimitExceeded) -> JSONResponse:
    """Build the JSON 429 response, including a ``Retry-After`` header.

    Used in place of slowapi's default handler so the response does not
    depend on ``headers_enabled``. ``Retry-After`` is taken from the
    violated limit's expiry; if that cannot be read it falls back to 60.
    """
    try:
        expiry_seconds = int(exc.limit.limit.get_expiry())
    except Exception:
        expiry_seconds = 60
    payload = {"error": f"Rate limit exceeded: {exc.detail}"}
    return JSONResponse(
        payload,
        status_code=429,
        headers={"Retry-After": f"{expiry_seconds}"},
    )


# Route slowapi's RateLimitExceeded through the JSON 429 handler above.
app.add_exception_handler(RateLimitExceeded, _rate_limit_handler)


# ── Per-route rate-limit values, operator-tunable via env ──────────────────
# slowapi accepts a Callable[..., str] as a limit value and evaluates it per
# request, so these caps can be tuned in Cloud Run with --update-env-vars and
# ZERO redeploy. When the env var is ABSENT the callable returns the
# conservative default below — identical to the previously-hardcoded value
# (strict no-op). A blank OR malformed override (anything slowapi's parser
# rejects, e.g. "10/sec") also falls back to the default, so a fat-fingered env
# var can never silently weaken or disable the per-route cap.
def _route_rate_limit(env_var: str, default: str):
    def _resolve() -> str:
        value = os.environ.get(env_var, "").strip()
        if not value:
            return default
        try:
            from limits import parse_many

            parse_many(value)  # validate it parses as a rate string
        except Exception:
            return default
        return value

    return _resolve


# Conservative defaults: a real human submits the contact form / books an
# appointment once. 10/min/IP leaves headroom for retries, shared NAT, and
# double-clicks while throttling an email-flood bot on the fresh Resend domain.
# Both names are callables (see _route_rate_limit), not strings.
CONTACT_RATE_LIMIT = _route_rate_limit("CONTACT_RATE_LIMIT", "10/minute")
APPOINTMENTS_RATE_LIMIT = _route_rate_limit("APPOINTMENTS_RATE_LIMIT", "10/minute")

# Rate limiting middleware — per-IP sliding window
# NOTE(review): unlike the per-route limits above, these two are parsed with a
# bare int() at import time, so a non-numeric env value raises ValueError and
# the module fails to import.
MAX_REQUESTS_PER_MINUTE = int(os.environ.get("RATE_LIMIT_RPM", "60"))
MAX_REQUEST_BODY_BYTES = int(
    os.environ.get("MAX_REQUEST_BODY_BYTES", str(1 * 1024 * 1024))
)  # 1 MB default


# File extensions treated as static assets by _is_rate_limit_exempt_path.
# Kept as a tuple because it is passed straight to str.endswith(); entries are
# lowercase and are compared against the lowercased request path.
_STATIC_RATE_LIMIT_EXTENSIONS = (
    ".css",
    ".gif",
    ".ico",
    ".jpeg",
    ".jpg",
    ".js",
    ".map",
    ".png",
    ".svg",
    ".webmanifest",
    ".webp",
    ".woff",
    ".woff2",
)
# Exact paths exempted from the legacy limiter (matched case-sensitively).
_STATIC_RATE_LIMIT_PATHS = {
    "/manifest.webmanifest",
    "/registerSW.js",
    "/sw.js",
    "/tex-icon.svg",
    "/vite.svg",
}


def _is_rate_limit_exempt_path(path: str, method: str = "GET") -> bool:
    """Skip the legacy global limiter for health checks and SPA delivery."""
    if path in {"/health", "/healthz", "/healthz/"}:
        return True
    if method.upper() in {"GET", "HEAD"} and not (path == "/api" or path.startswith("/api/")):
        return True
    if path.startswith("/assets/") or path.startswith("/workbox-"):
        return True
    if path in _STATIC_RATE_LIMIT_PATHS:
        return True
    if not (path == "/api" or path.startswith("/api/")):
        return path.lower().endswith(_STATIC_RATE_LIMIT_EXTENSIONS)
    return False


class RateLimitMiddleware(BaseHTTPMiddleware):
    """Legacy per-IP sliding-window limiter held in process memory.

    Each client IP maps to the timestamps of its requests within the last
    window. A request is rejected with a JSON 429 once the client already
    has ``MAX_REQUESTS_PER_MINUTE`` hits inside the window. Clients whose
    newest hit has aged out are evicted periodically so the table does not
    grow without bound as new IPs are seen.
    """

    # Sliding-window length, and the minimum spacing between eviction sweeps.
    _WINDOW_SECONDS = 60
    _SWEEP_INTERVAL_SECONDS = 60

    def __init__(self, app):
        super().__init__(app)
        # client IP -> request timestamps (time.time()), oldest first.
        self._hits: dict[str, list[float]] = defaultdict(list)
        self._last_sweep = time.time()

    def _evict_idle_clients(self, now: float) -> None:
        """Drop clients with no hit inside the window; at most once per interval."""
        if now - self._last_sweep < self._SWEEP_INTERVAL_SECONDS:
            return
        self._last_sweep = now
        # Timestamps are appended in order, so the last one is the newest.
        idle = [
            ip
            for ip, hits in self._hits.items()
            if not hits or now - hits[-1] >= self._WINDOW_SECONDS
        ]
        for ip in idle:
            del self._hits[ip]

    async def dispatch(self, request: Request, call_next):
        """Count the request against its client IP, or answer 429."""
        if _is_rate_limit_exempt_path(request.url.path, request.method):
            return await call_next(request)
        client_ip = _get_client_ip(request)
        now = time.time()
        self._evict_idle_clients(now)
        # Prune entries older than the window
        window = [t for t in self._hits[client_ip] if now - t < self._WINDOW_SECONDS]
        self._hits[client_ip] = window
        if len(window) >= MAX_REQUESTS_PER_MINUTE:
            return JSONResponse(
                {"error": "Rate limit exceeded. Please try again shortly."}, status_code=429
            )
        window.append(now)
        return await call_next(request)


class RequestSizeLimitMiddleware(BaseHTTPMiddleware):
    """Reject requests whose declared body size exceeds the configured cap.

    Only the ``Content-Length`` header is inspected; a request that carries
    no such header is passed through without a size check here.
    """

    async def dispatch(self, request: Request, call_next):
        """Return 413 for oversized, 400 for unparseable lengths, else continue."""
        content_length = request.headers.get("content-length")
        if content_length:
            try:
                declared_bytes = int(content_length)
            except ValueError:
                # A non-numeric header used to escape as an unhandled
                # ValueError; answer with a client error instead.
                declared_bytes = -1
            if declared_bytes < 0:
                return JSONResponse(
                    {"error": "Invalid Content-Length header."}, status_code=400
                )
            if declared_bytes > MAX_REQUEST_BODY_BYTES:
                return JSONResponse({"error": "Request body too large."}, status_code=413)
        return await call_next(request)


class InputSanitizationMiddleware(BaseHTTPMiddleware):
    """Sanitize JSON bodies of write requests to the non-partner API.

    Applies to POST/PUT/PATCH under ``/api/`` except ``/api/v1/``, and only
    when the content type starts with ``application/json``.
    """

    async def dispatch(self, request: Request, call_next):
        """Replace the cached request body with its sanitized re-encoding."""
        path = request.url.path
        is_frontend_api_write = (
            request.method in ("POST", "PUT", "PATCH")
            and path.startswith("/api/")
            and not path.startswith("/api/v1/")
        )
        if is_frontend_api_write and request.headers.get("content-type", "").startswith(
            "application/json"
        ):
            try:
                raw = await request.body()
                if raw:
                    cleaned = sanitize_body(json.loads(raw))
                    # Overwrite the request's cached body with the clean copy.
                    request._body = json.dumps(cleaned).encode("utf-8")
            except Exception:
                pass  # Not valid JSON or sanitization failed — leave body untouched
        return await call_next(request)


class _MetricsStore:
    """Thread-safe rolling-window store for request latency metrics."""

    def __init__(self):
        self._global_buffer = deque(maxlen=5000)
        self._endpoint_buffers = defaultdict(lambda: deque(maxlen=1000))
        self._lock = threading.Lock()

    def record(self, endpoint_key: str, duration_ms: float, status_code: int) -> None:
        with self._lock:
            self._global_buffer.append((duration_ms, status_code))
            self._endpoint_buffers[endpoint_key].append((duration_ms, status_code))

    @staticmethod
    def _calculate_percentiles(durations: list[float]) -> dict:
        if not durations:
            return {"p50": None, "p95": None, "p99": None}
        sorted_durations = sorted(durations)
        n = len(sorted_durations)

        def _p(p: float) -> float:
            k = (n - 1) * p / 100.0
            f = int(k)
            c = min(f + 1, n - 1)
            if f == c:
                return sorted_durations[f]
            return sorted_durations[f] * (c - k) + sorted_durations[c] * (k - f)

        return {"p50": round(_p(50), 2), "p95": round(_p(95), 2), "p99": round(_p(99), 2)}

    def get_metrics(self) -> dict:
        with self._lock:
            global_data = list(self._global_buffer)
            endpoint_data = {
                key: list(buf) for key, buf in self._endpoint_buffers.items()
            }

        global_durations = [d for d, _ in global_data]
        overall = self._calculate_percentiles(global_durations)
        overall["count"] = len(global_data)

        endpoints = {}
        for key, records in endpoint_data.items():
            durations = [d for d, _ in records]
            status_codes = [s for _, s in records]
            error_count = sum(1 for s in status_codes if s >= 400)
            endpoints[key] = {
                **self._calculate_percentiles(durations),
                "count": len(records),
                "error_rate": round(error_count / len(records), 4) if records else 0.0,
            }

        return {"overall": overall, "endpoints": endpoints}


# Process-wide metrics store written to by PerformanceMetricsMiddleware.
_metrics_store = _MetricsStore()


class PerformanceMetricsMiddleware(BaseHTTPMiddleware):
    """Measure each request's latency, record it, and emit a structured log.

    Health endpoints are passed straight through without measurement.
    """

    async def dispatch(self, request: Request, call_next):
        """Time the downstream call and record the outcome in ``finally``.

        The recorded status is the response's status code, or 500 when an
        ``Exception`` propagates. It stays None if something that is not an
        ``Exception`` subclass propagates instead.
        """
        if request.url.path in ("/health", "/healthz", "/healthz/"):
            return await call_next(request)

        began = time.perf_counter()
        observed_status = None
        try:
            response = await call_next(request)
            observed_status = response.status_code
            return response
        except Exception:
            observed_status = 500
            raise
        finally:
            elapsed_ms = (time.perf_counter() - began) * 1000
            _metrics_store.record(
                f"{request.method} {request.url.path}", elapsed_ms, observed_status
            )
            log_fields = {
                "method": request.method,
                "path": request.url.path,
                "status_code": observed_status,
                "duration_ms": round(elapsed_ms, 2),
                "timestamp": datetime.now(UTC).isoformat(),
            }
            struct_logger.info("request", **log_fields)


app.add_middleware(SecurityHeadersMiddleware)
app.add_middleware(RateLimitMiddleware)
app.add_middleware(RequestSizeLimitMiddleware)
app.add_middleware(InputSanitizationMiddleware)
# slowapi middleware is registered after the four middlewares above so it
# wraps them and runs before the legacy per-IP RateLimitMiddleware. Per-route
# caps via @limiter.limit decorators short-circuit hot paths (e.g.
# /api/admin/verify at 5/min) before they ever reach the brute-force
# _pin_attempts counter.
# NOTE(review): APICacheControlMiddleware is added further down this file, so
# SlowAPIMiddleware is not the last-registered (outermost) layer overall.
app.add_middleware(SlowAPIMiddleware)


class ImmutableStaticFiles(StaticFiles):
    """Static file handler that marks successful responses as immutable.

    Intended for Vite's fingerprinted assets, which can be cached long-term.
    """

    async def get_response(self, path, scope):
        """Delegate to ``StaticFiles`` and add a one-year cache header on 200."""
        served = await super().get_response(path, scope)
        if served.status_code != 200:
            # Anything other than a plain 200 keeps its own caching headers.
            return served
        served.headers["Cache-Control"] = "public, max-age=31536000, immutable"
        return served


# ─── Resilient Error Responses + Cache-Control ───
#
# All HTTPExceptions raised inside the API surface are wrapped in a uniform
# {success, status_code, message} JSON envelope so the frontend can branch on
# `success` rather than parsing FastAPI's default `{detail: ...}` shape.
# The /api/v1/* partner contract is excluded — external partners parse the
# legacy `{detail}` shape, and changing it requires a coordinated version bump.
#
# Cache-Control is applied consistently:
#   * GET /api/marketing/inventory-context (public read-side):
#       max-age=3600, public, stale-while-revalidate=60
#   * Any other /api/* path: no-cache (CRM data must never be cached)
#   * Non-/api paths: header is left untouched so the SPA / static asset
#     handlers can set their own caching policy.
#
# This is additive on top of PR #17 ("Return JSON 404 for unknown API paths"):
# the SPA catch-all now raises HTTPException(404) for unknown /api/* paths so
# they flow through this single envelope rather than emitting a bare detail.

# Cache-Control for the public inventory read: shared caches may keep it for
# an hour and serve it stale for up to 60s while revalidating.
_PUBLIC_INVENTORY_CACHE = "max-age=3600, public, stale-while-revalidate=60"
# Cache-Control for every other /api/* response.
# NOTE(review): "no-cache" permits storing with revalidation; if the intent is
# that CRM data is never stored at all, "no-store" may be what is wanted —
# confirm before changing, clients/tests may depend on this exact value.
_DYNAMIC_API_CACHE = "no-cache"


def _is_public_inventory_read(method: str, path: str) -> bool:
    """True for unauthenticated public inventory views only.

    The admin inventory API exposes operational fields such as serial and label
    numbers, so it must stay private/no-cache even for read requests.
    """
    if method.upper() != "GET":
        return False
    return path in {"/api/marketing/inventory-context", "/api/marketing/inventory-context/"}


def _is_partner_api_path(path: str) -> bool:
    """The /api/v1/* surface is a versioned public contract for external
    partners; we MUST NOT change its error shape (`{detail: ...}`) without a
    coordinated version bump.
    """
    return path == "/api/v1" or path.startswith("/api/v1/")


def _apply_api_cache_headers(request: Request, response: JSONResponse) -> JSONResponse:
    """Set Cache-Control on a response for an ``/api`` path and return it.

    The public inventory read gets the public caching policy; every other
    ``/api`` path gets the dynamic policy. Responses for non-API paths are
    returned untouched.
    """
    path = request.url.path
    if path == "/api" or path.startswith("/api/"):
        response.headers["Cache-Control"] = (
            _PUBLIC_INVENTORY_CACHE
            if _is_public_inventory_read(request.method, path)
            else _DYNAMIC_API_CACHE
        )
    return response


class APICacheControlMiddleware(BaseHTTPMiddleware):
    """Add a Cache-Control header to ``/api`` responses that lack one.

    Responses that already carry Cache-Control are left alone, which both
    respects handler-level overrides and avoids rewriting the header the
    exception handlers set on error responses.
    """

    async def dispatch(self, request: Request, call_next):
        """Run the request, then apply the API caching policy if needed."""
        response = await call_next(request)
        path = request.url.path
        is_api_path = path == "/api" or path.startswith("/api/")
        if not is_api_path:
            return response
        present_headers = {name.lower() for name in response.headers.keys()}
        if "cache-control" in present_headers:
            return response
        # Same policy the exception handlers use, via the shared helper.
        return _apply_api_cache_headers(request, response)


# Registered after the middlewares added earlier in this module.
app.add_middleware(APICacheControlMiddleware)


@app.exception_handler(HTTPException)
async def resilient_http_exception_handler(request: Request, exc: HTTPException) -> JSONResponse:
    """Render an HTTPException as JSON in the shape the caller expects.

    * Partner paths (``/api/v1/*``) keep FastAPI's default `{detail: ...}`
      body so external integrations are not broken.
    * All other paths get the flat `{success, status_code, message}`
      envelope, letting the SPA branch on `success`.

    Headers attached to the exception (e.g. WWW-Authenticate from auth
    dependencies) are carried over, and Cache-Control is added for
    ``/api/*`` paths.
    """
    status = exc.status_code
    if _is_partner_api_path(request.url.path):
        payload: dict = {"detail": exc.detail}
    else:
        raw_detail = exc.detail
        if raw_detail is None:
            text = "Error"
        elif isinstance(raw_detail, str):
            text = raw_detail
        else:
            # dict / list / pydantic-ish — stringify so the wrapper stays flat.
            text = str(raw_detail)
        payload = {"success": False, "status_code": status, "message": text}

    # Empty or missing headers are normalised to None.
    extra_headers = getattr(exc, "headers", None) or None
    return _apply_api_cache_headers(
        request,
        JSONResponse(payload, status_code=status, headers=extra_headers),
    )


@app.exception_handler(RequestValidationError)
async def resilient_validation_handler(
    request: Request, exc: RequestValidationError
) -> JSONResponse:
    """Render FastAPI body/query/path validation errors as JSON.

    * Partner paths (per `_is_partner_api_path`) keep FastAPI's default 422
      `{detail: [...]}` shape for contract stability.
    * Every other path gets the `{success, status_code, message}` envelope
      with a generic message and status 400, so the SPA can branch on
      `success` and never sees FastAPI's default 422 shape.

    The response is passed through `_apply_api_cache_headers` before being
    returned.
    """
    if _is_partner_api_path(request.url.path):
        # FastAPI's built-in handler runs the error list through
        # jsonable_encoder before serialising it. The raw list can contain
        # values json.dumps rejects (exception objects in `ctx`, bytes in
        # `input`), which would turn this 422 into a 500 from inside the
        # handler. Imported locally so this block does not depend on the
        # module's import header.
        from fastapi.encoders import jsonable_encoder

        body: dict = {"detail": jsonable_encoder(exc.errors())}
        status_code = 422
    else:
        body = {
            "success": False,
            "status_code": 400,
            "message": "Invalid request payload.",
        }
        status_code = 400
    response = JSONResponse(body, status_code=status_code)
    return _apply_api_cache_headers(request, response)


@app.exception_handler(JSONDecodeError)
async def resilient_json_decode_handler(request: Request, exc: JSONDecodeError) -> JSONResponse:
    """Turn a `json.JSONDecodeError` escaping a route into a JSON 400.

    Such an error typically comes from `await request.json()` on an
    empty/malformed body and would otherwise surface as a Starlette 500 with
    `text/plain` "Internal Server Error", breaking the JSON contract the SPA
    relies on. Partner paths get `{detail: ...}`; all other paths get the
    `{success, status_code, message}` envelope.
    """
    reason = "Malformed JSON body."
    if _is_partner_api_path(request.url.path):
        payload: dict = {"detail": reason}
    else:
        payload = {
            "success": False,
            "status_code": 400,
            "message": reason,
        }
    return _apply_api_cache_headers(request, JSONResponse(payload, status_code=400))


# CORS — allowed origins come from the comma-separated ALLOWED_ORIGINS env
# var; the defaults below are used only when that variable is unset.
IS_LOCAL = "K_SERVICE" not in os.environ  # K_SERVICE is set by Cloud Run
_default_origins = [
    "https://tho-agent-691674245427.us-central1.run.app",
    "https://tho-agent-trgi34bxuq-uc.a.run.app",
    "https://tho-ai-agent.web.app",
    "https://tho-ai-agent.firebaseapp.com",
    "https://tho.sapphirealpha.xyz",
    "https://sapphirealpha.xyz",
    "https://www.sapphirealpha.xyz",
    "https://texashomeoutlet.com",
    "https://www.texashomeoutlet.com",
]
# Trim whitespace around each entry and drop blanks (e.g. trailing commas).
ALLOWED_ORIGINS = [
    origin
    for origin in map(
        str.strip,
        os.environ.get("ALLOWED_ORIGINS", ",".join(_default_origins)).split(","),
    )
    if origin
]
if IS_LOCAL:
    # Outside Cloud Run, also accept the localhost origins.
    ALLOWED_ORIGINS.extend(["http://localhost:8080", "http://localhost:5173"])

app.add_middleware(
    CORSMiddleware,
    allow_origins=ALLOWED_ORIGINS,
    allow_methods=["GET", "POST", "PUT"],
    allow_headers=["Content-Type", "Accept", "X-Admin-Token", "Authorization"],
)


# Canonical public origin (no trailing slash), overridable via the
# CANONICAL_PUBLIC_URL env var.
CANONICAL_PUBLIC_URL = os.getenv("CANONICAL_PUBLIC_URL", "https://tho.sapphirealpha.xyz").rstrip(
    "/"
)
_CANONICAL_PUBLIC_PARTS = urlsplit(CANONICAL_PUBLIC_URL)
# If the configured value parses without a scheme or host, use the defaults.
_CANONICAL_PUBLIC_SCHEME = (
    _CANONICAL_PUBLIC_PARTS.scheme if _CANONICAL_PUBLIC_PARTS.scheme else "https"
)
_CANONICAL_PUBLIC_HOST = (
    _CANONICAL_PUBLIC_PARTS.netloc if _CANONICAL_PUBLIC_PARTS.netloc else "tho.sapphirealpha.xyz"
)


def _should_redirect_to_canonical_host(request: Request) -> bool:
    """Decide whether a request should be bounced to the canonical domain.

    Cloud Run's default *.run.app URL remains useful for probes and low-level
    diagnostics, but customer-facing pages must settle on the production
    vanity domain so admin cookies, passkeys, and support instructions all
    share one origin.

    Returns True only for GET/HEAD requests whose Host header (port and any
    trailing dot ignored, case-insensitive) ends in ".run.app" and whose path
    is not one of the exempt paths: /llms.txt, /health*, /api and /api/*,
    /assets/*, /workbox-*, or any entry of `_STATIC_RATE_LIMIT_PATHS`.
    """
    if request.method.upper() not in {"GET", "HEAD"}:
        return False

    # Strip an optional ":port" suffix and a trailing dot before matching.
    raw_host = request.headers.get("host", "")
    bare_host = raw_host.partition(":")[0].lower().rstrip(".")
    if not bare_host.endswith(".run.app"):
        return False

    url_path = request.url.path
    if url_path in ("/llms.txt", "/api"):
        return False
    if url_path.startswith(("/health", "/api/", "/assets/", "/workbox-")):
        return False
    return url_path not in _STATIC_RATE_LIMIT_PATHS


class CanonicalHostMiddleware(BaseHTTPMiddleware):
    """Redirect eligible requests to the canonical public scheme and host."""

    async def dispatch(self, request: Request, call_next):
        """Answer with a 308 to the canonical origin, or pass the request on.

        The redirect keeps the original path and query string and drops any
        fragment. Eligibility is decided by
        `_should_redirect_to_canonical_host`.
        """
        if not _should_redirect_to_canonical_host(request):
            return await call_next(request)

        canonical_parts = (
            _CANONICAL_PUBLIC_SCHEME,
            _CANONICAL_PUBLIC_HOST,
            request.url.path,
            request.url.query,
            "",
        )
        return RedirectResponse(urlunsplit(canonical_parts), status_code=308)


# Redirect eligible *.run.app requests to the canonical public host.
app.add_middleware(CanonicalHostMiddleware)
# NOTE(review): PerformanceMetricsMiddleware is defined outside this section —
# presumably records per-request timing; confirm against its definition.
app.add_middleware(PerformanceMetricsMiddleware)
# Outermost layer: gzip the fully-formed response. minimum_size skips tiny
# payloads (redirects, JSON acks) where compression overhead isn't worth it;
# the server-rendered SEO HTML + JSON-LD shrink ~70%, a real TTFB/bandwidth win.
app.add_middleware(GZipMiddleware, minimum_size=500)


# ─── Admin Auth Setup ───

# Admin PIN hash — MUST be supplied through the ADMIN_PIN_HASH env var.
# Generate hash: python -c "import hashlib; print(hashlib.sha256(b'YOUR_PIN').hexdigest())"
# Startup fails closed everywhere: Cloud Run, local runs, and tests all have
# to set ADMIN_PIN_HASH explicitly, otherwise importing this module raises.
_CONFIGURED_ADMIN_PIN_HASH = os.environ.get("ADMIN_PIN_HASH")
ADMIN_PIN_HASH = _CONFIGURED_ADMIN_PIN_HASH or ""
if not ADMIN_PIN_HASH:
    # Only the error message depends on the environment.
    if os.environ.get("K_SERVICE"):
        raise RuntimeError("ADMIN_PIN_HASH is mandatory in Cloud Run")
    raise RuntimeError("Set ADMIN_PIN_HASH env var to run locally")

# Warn loudly if the email service is not configured: critical in deployed
# environments, a plain warning for local development.
if not os.environ.get("RESEND_API_KEY"):
    if IS_LOCAL:
        logger.warning("RESEND_API_KEY not set — emails will run in dry-run mode (local dev).")
    else:
        logger.critical(
            "RESEND_API_KEY not set — appointment confirmations and lead emails will NOT be sent."
        )

# Stateless HMAC-signed admin tokens (JWT-like, not standard JWTs) — verifiable
# on any Cloud Run instance without shared storage. Uses HMAC-SHA256 with a
# shared secret derived from the PIN hash.
import base64
import hmac
import struct

# Derive a stable session secret from the PIN hash if not explicitly provided.
# This prevents random secret rotation on every Cloud Run cold start, which
# causes session invalidation and 'double PIN gates'.
# Side effect: the derived value is written into os.environ for this process.
# NOTE(review): presumably consumed by SessionManager — confirm against its
# definition, which is outside this section.
if not os.environ.get("ADMIN_SESSION_SECRET") and ADMIN_PIN_HASH:
    # Use a different salt from the JWT secret below
    _derived_secret = hashlib.sha256(f"tho-session-v2-{ADMIN_PIN_HASH}".encode()).hexdigest()
    os.environ["ADMIN_SESSION_SECRET"] = _derived_secret
    logger.info("ADMIN_SESSION_SECRET derived from PIN hash for stability")

# Admin token lifetime in seconds, overridable via the ADMIN_TOKEN_TTL env var.
# A non-integer value makes int() raise at import time.
ADMIN_TOKEN_TTL = int(os.environ.get("ADMIN_TOKEN_TTL", str(24 * 60 * 60)))  # 24 hours
# HMAC key for admin tokens: SHA-256 digest of a fixed prefix plus the first
# 16 characters of the PIN hash.
_JWT_SECRET = hashlib.sha256(f"sapphire-jwt-{ADMIN_PIN_HASH[:16]}".encode()).digest()


def _create_admin_token() -> str:
    """Mint an HMAC-signed admin token that embeds its own expiry.

    The token is the URL-safe base64 encoding (padding stripped) of an 8-byte
    big-endian expiry timestamp followed by the first 16 bytes of its
    HMAC-SHA256 under `_JWT_SECRET`.

    Raises:
        RuntimeError: if `ADMIN_PIN_HASH` is empty.
    """
    if not ADMIN_PIN_HASH:
        raise RuntimeError("Admin auth not configured")

    expires_at = int(time.time()) + ADMIN_TOKEN_TTL
    expiry_bytes = struct.pack(">Q", expires_at)  # uint64, big-endian
    full_mac = hmac.new(_JWT_SECRET, expiry_bytes, hashlib.sha256).digest()
    encoded = base64.urlsafe_b64encode(expiry_bytes + full_mac[:16])
    return encoded.decode().rstrip("=")


# Lazily created, module-wide SessionManager instance (None until first use).
_passkey_session_manager: SessionManager | None = None


def _get_passkey_session_manager() -> SessionManager:
    """Return the shared SessionManager, constructing it on the first call."""
    global _passkey_session_manager
    manager = _passkey_session_manager
    if manager is None:
        manager = _passkey_session_manager = SessionManager()
    return manager


def _verify_admin_token(token: str) -> bool:
    """Check an HMAC-signed admin token without consulting any stored state.

    A token is accepted only if it decodes to exactly 24 bytes (8-byte
    big-endian expiry + 16-byte truncated HMAC-SHA256), the signature matches
    under `_JWT_SECRET`, and the expiry is still in the future. Any decoding
    or parsing failure yields False. Always returns False when
    `ADMIN_PIN_HASH` is empty.
    """
    if not ADMIN_PIN_HASH:
        return False
    try:
        # Restore the base64 padding that token creation strips.
        missing_padding = -len(token) % 4
        if missing_padding:
            token += "=" * missing_padding
        decoded = base64.urlsafe_b64decode(token)
        if len(decoded) != 24:  # 8 bytes payload + 16 bytes signature
            return False

        expiry_bytes = decoded[:8]
        presented_mac = decoded[8:]
        computed_mac = hmac.new(_JWT_SECRET, expiry_bytes, hashlib.sha256).digest()[:16]
        # Constant-time comparison of the presented and expected signatures.
        if not hmac.compare_digest(presented_mac, computed_mac):
            return False

        (expires_at,) = struct.unpack(">Q", expiry_bytes)
        return time.time() < expires_at
    except Exception:
        return False


def _verify_passkey_cookie(request: Request) -> bool:
    """Report whether the request carries a valid admin passkey session cookie.

    The cookie named by `PASSKEY_COOKIE_NAME` must be present, must verify
    through the shared session manager, and its payload's `user_id` must be
    "admin".
    """
    cookie_value = request.cookies.get(PASSKEY_COOKIE_NAME, "")
    if not cookie_value:
        return False

    session = _get_passkey_session_manager().verify_session(cookie_value)
    if session is None:
        return False
    return session.get("user_id") == "admin"


def _admin_token_from_request(request: Request) -> str:
    """Extract the admin token from a request, or return "" if none is found.

    Sources are tried in order and the first non-empty value wins:
      1. the `tho_admin_token` cookie (preferred, httpOnly post-hardening),
      2. the `X-Admin-Token` header (backward compatibility),
      3. an `Authorization: Bearer <token>` header.
    """
    from_cookie = request.cookies.get("tho_admin_token", "").strip()
    if from_cookie:
        return from_cookie

    # Header fallbacks for backward compatibility.
    from_header = request.headers.get("X-Admin-Token", "").strip()
    if from_header:
        return from_header

    auth_scheme, _, credential = request.headers.get("Authorization", "").strip().partition(" ")
    if credential and auth_scheme.lower() == "bearer":
        return credential.strip()
    return ""


def _create_csrf_token() -> str:
    """Generate a random CSRF token for double-submit cookie pattern."""
    return secrets.token_hex(32)


def _verify_csrf(request: Request) -> bool:
    """Verify CSRF token for state-changing admin requests.