diff --git a/configs/risk_policy.yaml b/configs/risk_policy.yaml new file mode 100644 index 0000000..38a0379 --- /dev/null +++ b/configs/risk_policy.yaml @@ -0,0 +1,38 @@ +# Context-Aware Adaptive Risk Scoring Policy +# ============================================ +# Weights control how much each contextual signal contributes to the +# final risk score. They must sum to 1.0 for correct normalisation. +# +# To customise scoring behaviour, adjust the weight values below. +# The scorer will raise an error on startup if weights do not sum to 1.0. + +risk_scoring: + restricted_zone: + weight: 0.30 + description: "Subject is inside or entered a restricted zone" + + repeated_approach: + weight: 0.25 + description: "Subject has approached the same zone multiple times" + + loitering: + weight: 0.15 + description: "Subject has been dwelling/lingering beyond threshold" + + after_hours: + weight: 0.20 + description: "Activity detected outside normal operating hours" + + reasoning_confidence: + weight: 0.10 + description: "LLM reasoning confidence as a risk amplifier" + +# Thresholds for risk level classification (applied to 0-100 scale) +risk_levels: + low_max: 40 + medium_max: 70 + +# Normalization parameters for continuous signals +normalization: + loitering_max_seconds: 120.0 + repeated_approach_max_count: 5 diff --git a/services/reasoning/pipeline.py b/services/reasoning/pipeline.py index db7f6fb..0b0970b 100644 --- a/services/reasoning/pipeline.py +++ b/services/reasoning/pipeline.py @@ -17,6 +17,7 @@ import json import asyncio +import datetime import logging import time import uuid @@ -27,9 +28,10 @@ from libs.schemas.memory import ActionHint, TrackSequence from libs.schemas.reasoning import ReasoningResult, GroundingResult from services.memory.ring_buffer import MemoryStore -from services.reasoning.dedup import AlertDeduplicator -from services.reasoning.vlm import BaseCaptioner, get_captioner -from services.reasoning.llm import BaseLLMReasoner, get_reasoner +from services.reasoning.dedup import AlertDeduplicator +from services.reasoning.vlm import BaseCaptioner, get_captioner +from services.reasoning.llm import BaseLLMReasoner, get_reasoner +from services.reasoning.risk_scoring import AdaptiveRiskScorer logger = logging.getLogger(__name__) @@ -39,13 +41,7 @@ # Global asyncio queue consumed by FastAPI SSE endpoint alert_queue: asyncio.Queue = asyncio.Queue(maxsize=200) -# Severity weights -_W = dict( - confidence = 0.50, - long_dwell = 0.20, - repeated_approach = 0.20, - high_tier_bonus = 0.10, -) + class ReasoningPipeline: @@ -63,6 +59,7 @@ def __init__( reasoner: Optional[BaseLLMReasoner] = None, store: Optional[MemoryStore] = None, deduplicator: Optional[AlertDeduplicator] = None, + risk_scorer: Optional[AdaptiveRiskScorer] = None, ) -> None: self._captioner = captioner or get_captioner() self._reasoner = reasoner or get_reasoner() @@ -70,6 +67,7 @@ def __init__( self._deduplicator = deduplicator or AlertDeduplicator( redis_client=self._store._r ) + self._risk_scorer = risk_scorer or AdaptiveRiskScorer() # ── Public ─────────────────────────────────────────────────────────────── @@ -215,21 +213,46 @@ def _ground( ) return GroundingResult(grounded=True, checked_caption=caption) - # ── Severity scoring ────────────────────────────────────────────────────── + # ── Severity scoring (powered by AdaptiveRiskScorer) ───────────────────── def _attach_severity( self, result: ReasoningResult, seq: TrackSequence, ) -> ReasoningResult: - score = result.confidence * _W["confidence"] - if seq.total_dwell > 30: - score += _W["long_dwell"] - if "repeated_approach" in seq.action_summary: - score += _W["repeated_approach"] - if result.confidence_tier == "high": - score += _W["high_tier_bonus"] - result.severity_score = round(min(score, 1.0), 3) + """Compute severity using the context-aware adaptive risk scorer. + + Builds contextual signals from the reasoning result and track + sequence, delegates to :class:`AdaptiveRiskScorer`, and maps + the 0–100 risk score back to the 0.0–1.0 ``severity_score`` field. + """ + # Determine whether any visited zone is restricted + restricted_keywords = ("restricted", "danger") + in_restricted = any( + any(kw in z.lower() for kw in restricted_keywords) + for z in seq.zones_visited + ) + + # Count repeated approaches from structured event data + approach_count = sum( + 1 for event in seq.events + if event.action_hint == ActionHint.REPEATED_APPROACH + ) + + # Determine after-hours status from current wall-clock hour + current_hour = datetime.datetime.now().hour + is_after_hours = current_hour >= 20 or current_hour < 6 + + signals = { + "restricted_zone": in_restricted, + "repeated_approach": approach_count, + "loitering": seq.total_dwell, + "after_hours": is_after_hours, + "reasoning_confidence": result.confidence, + } + + risk_result = self._risk_scorer.score(signals) + result.severity_score = round(risk_result["risk_score"] / 100.0, 3) return result # ── Storage ─────────────────────────────────────────────────────────────── diff --git a/services/reasoning/risk_scoring.py b/services/reasoning/risk_scoring.py new file mode 100644 index 0000000..46d354f --- /dev/null +++ b/services/reasoning/risk_scoring.py @@ -0,0 +1,229 @@ +""" +Adaptive Risk Scoring Engine for Eagle Surveillance. + +Loads configurable weights from a YAML policy file and computes a normalised +0–100 risk score from multiple contextual signals. Designed as a drop-in +replacement for the hardcoded severity weights previously defined in +``services.reasoning.pipeline._W``. + +Usage +----- + from services.reasoning.risk_scoring import AdaptiveRiskScorer + + scorer = AdaptiveRiskScorer("configs/risk_policy.yaml") + result = scorer.score({ + "restricted_zone": True, + "repeated_approach": 3, + "loitering": 45.0, + "after_hours": False, + "reasoning_confidence": 0.85, + }) + # result == {"risk_score": 62, "risk_level": "Medium", "risk_factors": [...]} +""" +from __future__ import annotations + +import logging +from pathlib import Path +from typing import Any, TypedDict + +import yaml + +logger = logging.getLogger(__name__) + +# Default policy path relative to the project root +_DEFAULT_POLICY_PATH = Path(__file__).resolve().parents[2] / "configs" / "risk_policy.yaml" + +# Human-readable descriptions for each signal +_FACTOR_LABELS: dict[str, str] = { + "restricted_zone": "Restricted zone detected", + "repeated_approach": "Repeated approach behavior", + "loitering": "Loitering / extended dwell time", + "after_hours": "After-hours activity", + "reasoning_confidence": "High reasoning confidence", +} + + +class RiskScoringResult(TypedDict): + """Typed result returned by :meth:`AdaptiveRiskScorer.score`.""" + + risk_score: int + risk_level: str + risk_factors: list[str] + + +class AdaptiveRiskScorer: + """Context-aware risk scoring engine. + + Loads configurable weights from a YAML policy file and computes a + normalised 0–100 risk score from multiple contextual signals. + + Parameters + ---------- + policy_path: + Path to the YAML policy file. If ``None``, uses the default + ``configs/risk_policy.yaml`` relative to the project root. + + Raises + ------ + FileNotFoundError + If the policy YAML file does not exist. + ValueError + If the YAML file is malformed or has an invalid structure. + """ + + def __init__(self, policy_path: str | Path | None = None) -> None: + resolved = Path(policy_path) if policy_path else _DEFAULT_POLICY_PATH + self._policy = self._load_policy(resolved) + self._weights: dict[str, float] = { + key: cfg["weight"] + for key, cfg in self._policy["risk_scoring"].items() + } + self._risk_levels: dict[str, int] = self._policy["risk_levels"] + self._normalization: dict[str, float] = self._policy["normalization"] + + # ── Public API ──────────────────────────────────────────────────────── + + def score(self, signals: dict[str, Any]) -> RiskScoringResult: + """Compute a context-aware risk score from raw contextual signals. + + Parameters + ---------- + signals: + Dictionary of raw signal values. Expected keys: + + - ``restricted_zone`` – ``bool`` + - ``repeated_approach`` – ``int`` (entry count) + - ``loitering`` – ``float`` (dwell seconds) + - ``after_hours`` – ``bool`` + - ``reasoning_confidence`` – ``float`` in [0, 1] + + Missing keys are treated as zero / inactive. + + Returns + ------- + RiskScoringResult + A dict with ``risk_score`` (0–100), ``risk_level`` + (``"Low"`` / ``"Medium"`` / ``"High"``), and + ``risk_factors`` (list of human-readable strings). + """ + normalized = self._normalize_signals(signals) + raw_score = self._compute_weighted_score(normalized) + score_100 = round(raw_score * 100) + score_100 = max(0, min(score_100, 100)) + + return RiskScoringResult( + risk_score=score_100, + risk_level=self._classify_risk_level(score_100), + risk_factors=self._identify_contributing_factors(normalized), + ) + + # ── Internal helpers ────────────────────────────────────────────────── + + def _normalize_signals(self, signals: dict[str, Any]) -> dict[str, float]: + """Normalise each raw signal to a value in [0.0, 1.0]. + + Boolean signals map to 1.0 / 0.0. Continuous signals are clamped + against the normalization parameters defined in the policy YAML. + """ + loitering_max = self._normalization["loitering_max_seconds"] + approach_max = self._normalization["repeated_approach_max_count"] + + restricted = signals.get("restricted_zone", False) + approach_count = signals.get("repeated_approach", 0) + dwell = signals.get("loitering", 0.0) + after_hours = signals.get("after_hours", False) + confidence = signals.get("reasoning_confidence", 0.0) + + return { + "restricted_zone": 1.0 if restricted else 0.0, + "repeated_approach": max(0.0, min(approach_count / approach_max, 1.0)) if approach_max > 0 else 0.0, + "loitering": max(0.0, min(dwell / loitering_max, 1.0)) if loitering_max > 0 else 0.0, + "after_hours": 1.0 if after_hours else 0.0, + "reasoning_confidence": max(0.0, min(float(confidence), 1.0)), + } + + def _compute_weighted_score(self, normalized: dict[str, float]) -> float: + """Apply YAML weights to normalised signals and return a 0–1 float.""" + total = 0.0 + for key, weight in self._weights.items(): + total += weight * normalized.get(key, 0.0) + return min(total, 1.0) + + def _classify_risk_level(self, score_100: int) -> str: + """Classify a 0–100 score into Low / Medium / High.""" + if score_100 <= self._risk_levels["low_max"]: + return "Low" + if score_100 <= self._risk_levels["medium_max"]: + return "Medium" + return "High" + + def _identify_contributing_factors( + self, normalized: dict[str, float] + ) -> list[str]: + """Return human-readable labels for signals that contributed.""" + # Confidence threshold for reporting — only flag genuinely high values + _CONFIDENCE_HIGH_THRESHOLD = 0.7 + + factors: list[str] = [] + for key, value in normalized.items(): + if value > 0.0 and key in _FACTOR_LABELS: + # "High reasoning confidence" should only appear when + # confidence is genuinely high, not for any nonzero value. + if key == "reasoning_confidence" and value < _CONFIDENCE_HIGH_THRESHOLD: + continue + factors.append(_FACTOR_LABELS[key]) + return factors + + # ── YAML loading ────────────────────────────────────────────────────── + + @staticmethod + def _load_policy(path: Path) -> dict[str, Any]: + """Load and validate the YAML policy file. + + Raises + ------ + FileNotFoundError + If *path* does not exist. + ValueError + If the YAML is empty, unparseable, missing required sections, + or contains invalid weight / threshold values. + """ + if not path.exists(): + raise FileNotFoundError(f"Risk policy file not found: {path}") + + try: + with open(path, "r", encoding="utf-8") as fh: + data = yaml.safe_load(fh) + except yaml.YAMLError as exc: + raise ValueError(f"Invalid YAML in risk policy: {exc}") from exc + + if data is None: + raise ValueError(f"Risk policy file is empty: {path}") + + for section in ("risk_scoring", "risk_levels", "normalization"): + if section not in data: + raise ValueError( + f"Risk policy missing required section: '{section}'" + ) + + # Lightweight weight validation + for signal, cfg in data["risk_scoring"].items(): + w = cfg.get("weight", 0) + if not isinstance(w, (int, float)) or w < 0: + raise ValueError( + f"Weight for '{signal}' must be a non-negative number, got {w!r}" + ) + if w > 1.0: + raise ValueError( + f"Weight for '{signal}' must not exceed 1.0, got {w}" + ) + + # Risk-level threshold ordering + levels = data["risk_levels"] + if levels["low_max"] >= levels["medium_max"]: + raise ValueError( + f"risk_levels.low_max ({levels['low_max']}) must be less than " + f"medium_max ({levels['medium_max']})" + ) + + return data diff --git a/tests/test_risk_scoring.py b/tests/test_risk_scoring.py new file mode 100644 index 0000000..e8f6a02 --- /dev/null +++ b/tests/test_risk_scoring.py @@ -0,0 +1,259 @@ +""" +Unit tests for the Adaptive Risk Scoring Engine. + +All tests are deterministic — no network, no Redis, no randomness. +Tests use a temporary YAML policy file to isolate from the real config. +""" +from __future__ import annotations + +import os +import sys + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from services.reasoning.risk_scoring import AdaptiveRiskScorer + + +# ── Helpers ─────────────────────────────────────────────────────────────────── + +_VALID_POLICY = """\ +risk_scoring: + restricted_zone: + weight: 0.30 + description: "Restricted zone" + repeated_approach: + weight: 0.25 + description: "Repeated approach" + loitering: + weight: 0.15 + description: "Loitering" + after_hours: + weight: 0.20 + description: "After hours" + reasoning_confidence: + weight: 0.10 + description: "Confidence" + +risk_levels: + low_max: 40 + medium_max: 70 + +normalization: + loitering_max_seconds: 120.0 + repeated_approach_max_count: 5 +""" + + +@pytest.fixture +def policy_file(tmp_path): + """Write a valid policy YAML to a temp directory and return its path.""" + path = tmp_path / "risk_policy.yaml" + path.write_text(_VALID_POLICY, encoding="utf-8") + return path + + +@pytest.fixture +def scorer(policy_file): + """Return an AdaptiveRiskScorer loaded from the temp policy file.""" + return AdaptiveRiskScorer(policy_path=policy_file) + + +# ── 1. YAML loading ────────────────────────────────────────────────────────── + +def test_yaml_loading(policy_file): + """Scorer loads a valid YAML policy without errors.""" + scorer = AdaptiveRiskScorer(policy_path=policy_file) + # Weights should be accessible and sum to 1.0 + total_weight = sum(scorer._weights.values()) + assert abs(total_weight - 1.0) < 1e-9 + + +def test_yaml_missing_raises(tmp_path): + """Missing YAML file raises FileNotFoundError.""" + missing = tmp_path / "nonexistent.yaml" + with pytest.raises(FileNotFoundError): + AdaptiveRiskScorer(policy_path=missing) + + +# ── 2. Normalization ───────────────────────────────────────────────────────── + +def test_normalization_logic(scorer): + """Verify normalization for each signal type.""" + normalized = scorer._normalize_signals({ + "restricted_zone": True, + "repeated_approach": 3, + "loitering": 60.0, + "after_hours": False, + "reasoning_confidence": 0.85, + }) + assert normalized["restricted_zone"] == 1.0 + assert normalized["repeated_approach"] == pytest.approx(0.6) # 3/5 + assert normalized["loitering"] == pytest.approx(0.5) # 60/120 + assert normalized["after_hours"] == 0.0 + assert normalized["reasoning_confidence"] == pytest.approx(0.85) + + # Clamping: loitering > max should cap at 1.0 + clamped = scorer._normalize_signals({"loitering": 999.0}) + assert clamped["loitering"] == 1.0 + + +# ── 3. Weighted scoring ────────────────────────────────────────────────────── + +def test_weighted_score_all_zeros(scorer): + """All signals inactive → risk_score == 0.""" + result = scorer.score({ + "restricted_zone": False, + "repeated_approach": 0, + "loitering": 0.0, + "after_hours": False, + "reasoning_confidence": 0.0, + }) + assert result["risk_score"] == 0 + + +def test_weighted_score_restricted_zone_only(scorer): + """Only restricted_zone active → risk_score == 30 (weight 0.30 × 100).""" + result = scorer.score({ + "restricted_zone": True, + "repeated_approach": 0, + "loitering": 0.0, + "after_hours": False, + "reasoning_confidence": 0.0, + }) + assert result["risk_score"] == 30 + + +# ── 4. Risk level classification ───────────────────────────────────────────── + +def test_risk_level_low(scorer): + """Score ≤ 40 → 'Low'.""" + result = scorer.score({ + "restricted_zone": True, # 0.30 → score 30 + "repeated_approach": 0, + "loitering": 0.0, + "after_hours": False, + "reasoning_confidence": 0.0, + }) + assert result["risk_level"] == "Low" + + +def test_risk_level_high(scorer): + """Score > 70 → 'High'.""" + result = scorer.score({ + "restricted_zone": True, # 0.30 + "repeated_approach": 5, # 0.25 (5/5 = 1.0) + "loitering": 120.0, # 0.15 (120/120 = 1.0) + "after_hours": True, # 0.20 + "reasoning_confidence": 1.0, # 0.10 + }) + # All max → 100 + assert result["risk_score"] == 100 + assert result["risk_level"] == "High" + + +# ── 5. Output structure ────────────────────────────────────────────────────── + +def test_output_structure(scorer): + """Result dict has the expected keys with correct types.""" + result = scorer.score({ + "restricted_zone": True, + "repeated_approach": 2, + "loitering": 30.0, + "after_hours": True, + "reasoning_confidence": 0.7, + }) + assert isinstance(result["risk_score"], int) + assert 0 <= result["risk_score"] <= 100 + assert result["risk_level"] in ("Low", "Medium", "High") + assert isinstance(result["risk_factors"], list) + assert all(isinstance(f, str) for f in result["risk_factors"]) + # Non-zero signals should produce factors + assert len(result["risk_factors"]) > 0 + # Confidence at threshold (0.7) should appear as a factor + assert "High reasoning confidence" in result["risk_factors"] + + +# ── 6. Confidence factor threshold ─────────────────────────────────────────── + +def test_confidence_factor_below_threshold(scorer): + """Low confidence (< 0.7) should NOT produce 'High reasoning confidence' factor.""" + result = scorer.score({ + "restricted_zone": False, + "repeated_approach": 0, + "loitering": 0.0, + "after_hours": False, + "reasoning_confidence": 0.5, + }) + assert "High reasoning confidence" not in result["risk_factors"] + # Score should still reflect the confidence weight contribution + assert result["risk_score"] == round(0.10 * 0.5 * 100) + + +# ── 7. Normalization clamping ───────────────────────────────────────────────── + +def test_negative_signal_clamped(scorer): + """Negative raw values are clamped to 0.0 during normalization.""" + normalized = scorer._normalize_signals({ + "loitering": -10.0, + "repeated_approach": -3, + "reasoning_confidence": -0.5, + }) + assert normalized["loitering"] == 0.0 + assert normalized["repeated_approach"] == 0.0 + assert normalized["reasoning_confidence"] == 0.0 + + +# ── 8. Policy validation ───────────────────────────────────────────────────── + +def test_negative_weight_raises(tmp_path): + """Negative weight in policy raises ValueError.""" + bad_policy = tmp_path / "bad.yaml" + bad_policy.write_text("""\ +risk_scoring: + restricted_zone: + weight: -0.30 + repeated_approach: + weight: 0.25 + loitering: + weight: 0.15 + after_hours: + weight: 0.20 + reasoning_confidence: + weight: 0.10 +risk_levels: + low_max: 40 + medium_max: 70 +normalization: + loitering_max_seconds: 120.0 + repeated_approach_max_count: 5 +""", encoding="utf-8") + with pytest.raises(ValueError, match="non-negative"): + AdaptiveRiskScorer(policy_path=bad_policy) + + +def test_inverted_risk_levels_raises(tmp_path): + """low_max >= medium_max in policy raises ValueError.""" + bad_policy = tmp_path / "bad.yaml" + bad_policy.write_text("""\ +risk_scoring: + restricted_zone: + weight: 0.30 + repeated_approach: + weight: 0.25 + loitering: + weight: 0.15 + after_hours: + weight: 0.20 + reasoning_confidence: + weight: 0.10 +risk_levels: + low_max: 70 + medium_max: 40 +normalization: + loitering_max_seconds: 120.0 + repeated_approach_max_count: 5 +""", encoding="utf-8") + with pytest.raises(ValueError, match="less than"): + AdaptiveRiskScorer(policy_path=bad_policy)