Einsia · heming-gmh · Apr 26, 2026 · Apr 26, 2026
diff --git a/src/openchronicle/capture/app_parsers/__init__.py b/src/openchronicle/capture/app_parsers/__init__.py
@@ -0,0 +1,56 @@
+"""S1 app parser registry.
+
+Parsers are registered at import time and run in priority order during
+:func:`apply_parsers`.  Later parsers can match on fields produced by
+earlier parsers (e.g. a Linear parser matching ``fields.url`` that
+contains ``linear.app``, which was extracted by the browser parser).
+"""
+
+from __future__ import annotations
+
+from ...logger import get
+from .base import AppParser, ParseContext, S1Fields
+from .browser import BrowserParser
+
+logger = get("openchronicle.capture.s1_registry")
+
+_parsers: list[AppParser] = []
+
+
+def _register_builtins() -> None:
+    register(BrowserParser())  # the only builtin parser registered by this module
+
+
+def register(parser: AppParser) -> None:
+    _parsers.append(parser)  # no de-duplication: registering the same parser twice adds two entries
+    _parsers.sort(key=lambda p: p.priority)  # ascending priority; list.sort() is stable, so ties keep registration order
+
+
+def _reset_registry() -> None:
+    """Clear all registered parsers and re-register builtins.
+
+    Intended for test isolation so registry mutations in one test
+    do not leak into another.
+    """
+    _parsers.clear()  # cleared in place, so the module-level list object is reused
+    _register_builtins()
+
+
+def apply_parsers(ctx: ParseContext, fields: S1Fields) -> None:
+    for parser in _parsers:  # register() keeps this list sorted by ascending priority
+        try:
+            if parser.matches(ctx, fields):
+                patch = parser.parse(ctx, fields)
+                if patch.focused_element is not None:  # None means "leave this field unchanged"
+                    fields.focused_element = patch.focused_element
+                if patch.visible_text is not None:
+                    fields.visible_text = patch.visible_text
+                if patch.url is not None:  # so a patch cannot reset url back to None
+                    fields.url = patch.url
+                if patch.app_context:  # shallow merge into a new dict; keys from the patch win
+                    fields.app_context = {**fields.app_context, **patch.app_context}
+        except Exception:  # a failing parser is logged and skipped; later parsers still run
+            logger.exception("S1 parser %r failed", parser.name)
+
+
+_register_builtins()
diff --git a/src/openchronicle/capture/app_parsers/base.py b/src/openchronicle/capture/app_parsers/base.py
@@ -0,0 +1,94 @@
+"""Base types for the S1 app parser registry.
+
+Every app-specific parser implements the :class:`AppParser` protocol.
+The :class:`ParseContext` gives parsers read-only access to the raw
+capture data; :class:`S1Fields` holds the current state; and
+:class:`S1Patch` lets a parser selectively override fields.
+"""
+
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass, field
+from typing import Any, Iterable, Protocol
+
+
+@dataclass
+class FocusedElement:
+    role: str = ""
+    title: str = ""
+    value: str = ""
+    is_editable: bool = False
+    has_value: bool = False  # overwritten in the dict returned by to_dict()
+    value_length: int = 0  # overwritten in the dict returned by to_dict()
+
+    def to_dict(self) -> dict[str, Any]:
+        d = asdict(self)
+        stripped = (self.value or "").strip()  # tolerates value=None
+        d["has_value"] = bool(stripped)  # whitespace-only counts as "no value"
+        d["value_length"] = len(stripped)  # length after stripping; d["value"] itself stays unstripped
+        return d
+
+
+@dataclass  # NOTE(review): not frozen, so "read-only" below is a convention rather than enforced
+class ParseContext:
+    """Read-only view of the raw capture data for a parser."""
+
+    capture: dict[str, Any]
+    app: dict[str, Any]
+    window_meta: dict[str, Any]
+
+    @property
+    def bundle_id(self) -> str:
+        return (self.app.get("bundle_id") or "").strip()  # "" when the key is missing or None
+
+    @property
+    def app_name(self) -> str:
+        return (self.app.get("name") or "").strip()  # "" when the key is missing or None
+
+    def iter_windows(self) -> Iterable[dict[str, Any]]:
+        return iter(self.app.get("windows", []))
+
+    def focused_window(self) -> dict[str, Any] | None:
+        for w in self.app.get("windows", []):
+            if w.get("focused"):  # the first window with a truthy "focused" flag wins
+                return w
+        return None
+
+    def iter_elements(self) -> Iterable[dict[str, Any]]:
+        """Iterate top-level elements across all windows."""
+        for window in self.app.get("windows", []):
+            yield from window.get("elements", [])  # yields each window's "elements" list as-is, no recursion
+
+
+@dataclass
+class S1Fields:
+    focused_element: FocusedElement
+    visible_text: str
+    url: str | None = None  # stays None unless a parser patch supplies a URL
+    app_context: dict[str, Any] = field(default_factory=dict)  # default_factory gives each instance its own dict
+
+
+@dataclass
+class S1Patch:
+    focused_element: FocusedElement | None = None  # None = do not override
+    visible_text: str | None = None  # None = do not override
+    url: str | None = None  # None = do not override
+    app_context: dict[str, Any] = field(default_factory=dict)  # empty = nothing to merge
+
+
+class AppParser(Protocol):
+    """Protocol for app-specific S1 field parsers.
+
+    .. warning::
+
+        ``matches()`` and ``parse()`` **must not** call ``register()``.
+        Doing so re-sorts the parser list while ``apply_parsers()`` is
+        iterating; no error is raised, but parsers may run twice.
+    """
+
+    name: str
+    priority: int
+
+    def matches(self, ctx: ParseContext, fields: S1Fields) -> bool: ...
+
+    def parse(self, ctx: ParseContext, fields: S1Fields) -> S1Patch: ...
diff --git a/src/openchronicle/capture/app_parsers/browser.py b/src/openchronicle/capture/app_parsers/browser.py
@@ -0,0 +1,52 @@
+"""Browser URL extraction parser.
+
+Migrated from ``s1_parser._extract_url``.  Matches known browser
+bundle IDs and extracts the URL from the first ``AXTextField`` whose
+value looks like a URL or bare domain.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Any
+
+from .base import ParseContext, S1Fields, S1Patch
+
+_BROWSER_BUNDLES = {
+    "com.google.Chrome",
+    "com.apple.Safari",
+    "org.mozilla.firefox",
+    "com.microsoft.edgemac",
+    "company.thebrowser.Browser",
+    "com.brave.Browser",
+    "com.operasoftware.Opera",
+}
+
+_URL_RE = re.compile(r"https?://\S+")  # "http://" or "https://" followed by one or more non-whitespace characters
+
+
+class BrowserParser:
+    name = "browser"
+    priority = 10  # the registry sorts ascending, so lower values run earlier
+
+    def matches(self, ctx: ParseContext, fields: S1Fields) -> bool:
+        return ctx.bundle_id in _BROWSER_BUNDLES  # exact, case-sensitive membership test on the stripped bundle id
+
+    def parse(self, ctx: ParseContext, fields: S1Fields) -> S1Patch:
+        url = _extract_url_from_app(ctx.app)  # None when no URL-like text field is found
+        return S1Patch(url=url)
+
+
+def _extract_url_from_app(app_data: dict[str, Any]) -> str | None:
+    """Return the first URL found in an ``AXTextField`` value, else ``None``."""
+    for window in app_data.get("windows", []):
+        for el in window.get("elements", []):
+            if el.get("role") != "AXTextField":
+                continue
+            value = (el.get("value") or "").strip()
+            match = _URL_RE.search(value)
+            if match:  # return only the URL, not any text surrounding it
+                return match.group(0)
+            if value and "." in value and " " not in value:
+                return f"https://{value}"  # bare domain: assume https
+    return None
diff --git a/src/openchronicle/capture/s1_parser.py b/src/openchronicle/capture/s1_parser.py
@@ -9,27 +9,28 @@
 ``_extract_focused_element`` / ``_render_visible_text`` / ``_extract_url``).
 Runs inline inside ``capture_once`` so every capture-buffer JSON carries
 these fields.
+
+Architecture
+------------
+
+``enrich()`` computes a **generic baseline** (focused element + visible text
++ url=None) and then runs registered app parsers in priority order.  Each
+parser may selectively override fields via an ``S1Patch``.  This lets future
+parsers compose — for example a Linear parser can match ``linear.app`` in
+the URL that the browser parser already extracted.
 """
 
 from __future__ import annotations
 
-import re
-from dataclasses import asdict, dataclass
 from typing import Any
 
+# Import triggers builtin parser registration.
+from .app_parsers import apply_parsers
+from .app_parsers.base import FocusedElement, ParseContext, S1Fields
 from .ax_models import ax_app_to_markdown
 
-_BROWSER_BUNDLES = {
-    "com.google.Chrome",
-    "com.apple.Safari",
-    "org.mozilla.firefox",
-    "com.microsoft.edgemac",
-    "company.thebrowser.Browser",
-    "com.brave.Browser",
-    "com.operasoftware.Opera",
-}
-
-_URL_RE = re.compile(r"https?://\S+")
+# Re-export for tests and downstream code that imports from here.
+__all__ = ["FocusedElement", "enrich"]
 
 _EDITABLE_ROLES = {"AXTextField", "AXTextArea", "AXComboBox"}
 _STATIC_ROLES = {"AXStaticText", "AXWebArea"}
@@ -39,23 +40,6 @@
 _FOCUS_VALUE_MAX = 2_000
 
 
-@dataclass
-class FocusedElement:
-    role: str = ""
-    title: str = ""
-    value: str = ""
-    is_editable: bool = False
-    has_value: bool = False
-    value_length: int = 0
-
-    def to_dict(self) -> dict[str, Any]:
-        d = asdict(self)
-        stripped = (self.value or "").strip()
-        d["has_value"] = bool(stripped)
-        d["value_length"] = len(stripped)
-        return d
-
-
 def enrich(capture: dict[str, Any]) -> None:
     """Mutate ``capture`` in place: add ``focused_element`` / ``visible_text`` / ``url``.
 
@@ -72,9 +56,27 @@ def enrich(capture: dict[str, Any]) -> None:
         capture["url"] = None
         return
 
-    capture["focused_element"] = _extract_focused_element(app_data).to_dict()
-    capture["visible_text"] = _render_visible_text(app_data)
-    capture["url"] = _extract_url(app_data)
+    # ── Generic baseline ──────────────────────────────────────────────
+    fields = S1Fields(
+        focused_element=_extract_focused_element(app_data),
+        visible_text=_render_visible_text(app_data),
+        url=None,
+    )
+
+    # ── App-parser patches ────────────────────────────────────────────
+    ctx = ParseContext(
+        capture=capture,
+        app=app_data,
+        window_meta=capture.get("window_meta") or {},
+    )
+    apply_parsers(ctx, fields)
+
+    # ── Write back ────────────────────────────────────────────────────
+    capture["focused_element"] = fields.focused_element.to_dict()
+    capture["visible_text"] = fields.visible_text
+    capture["url"] = fields.url
+    if fields.app_context:
+        capture["app_context"] = fields.app_context
 
 
 def _frontmost_app(ax_tree: dict[str, Any]) -> dict[str, Any] | None:
@@ -113,21 +115,3 @@ def _render_visible_text(app_data: dict[str, Any]) -> str:
     if len(md) > _VISIBLE_TEXT_MAX:
         md = md[:_VISIBLE_TEXT_MAX] + "\n...(truncated)"
     return md
-
-
-def _extract_url(app_data: dict[str, Any]) -> str | None:
-    bundle = app_data.get("bundle_id", "")
-    if bundle not in _BROWSER_BUNDLES:
-        return None
-    for window in app_data.get("windows", []):
-        for el in window.get("elements", []):
-            if el.get("role") != "AXTextField":
-                continue
-            value = (el.get("value") or "").strip()
-            if not value:
-                continue
-            if _URL_RE.search(value):
-                return value
-            if "." in value and " " not in value:
-                return f"https://{value}"
-    return None
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -8,6 +8,15 @@
 import pytest
 
 
+@pytest.fixture(autouse=True)
+def _reset_app_parser_registry() -> None:
+    """Restore builtin parsers before each test so registry mutations
+    in one test do not leak into another."""
+    from openchronicle.capture.app_parsers import _reset_registry
+
+    _reset_registry()  # setup only: the fixture has no yield, so nothing runs after the test
+
+
 @pytest.fixture
 def ac_root(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
     root = tmp_path / "openchronicle"

diff --git a/tests/fixtures/s1/chrome_url/expected.json b/tests/fixtures/s1/chrome_url/expected.json
@@ -0,0 +1,12 @@
+{
+  "focused_element": {
+    "role": "AXTextField",
+    "title": "Address and search bar",
+    "value": "https://www.anthropic.com/news",
+    "is_editable": true,
+    "has_value": true,
+    "value_length": 30
+  },
+  "visible_text": "## Google Chrome [active]\n_com.google.Chrome_\n### Anthropic — Claude Code\n- [TextField] Address and search bar — https://www.anthropic.com/news",
+  "url": "https://www.anthropic.com/news"
+}
diff --git a/tests/fixtures/s1/chrome_url/input.json b/tests/fixtures/s1/chrome_url/input.json
@@ -0,0 +1,24 @@
+{
+  "ax_tree": {
+    "apps": [
+      {
+        "name": "Google Chrome",
+        "bundle_id": "com.google.Chrome",
+        "is_frontmost": true,
+        "windows": [
+          {
+            "title": "Anthropic — Claude Code",
+            "focused": true,
+            "elements": [
+              {
+                "role": "AXTextField",
+                "title": "Address and search bar",
+                "value": "https://www.anthropic.com/news"
+              }
+            ]
+          }
+        ]
+      }
+    ]
+  }
+}
diff --git a/tests/fixtures/s1/generic_cursor_textarea/expected.json b/tests/fixtures/s1/generic_cursor_textarea/expected.json
@@ -0,0 +1,12 @@
+{
+  "focused_element": {
+    "role": "AXTextArea",
+    "title": "editor",
+    "value": "def enrich(capture):\n    ...",
+    "is_editable": true,
+    "has_value": true,
+    "value_length": 28
+  },
+  "visible_text": "## Cursor [active]\n_com.todesktop.230313mzl4w4u92_\n### s1_parser.py\n- [TextArea] editor — def enrich(capture):\n    ...",
+  "url": null
+}