diff --git a/install.bat b/install.bat
new file mode 100644
index 0000000..c40da27
--- /dev/null
+++ b/install.bat
@@ -0,0 +1,120 @@
+@echo off
+setlocal EnableExtensions DisableDelayedExpansion
+
+rem install.bat - CMD entry point for the Windows OpenChronicle installer.
+rem
+rem This file intentionally delegates the real install logic to install.ps1 so
+rem the project keeps one source of truth. It only:
+rem   1. finds PowerShell,
+rem   2. maps familiar --long-options to the PowerShell script parameters,
+rem   3. preserves a non-zero exit code for CI / scripts.
+
+set "SCRIPT_DIR=%~dp0"
+set "PS_SCRIPT=%SCRIPT_DIR%install.ps1"
+
+if not exist "%PS_SCRIPT%" (
+    echo [openchronicle-install] Error: install.ps1 not found next to install.bat
+    exit /b 1
+)
+
+rem Test exit codes with "if not errorlevel 1" rather than %ERRORLEVEL%:
+rem inside a parenthesized block %ERRORLEVEL% is expanded when the block is
+rem parsed, so it would still hold the result of the earlier pwsh.exe probe.
+set "PS_EXE="
+where pwsh.exe >nul 2>nul
+if not errorlevel 1 set "PS_EXE=pwsh.exe"
+if not defined PS_EXE (
+    where powershell.exe >nul 2>nul
+    if not errorlevel 1 set "PS_EXE=powershell.exe"
+)
+if not defined PS_EXE (
+    if exist "%SystemRoot%\System32\WindowsPowerShell\v1.0\powershell.exe" (
+        set "PS_EXE=%SystemRoot%\System32\WindowsPowerShell\v1.0\powershell.exe"
+    )
+)
+if not defined PS_EXE (
+    echo [openchronicle-install] Error: PowerShell was not found on PATH.
+    exit /b 1
+)
+
+set "PS_ARGS="
+
+:parse_args
+if "%~1"=="" goto run_installer
+
+if /I "%~1"=="--python" goto opt_python
+if /I "%~1"=="-Python" goto opt_python
+if /I "%~1"=="--bin-dir" goto opt_bin_dir
+if /I "%~1"=="-BinDir" goto opt_bin_dir
+if /I "%~1"=="--yes" goto opt_yes
+if /I "%~1"=="-Yes" goto opt_yes
+if /I "%~1"=="--no-client-config" goto opt_no_client_config
+if /I "%~1"=="-NoClientConfig" goto opt_no_client_config
+if /I "%~1"=="--help" goto opt_help
+if /I "%~1"=="-h" goto opt_help
+if /I "%~1"=="-Help" goto opt_help
+
+echo [openchronicle-install] Error: unknown option: %~1
+echo.
+call :print_usage
+exit /b 1
+
+:opt_python
+if "%~2"=="" (
+    echo [openchronicle-install] Error: %~1 requires a value.
+    exit /b 1
+)
+call :append_arg -Python
+call :append_arg "%~2"
+shift
+shift
+goto parse_args
+
+:opt_bin_dir
+if "%~2"=="" (
+    echo [openchronicle-install] Error: %~1 requires a value.
+    exit /b 1
+)
+call :append_arg -BinDir
+call :append_arg "%~2"
+shift
+shift
+goto parse_args
+
+:opt_yes
+call :append_arg -Yes
+shift
+goto parse_args
+
+:opt_no_client_config
+call :append_arg -NoClientConfig
+shift
+goto parse_args
+
+:opt_help
+call :print_usage
+exit /b 0
+
+:run_installer
+"%PS_EXE%" -NoProfile -ExecutionPolicy Bypass -File "%PS_SCRIPT%" %PS_ARGS%
+exit /b %ERRORLEVEL%
+
+:append_arg
+rem Quote every forwarded argument so paths like "C:\Users\Me\My Bin" survive.
+set "PS_ARGS=%PS_ARGS% "%~1""
+exit /b 0
+
+:print_usage
+echo Usage: install.bat [options]
+echo.
+echo Installs OpenChronicle into a dedicated virtualenv, creates an
+echo openchronicle launcher, and optionally injects MCP config into
+echo detected clients.
+echo.
+echo Options:
+echo   --python ^<version^>      Python version to target (default: 3.12)
+echo   --bin-dir ^<path^>        Directory to place the openchronicle launcher
+echo   --yes                   Auto-inject all detected MCP client configs
+echo   --no-client-config      Skip MCP client config prompts entirely
+echo   -h, --help              Show this help
+exit /b 0
diff --git a/install.ps1 b/install.ps1
new file mode 100644
index 0000000..850040a
--- /dev/null
+++ b/install.ps1
@@ -0,0 +1,274 @@
+# install.ps1 — Windows installer for OpenChronicle
+# Mirrors install.sh functionality for Windows environments.
+# The --long-option spellings are accepted by install.bat, which maps them here.
+# Usage: powershell -ExecutionPolicy Bypass -File install.ps1 [options]
+#   -Python <version>   Python version for managed runtime (default: 3.12)
+#   -BinDir <path>      Directory for the openchronicle shim script
+#   -Yes                Auto-inject all detected MCP client configs
+#   -NoClientConfig     Skip MCP client config prompts entirely
+#   -Help               Show this help
+
+param(
+    [string]$Python = "3.12",
+    [string]$BinDir = "",
+    [switch]$Yes,
+    [switch]$NoClientConfig,
+    [switch]$Help
+)
+
+$ErrorActionPreference = "Stop"
+$RootDir = Split-Path -Parent $MyInvocation.MyCommand.Path  # directory containing this script
+$InstallHome = if ($env:OPENCHRONICLE_INSTALL_HOME) { $env:OPENCHRONICLE_INSTALL_HOME } else { Join-Path $env:USERPROFILE ".openchronicle" }
+$VenvDir = Join-Path $InstallHome "venv"
+$UvBin = ""             # set by Ensure-UV
+$OpenChronicleBin = ""  # set by Install-Package
+$InstallBinDir = ""     # set by Install-Launcher
+
+function Log($msg) { Write-Host "[openchronicle-install] $msg" }
+function Warn($msg) { Write-Warning "[openchronicle-install] $msg" }
+function Die($msg) {  # report a fatal error and stop the installer with a non-zero status
+    Write-Error "[openchronicle-install] Error: $msg"
+    exit 1
+}
+
+if ($Help) {
+    # Single-quoted here-string: nothing to expand, and the backticks print literally.
+    @'
+Usage: powershell -ExecutionPolicy Bypass -File install.ps1 [options]
+
+Installs OpenChronicle into a dedicated virtualenv, creates an
+`openchronicle` launcher, and optionally injects MCP config into
+detected clients.
+
+Options:
+  -Python <version>   Python version to target (default: 3.12)
+  -BinDir <path>      Directory to place the openchronicle launcher
+  -Yes                Auto-inject all detected MCP client configs
+  -NoClientConfig     Skip MCP client config prompts entirely
+  -Help               Show this help
+'@
+    exit 0
+}
+function Require-RepoRoot {  # abort unless pyproject.toml and src/openchronicle sit next to this script
+    $pyprojectPath = Join-Path $RootDir "pyproject.toml"
+    $srcPath = Join-Path (Join-Path $RootDir "src") "openchronicle"
+    if (-not (Test-Path $pyprojectPath)) { Die "run this script from the repository root" }
+    if (-not (Test-Path $srcPath)) { Die "repository layout looks incomplete" }
+}
+
+function Check-Platform {  # abort on non-Windows hosts and on Windows older than 10
+    if ($env:OS -ne "Windows_NT") {
+        Die "This installer is for Windows only. Use install.sh on macOS/Linux."
+    }
+    $ver = [System.Environment]::OSVersion.Version
+    if ($ver.Major -lt 10) {
+        Die "Windows 10 or later is required (found $($ver.ToString()))"
+    }
+}
+
+function Ensure-UV {  # locate uv (installing it if missing) and store its path in $script:UvBin
+    $uvCmd = Get-Command uv -ErrorAction SilentlyContinue
+    if ($uvCmd) {
+        $script:UvBin = $uvCmd.Source
+        return
+    }
+
+    Log "uv not found; installing it"
+    try {
+        Invoke-RestMethod https://astral.sh/uv/install.ps1 | Invoke-Expression
+    } catch {
+        Die "failed to install uv: $_"
+    }
+
+    # Refresh PATH and search again
+    $env:PATH = [System.Environment]::GetEnvironmentVariable("PATH", "User") + ";" + $env:PATH
+    $uvCmd = Get-Command uv -ErrorAction SilentlyContinue
+    if ($uvCmd) {
+        $script:UvBin = $uvCmd.Source
+        return
+    }
+
+    $candidates = @(
+        (Join-Path (Join-Path (Join-Path $env:USERPROFILE ".local") "bin") "uv.exe"),
+        (Join-Path (Join-Path (Join-Path $env:USERPROFILE ".cargo") "bin") "uv.exe")
+    )
+    foreach ($c in $candidates) {
+        if (Test-Path $c) {
+            $script:UvBin = $c
+            $env:PATH = (Split-Path $c) + ";" + $env:PATH
+            return
+        }
+    }
+    Die "uv installation finished but the binary was not found"
+}
+
+function Find-CompatiblePython {  # path of a python/python3 on PATH that is >= 3.11, else $null
+    $pyCmd = Get-Command python -ErrorAction SilentlyContinue
+    if (-not $pyCmd) {
+        $pyCmd = Get-Command python3 -ErrorAction SilentlyContinue
+    }
+    if (-not $pyCmd) { return $null }
+
+    try {
+        $ver = & $pyCmd.Source -c "import sys; print('.'.join(map(str, sys.version_info[:3])))"
+        # Compare as a whole version so e.g. 4.0 is not rejected for having minor < 11.
+        if ([version]$ver -ge [version]"3.11") {
+            return $pyCmd.Source
+        }
+    } catch {}
+    return $null
+}
+
+function Prepare-PythonTarget {  # returns the interpreter path or version to hand to `uv venv --python`
+    $systemPython = Find-CompatiblePython
+    if ($systemPython) {
+        Log "using system Python at $systemPython"
+        return $systemPython
+    }
+
+    Log "system Python < 3.11; installing managed Python $Python via uv"
+    & $UvBin python install $Python | Out-Host  # keep uv's stdout out of this function's return value
+    if ($LASTEXITCODE -ne 0) { Die "failed to install Python $Python via uv" }
+    return $Python
+}
+
+function Install-Package($pythonTarget) {  # recreate the venv and install this repository into it
+    if (Test-Path $VenvDir) {
+        Remove-Item -Recurse -Force $VenvDir
+    }
+    New-Item -ItemType Directory -Path $InstallHome -Force | Out-Null
+
+    Log "creating virtualenv at $VenvDir"
+    & $UvBin venv $VenvDir --python $pythonTarget
+    if ($LASTEXITCODE -ne 0) { Die "failed to create virtualenv" }
+
+    $venvPython = Join-Path (Join-Path $VenvDir "Scripts") "python.exe"
+
+    Log "installing OpenChronicle into the virtualenv"
+    & $UvBin pip install --python $venvPython $RootDir
+    if ($LASTEXITCODE -ne 0) { Die "failed to install OpenChronicle into $VenvDir" }
+
+    $script:OpenChronicleBin = Join-Path (Join-Path $VenvDir "Scripts") "openchronicle.exe"
+    if (-not (Test-Path $OpenChronicleBin)) {
+        Die "expected CLI not found at $OpenChronicleBin"
+    }
+}
+
+function Choose-InstallBinDir {  # -BinDir when given, otherwise %USERPROFILE%\.local\bin (created if needed)
+    if ($BinDir) {
+        New-Item -ItemType Directory -Path $BinDir -Force | Out-Null
+        return $BinDir
+    }
+
+    $localBin = Join-Path (Join-Path $env:USERPROFILE ".local") "bin"
+    New-Item -ItemType Directory -Path $localBin -Force | Out-Null
+    return $localBin
+}
+
+function Install-Launcher {  # write openchronicle.cmd forwarding %* to the venv's openchronicle.exe
+    $script:InstallBinDir = Choose-InstallBinDir
+    $launcherPath = Join-Path $InstallBinDir "openchronicle.cmd"
+    $content = "@echo off`r`n`"$OpenChronicleBin`" %*"
+    Set-Content -Path $launcherPath -Value $content -Encoding ASCII
+    $env:PATH = "$InstallBinDir;$env:PATH"
+    Log "installed openchronicle launcher at $launcherPath"
+}
+
+function Verify-Install {  # smoke-test the launcher; a failure only produces a warning
+    # With "Stop", stderr merged via 2>&1 can surface as a terminating error in Windows PowerShell.
+    $ErrorActionPreference = "Continue"
+    & (Join-Path $InstallBinDir "openchronicle.cmd") status 2>&1 | Out-Null
+    if ($LASTEXITCODE -ne 0) { Warn "installation verification returned non-zero (this is expected on first run)" }
+}
+
+function Prompt-YesNo($prompt) {  # -Yes => $true, -NoClientConfig => $false, else ask (empty reply = yes)
+    if ($Yes) { return $true }
+    if ($NoClientConfig) { return $false }
+
+    $reply = Read-Host "$prompt [Y/n]"
+    if ([string]::IsNullOrWhiteSpace($reply) -or $reply -match '^[Yy]') {
+        return $true
+    }
+    return $false
+}
+
+function Maybe-InjectClient($client, $label) {  # run `openchronicle install <client>` unless declined
+    if ($NoClientConfig) { return }
+
+    if (-not $Yes) {
+        if (-not (Prompt-YesNo "Detected $label. Inject OpenChronicle MCP config now?")) {
+            return
+        }
+    } else {
+        Log "injecting MCP config into $label"
+    }
+
+    & (Join-Path $InstallBinDir "openchronicle.cmd") install $client
+    if ($LASTEXITCODE -ne 0) {
+        Warn "failed to inject MCP config for $label; retry later with 'openchronicle install $client'"
+    }
+}
+
+function Inject-DetectedClients {  # offer MCP config injection for each client whose config file exists
+    $codexCfg = Join-Path (Join-Path $env:USERPROFILE ".codex") "config.toml"
+    $claudeCodeCfg = Join-Path $env:USERPROFILE ".claude.json"
+    $claudeDesktopCfg = Join-Path (Join-Path $env:APPDATA "Claude") "claude_desktop_config.json"
+    $opencodeCfg = Join-Path (Join-Path (Join-Path $env:USERPROFILE ".config") "opencode") "opencode.json"
+
+    if (Test-Path $codexCfg) {
+        if (Get-Command codex -ErrorAction SilentlyContinue) {
+            Maybe-InjectClient "codex" "Codex CLI"
+        } else {
+            Warn "found $codexCfg, but codex is not on PATH; skipping"
+        }
+    }
+
+    if (Test-Path $claudeCodeCfg) {
+        if (Get-Command claude -ErrorAction SilentlyContinue) {
+            Maybe-InjectClient "claude-code" "Claude Code"
+        } else {
+            Warn "found $claudeCodeCfg, but claude is not on PATH; skipping"
+        }
+    }
+
+    if (Test-Path $claudeDesktopCfg) {
+        Maybe-InjectClient "claude-desktop" "Claude Desktop"
+    }
+
+    if ((Test-Path $opencodeCfg) -or (Test-Path "${opencodeCfg}c")) {  # also accept opencode.jsonc
+        Maybe-InjectClient "opencode" "opencode"
+    }
+}
+
+function Print-Summary {  # emit the post-install summary (expands the install paths)
+    @"
+
+OpenChronicle installed successfully.
+
+Install root : $InstallHome
+Virtualenv   : $VenvDir
+CLI launcher : $(Join-Path $InstallBinDir 'openchronicle.cmd')
+
+Next steps:
+  1. Start the daemon:
+       openchronicle start
+  2. Check status:
+       openchronicle status
+  3. (Optional) Add $InstallBinDir to your PATH if not already present.
+"@
+}
+
+# ─── Main ──────────────────────────────────────────────────────────────
+
+Require-RepoRoot
+Check-Platform
+Ensure-UV
+
+$pythonTarget = Prepare-PythonTarget
+if (-not $pythonTarget) { Die "failed to determine a Python target" }
+
+Install-Package $pythonTarget
+Install-Launcher
+Verify-Install
+Inject-DetectedClients
+Print-Summary
diff --git a/pyproject.toml b/pyproject.toml
index 0681e02..0ef9f84 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,6 +12,7 @@ classifiers = [
     "Environment :: Console",
     "License :: OSI Approved :: MIT License",
     "Operating System :: MacOS",
+    "Operating System :: Microsoft :: Windows",
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
@@ -47,6 +48,7 @@ packages = ["src/openchronicle"]
 "resources/build-mac-ax-helper.sh" = "openchronicle/_bundled/build-mac-ax-helper.sh"
 "resources/mac-ax-watcher.swift" = "openchronicle/_bundled/mac-ax-watcher.swift"
 "resources/build-mac-ax-watcher.sh" = "openchronicle/_bundled/build-mac-ax-watcher.sh"
+"resources/win-uia-helper.ps1" = "openchronicle/_bundled/win-uia-helper.ps1"
 
 [dependency-groups]
 dev = [
diff --git a/resources/win-uia-helper.ps1 b/resources/win-uia-helper.ps1
new file mode 100644
index 0000000..90dda63
--- /dev/null
+++ b/resources/win-uia-helper.ps1
@@ -0,0 +1,584 @@
+# win-uia-helper.ps1 — Windows UI Automation tree capture (mirrors mac-ax-helper)
+#
+# Output schema is intentionally identical to mac-ax-helper.swift so the
+# Python s1_parser / ax_models / aggregator code paths are platform-agnostic:
+#
+#   {
+#     "timestamp": "<UTC, yyyy-MM-ddTHH:mm:ssZ>",
+#     "apps": [
+#       {
+#         "pid": <int>,
+#         "name": "<process name>",
+#         "bundle_id": "<main module file path>",
+#         "is_frontmost": <bool>,
+#         "windows": [
+#           {
+#             "title": "<window title>",
+#             "focused": true,   # only present on focused window
+#             "elements": [ <element>, ... ]
+#           }
+#         ]
+#       }
+#     ]
+#   }
+#
+# Each <element> is:
+#   {
+#     "role": "AXEdit"|"AXButton"|...   # UIA ControlType remapped to mac AX role
+#     "title": "<UIA Name>",
+#     "identifier": "<UIA AutomationId>",
+#     "value": "<ValuePattern value or toggle state>",
+#     "children": [<element>, ...]
+#   }
+#
+# Empty fields are omitted (matches mac-ax-helper's AXNode.toDict()).
+# Container nodes that add no semantic value are collapsed (matches
+# mac-ax-helper's containerRoles promotion).
+# Visual chrome roles (Image, ScrollBar, ...) are dropped entirely.
+#
+# Usage: powershell -ExecutionPolicy Bypass -File win-uia-helper.ps1 [options]
+#   -AllVisible          Capture all visible top-level windows
+#   -AppName <name>      Capture the top-level window whose UIA Name equals <name>
+#   -FocusedWindowOnly   Only capture the focused window (default for frontmost)
+#   -Depth <n>           Max element tree depth (default 8)
+#   -Timeout <seconds>   Reserved for CLI parity with mac-ax-helper
+#   -Raw                 Skip semantic filtering (preserve full UIA tree)
+
+param(
+    [switch]$AllVisible,
+    [string]$AppName = "",
+    [switch]$FocusedWindowOnly,
+    [int]$Depth = 8,
+    [int]$Timeout = 3,
+    [switch]$Raw,
+    # Win32 HWND of the foreground window, as observed by the parent
+    # (daemon) process. The daemon owns the desktop session whereas the
+    # helper subprocess runs in a session-isolated detached state — its
+    # own GetForegroundWindow / UIA FocusedElement always return zero.
+    # This parameter is the equivalent of mac's
+    # NSWorkspace.frontmostApplication: a stable, externally-observed
+    # anchor for "which app is the user actually using".
+    [long]$ForegroundHwnd = 0,
+    [int]$ForegroundPid = 0
+)
+
+$ErrorActionPreference = "Stop"
+
+Add-Type -AssemblyName UIAutomationClient
+Add-Type -AssemblyName UIAutomationTypes
+
+# Win32 imports — used to identify the *real* foreground window. The
+# UIAutomation FocusedElement is unreliable here: when this script is
+# launched as a subprocess by the daemon, focus has briefly moved to the
+# launching shell, so FocusedElement can return the wrong app entirely.
+# GetForegroundWindow + GetWindowThreadProcessId mirror what mac's
+# NSWorkspace.frontmostApplication gives us — they're independent of who
+# is currently consuming UIA events.
+if (-not ("OpenChronicle.Win32" -as [type])) {  # compile the P/Invoke wrapper only if the type is not loaded yet
+    Add-Type -TypeDefinition @"
+using System;
+using System.Runtime.InteropServices;
+namespace OpenChronicle {
+    public static class Win32 {
+        [DllImport("user32.dll")]
+        public static extern IntPtr GetForegroundWindow();
+        [DllImport("user32.dll")]
+        public static extern uint GetWindowThreadProcessId(IntPtr hWnd, out uint lpdwProcessId);
+    }
+}
+"@
+}
+
+$automation = [System.Windows.Automation.AutomationElement]  # shorthand for static access ($automation::RootElement, ...)
+$controlType = [System.Windows.Automation.ControlType]       # shorthand for $controlType::Window
+
+# ─── Mac AX role parity ─────────────────────────────────────────────────
+#
+# Maps UIA ControlType.ProgrammaticName ("ControlType.Edit") to the
+# normalized role vocabulary consumed by s1_parser. Windows edit controls keep
+# the distinct AXEdit role while still participating in editable/url-bar logic.
+
+$ROLE_MAP = @{  # key: ControlType name without the "ControlType." prefix; value: AX-style role
+    "Window" = "AXWindow"
+    "Pane" = "AXGroup"
+    "Group" = "AXGroup"
+    "Custom" = "AXGroup"
+    "TitleBar" = "AXGroup"
+    "StatusBar" = "AXGroup"
+    "Calendar" = "AXGroup"
+    "SemanticZoom" = "AXGroup"
+    "Document" = "AXTextArea"
+    "Edit" = "AXEdit"
+    "Text" = "AXStaticText"
+    "Hyperlink" = "AXLink"
+    "Button" = "AXButton"
+    "SplitButton" = "AXButton"
+    "MenuItem" = "AXMenuItem"
+    "Menu" = "AXMenu"
+    "MenuBar" = "AXMenuBar"
+    "CheckBox" = "AXCheckBox"
+    "RadioButton" = "AXRadioButton"
+    "ComboBox" = "AXComboBox"
+    "List" = "AXList"
+    "ListItem" = "AXRow"
+    "Tree" = "AXOutline"
+    "TreeItem" = "AXRow"
+    "DataGrid" = "AXOutline"
+    "DataItem" = "AXRow"
+    "Table" = "AXOutline"
+    "Header" = "AXHeading"
+    "HeaderItem" = "AXHeading"
+    "Tab" = "AXTabGroup"
+    "TabItem" = "AXTab"
+    "ToolBar" = "AXToolbar"
+    "AppBar" = "AXToolbar"
+    "Image" = "AXImage"
+    "ScrollBar" = "AXScrollBar"
+    "Slider" = "AXSlider"
+    "ProgressBar" = "AXProgressIndicator"
+    "Spinner" = "AXIncrementor"
+    "Separator" = "AXSplitter"
+    "ToolTip" = "AXToolTip"
+    "Thumb" = "AXValueIndicator"
+}
+
+# Pure visual chrome — drop the node and its subtree. Mirrors
+# mac-ax-helper.swift's `dropRoles`. -AsString keeps the keys plain strings
+# so ContainsKey("AX...") lookups match.
+$DROP_ROLES = @(
+    "AXImage", "AXScrollBar", "AXValueIndicator", "AXSplitter"
+) | Group-Object -AsHashTable -AsString -Property { $_ }
+
+# Container roles that get collapsed (single-child promotion) when they
+# carry no text of their own. Mirrors mac-ax-helper.swift's `containerRoles`.
+$CONTAINER_ROLES = @(
+    "AXGroup", "AXSplitGroup", "AXScrollArea", "AXList",
+    "AXOutline", "AXBrowser", "AXDrawer", "AXSheet", "AXToolbar"
+) | Group-Object -AsHashTable -AsString -Property { $_ }
+
+# Length cap on element values (mac-ax-helper.swift maxValueLength = 1000).
+$VALUE_MAX = 1000
+
+# Max children per node — guards against a runaway list.
+$MAX_CHILDREN = 200
+
+
+function Map-ControlTypeToRole {  # "ControlType.X" -> mapped role from $ROLE_MAP, or "AXX" when unmapped
+    param([string]$ProgrammaticName)
+    # ProgrammaticName is "ControlType.Edit" / "ControlType.Pane" / ...
+    $bare = $ProgrammaticName -replace "^ControlType\.", ""
+    if ($ROLE_MAP.ContainsKey($bare)) {
+        return $ROLE_MAP[$bare]
+    }
+    # Unknown control type: prefix with AX so downstream code knows to
+    # treat it as an "unknown but tagged" element. Matches mac-ax-helper's
+    # behaviour for roles outside its known sets.
+    return "AX$bare"
+}
+
+
+function Get-TextAttribute {  # read $Element.Current.<Name> as a string; "" when null or on any error
+    param(
+        [System.Windows.Automation.AutomationElement]$Element,
+        [string]$Name
+    )
+    try {
+        $val = $Element.Current.$Name
+        if ($null -eq $val) { return "" }
+        return [string]$val
+    } catch {
+        return ""
+    }
+}
+
+
+function Get-ElementValue {  # ValuePattern text (capped at $VALUE_MAX), else toggle state, else ""
+    param([System.Windows.Automation.AutomationElement]$Element)
+    # ValuePattern → .Value (works for Edit/Document/ComboBox)
+    try {
+        $obj = $null
+        if ($Element.TryGetCurrentPattern(
+                [System.Windows.Automation.ValuePattern]::Pattern,
+                [ref]$obj)) {
+            $v = $obj.Current.Value
+            if ($null -ne $v -and $v.Length -gt 0) {
+                if ($v.Length -gt $VALUE_MAX) {
+                    return $v.Substring(0, $VALUE_MAX) + "..."
+                }
+                return $v
+            }
+        }
+    } catch {}
+    # TogglePattern → "On" / "Off" / "Indeterminate" (for CheckBox / RadioButton)
+    try {
+        $obj = $null
+        if ($Element.TryGetCurrentPattern(
+                [System.Windows.Automation.TogglePattern]::Pattern,
+                [ref]$obj)) {
+            return $obj.Current.ToggleState.ToString()
+        }
+    } catch {}
+    return ""
+}
+
+
+function Test-IsSecureField {  # $true when the element reports IsPassword; $false on any error
+    param([System.Windows.Automation.AutomationElement]$Element)
+    # IsPassword is a UIA core property on edit controls
+    try {
+        return [bool]$Element.Current.IsPassword
+    } catch {
+        return $false
+    }
+}
+
+
+function Get-ElementTree {  # recursively convert one UIA element into an ordered dict, or $null when filtered out
+    param(
+        [System.Windows.Automation.AutomationElement]$Element,
+        [int]$CurrentDepth,
+        [int]$MaxDepth
+    )
+
+    if ($null -eq $Element) { return $null }
+    if ($MaxDepth -gt 0 -and $CurrentDepth -ge $MaxDepth) { return $null }  # MaxDepth <= 0 means unlimited
+
+    $role = $null
+    $title = ""
+    $identifier = ""
+    $value = ""
+    $isSecure = $false
+
+    try {
+        $role = Map-ControlTypeToRole -ProgrammaticName `
+            $Element.Current.ControlType.ProgrammaticName
+        $title = (Get-TextAttribute -Element $Element -Name "Name").Trim()
+        $identifier = (Get-TextAttribute -Element $Element -Name "AutomationId").Trim()
+        $isSecure = Test-IsSecureField -Element $Element
+        if ($isSecure) {
+            $value = "[REDACTED]"  # never emit the contents of a password field
+        } else {
+            $value = (Get-ElementValue -Element $Element).Trim()
+        }
+    } catch {
+        return $null
+    }
+
+    # Drop visual chrome roles in filtered mode (mirrors mac dropRoles).
+    if (-not $Raw -and $DROP_ROLES.ContainsKey($role)) {
+        return $null
+    }
+
+    # Recurse into children.
+    $childList = @()
+    if ($MaxDepth -le 0 -or $CurrentDepth + 1 -lt $MaxDepth) {
+        try {
+            $children = $Element.FindAll(
+                [System.Windows.Automation.TreeScope]::Children,
+                [System.Windows.Automation.Condition]::TrueCondition
+            )
+            $count = 0  # counts children visited, including ones that were filtered out
+            foreach ($child in $children) {
+                if ($count -ge $MAX_CHILDREN) { break }
+                $childResult = Get-ElementTree `
+                    -Element $child `
+                    -CurrentDepth ($CurrentDepth + 1) `
+                    -MaxDepth $MaxDepth
+                if ($null -ne $childResult) {
+                    $childList += $childResult
+                }
+                $count++
+            }
+        } catch {}
+    }
+
+    $hasText = ($title.Length -gt 0) -or ($value.Length -gt 0)
+
+    # Container collapsing: a container node with no own text is just
+    # noise — promote a single child, drop a leaf, otherwise keep as a
+    # plain wrapper. Mirrors mac-ax-helper's container collapse logic.
+    if (-not $Raw -and $CONTAINER_ROLES.ContainsKey($role) -and -not $hasText) {
+        if ($childList.Count -eq 1) {
+            return $childList[0]
+        }
+        if ($childList.Count -eq 0) {
+            return $null
+        }
+    }
+
+    # Drop unknown / leaf nodes that carry nothing useful at all.
+    if (-not $Raw -and -not $hasText -and ($childList.Count -eq 0)) {
+        return $null
+    }
+
+    $result = [ordered]@{
+        "role" = $role
+    }
+    if ($title.Length -gt 0) { $result["title"] = $title }
+    if ($identifier.Length -gt 0) { $result["identifier"] = $identifier }
+    if ($value.Length -gt 0) { $result["value"] = $value }
+    if ($childList.Count -gt 0) { $result["children"] = $childList }
+    return $result
+}
+
+
+function Get-WindowData {  # build the window dict: title, optional "focused", optional "elements"
+    param(
+        [System.Windows.Automation.AutomationElement]$WindowElement,
+        [int]$MaxDepth,
+        [bool]$IsFocused
+    )
+
+    $title = ""
+    try { $title = [string]$WindowElement.Current.Name } catch {}
+
+    # Redact title when the focused element is a password field. Mirrors
+    # mac-ax-watcher's window_title redaction.
+    try {
+        $focusedEl = $automation::FocusedElement  # desktop-wide focus, not limited to $WindowElement
+        if ($null -ne $focusedEl -and (Test-IsSecureField -Element $focusedEl)) {
+            $title = "[REDACTED]"
+        }
+    } catch {}
+
+    $elements = @()
+    try {
+        $children = $WindowElement.FindAll(
+            [System.Windows.Automation.TreeScope]::Children,
+            [System.Windows.Automation.Condition]::TrueCondition
+        )
+        foreach ($child in $children) {
+            $el = Get-ElementTree -Element $child -CurrentDepth 0 -MaxDepth $MaxDepth
+            if ($null -ne $el) {
+                $elements += $el
+            }
+        }
+    } catch {}
+
+    $windowData = [ordered]@{ "title" = $title }
+    if ($IsFocused) { $windowData["focused"] = $true }
+    if ($elements.Count -gt 0) { $windowData["elements"] = $elements }
+    return $windowData
+}
+
+
+function Get-AppFromHwnd {  # app dict skeleton for a process id; name / bundle_id filled in best-effort
+    param(
+        [int]$ProcessId,
+        [bool]$IsFrontmost
+    )
+
+    $appData = [ordered]@{
+        "pid" = $ProcessId
+        "name" = ""
+        "bundle_id" = ""
+        "is_frontmost" = $IsFrontmost
+        "windows" = @()
+    }
+
+    if ($ProcessId -gt 0) {
+        try {
+            $proc = Get-Process -Id $ProcessId -ErrorAction Stop
+            $appData["name"] = $proc.ProcessName
+            try {
+                $appData["bundle_id"] = $proc.MainModule.FileName
+            } catch {}
+        } catch {}
+    }
+
+    return $appData
+}
+
+
+function Find-WindowAncestor {  # nearest element of ControlType Window at or above $Element, else $null
+    param([System.Windows.Automation.AutomationElement]$Element)
+    $walker = [System.Windows.Automation.TreeWalker]::ControlViewWalker
+    $current = $Element
+    while ($null -ne $current) {
+        try {
+            if ($current.Current.ControlType -eq $controlType::Window) {
+                return $current
+            }
+        } catch { return $null }
+        try {
+            $current = $walker.GetParent($current)
+        } catch { return $null }
+    }
+    return $null
+}
+
+
+function Get-ForegroundContext {  # returns @{ hwnd = [IntPtr]; pid = <number> }; both zero when unknown
+    # Prefer the parent-supplied HWND/PID — that's the only reliable
+    # source when this script runs as a session-isolated subprocess.
+    # Fall back to GetForegroundWindow when the caller didn't pass one
+    # (manual invocation from a normal interactive PowerShell).
+    if ($ForegroundHwnd -ne 0 -and $ForegroundPid -gt 0) {
+        return @{
+            hwnd = [IntPtr]::new($ForegroundHwnd)
+            pid = $ForegroundPid
+        }
+    }
+    $hwnd = [OpenChronicle.Win32]::GetForegroundWindow()
+    if ($hwnd -eq [IntPtr]::Zero) {
+        return @{ hwnd = [IntPtr]::Zero; pid = 0 }
+    }
+    $procId = 0
+    [void][OpenChronicle.Win32]::GetWindowThreadProcessId($hwnd, [ref]$procId)
+    return @{ hwnd = $hwnd; pid = $procId }
+}
+
+
+function Resolve-WindowFromHwnd {  # UIA Window element for a native handle, else $null
+    param([IntPtr]$Hwnd)
+    if ($Hwnd -eq [IntPtr]::Zero) { return $null }
+    try {
+        $el = $automation::FromHandle($Hwnd)
+        if ($null -eq $el) { return $null }
+        # If the HWND maps to something other than a Window element (e.g.
+        # a console host), walk up to the first Window ancestor so the
+        # output schema still has a "windows[]" with content.
+        try {
+            if ($el.Current.ControlType -eq $controlType::Window) {
+                return $el
+            }
+        } catch {}
+        return Find-WindowAncestor -Element $el
+    } catch {
+        return $null
+    }
+}
+
+
+# ─── Main ────────────────────────────────────────────────────────────────
+
+$timestamp = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ssZ")
+$output = [ordered]@{
+    "timestamp" = $timestamp
+    "apps" = @()
+}
+
+try {
+    $root = $automation::RootElement
+
+    # Foreground window/pid via Win32 — independent of who is currently
+    # consuming UIA events. Named $fgHwnd / $fgPid because PowerShell names are
+    # case-insensitive: $foregroundHwnd would be the [long] -ForegroundHwnd parameter.
+    $fg = Get-ForegroundContext
+    $fgHwnd = $fg.hwnd
+    $fgPid = [int]$fg.pid
+
+    # FocusedElement is only an *extra* signal — never the source of truth
+    # for which app is frontmost. $focused is not read anywhere below in
+    # this script; the password-field title redaction in Get-WindowData
+    # queries FocusedElement itself.
+    $focused = $null
+    try { $focused = $automation::FocusedElement } catch {}
+
+    if ($AllVisible) {
+        # All top-level windows, grouped by ProcessId.
+        $windowCondition = New-Object System.Windows.Automation.PropertyCondition(
+            $automation::ControlTypeProperty, $controlType::Window
+        )
+        $allWindows = $root.FindAll(
+            [System.Windows.Automation.TreeScope]::Children,
+            $windowCondition
+        )
+
+        $appMap = @{}  # ProcessId -> array of that process's window elements
+        foreach ($w in $allWindows) {
+            try {
+                $wPid = $w.Current.ProcessId
+                if (-not $appMap.ContainsKey($wPid)) {
+                    $appMap[$wPid] = @()
+                }
+                $appMap[$wPid] += $w
+            } catch {}
+        }
+
+        foreach ($wPid in $appMap.Keys) {
+            $isFront = ($wPid -eq $fgPid)
+            $appData = Get-AppFromHwnd -ProcessId $wPid -IsFrontmost $isFront
+            $windowDicts = @()
+            foreach ($w in $appMap[$wPid]) {
+                $isFocusedWindow = $false
+                try {
+                    $isFocusedWindow = $isFront -and `
+                        ([IntPtr]$w.Current.NativeWindowHandle -eq $fgHwnd)
+                } catch {}
+                $windowDicts += (Get-WindowData -WindowElement $w `
+                    -MaxDepth $Depth -IsFocused $isFocusedWindow)
+            }
+            $appData["windows"] = $windowDicts
+            $output["apps"] += $appData
+        }
+    } elseif ($AppName) {
+        $nameCondition = New-Object System.Windows.Automation.PropertyCondition(
+            $automation::NameProperty, $AppName
+        )
+        $found = $root.FindFirst(
+            [System.Windows.Automation.TreeScope]::Children,
+            $nameCondition
+        )
+        if ($null -ne $found) {
+            $appPid = 0
+            try { $appPid = $found.Current.ProcessId } catch {}
+            $appData = Get-AppFromHwnd -ProcessId $appPid -IsFrontmost $true
+            $appData["windows"] = @(Get-WindowData -WindowElement $found `
+                -MaxDepth $Depth -IsFocused $true)
+            $output["apps"] += $appData
+        }
+    } else {
+        # Frontmost app — anchor on the Win32 foreground window. Walking
+        # up from FocusedElement is fragile (focus can be on a transient
+        # child) and easily wrong when the helper itself was just
+        # launched.
+        if ($fgHwnd -ne [IntPtr]::Zero -and $fgPid -gt 0) {
+            $foregroundWindow = Resolve-WindowFromHwnd -Hwnd $fgHwnd
+            $appData = Get-AppFromHwnd -ProcessId $fgPid -IsFrontmost $true
+
+            if ($FocusedWindowOnly) {
+                if ($null -ne $foregroundWindow) {
+                    $appData["windows"] = @(Get-WindowData `
+                        -WindowElement $foregroundWindow `
+                        -MaxDepth $Depth -IsFocused $true)
+                }
+            } else {
+                $windowCondition = New-Object System.Windows.Automation.PropertyCondition(
+                    $automation::ControlTypeProperty, $controlType::Window
+                )
+                $allWindows = $root.FindAll(
+                    [System.Windows.Automation.TreeScope]::Children,
+                    $windowCondition
+                )
+                $windowDicts = @()
+                foreach ($w in $allWindows) {
+                    try {
+                        if ($w.Current.ProcessId -ne $fgPid) { continue }
+                    } catch { continue }
+                    $isFocusedWindow = $false
+                    try {
+                        $isFocusedWindow = `
+                            ([IntPtr]$w.Current.NativeWindowHandle -eq $fgHwnd)
+                    } catch {}
+                    $windowDicts += (Get-WindowData -WindowElement $w `
+                        -MaxDepth $Depth -IsFocused $isFocusedWindow)
+                }
+                # Fallback: if for any reason we didn't find the
+                # foreground HWND in the root's window list (e.g. the
+                # foreground window is a child popup not enumerated as a
+                # top-level), still emit the foreground window itself.
+                if ($windowDicts.Count -eq 0 -and $null -ne $foregroundWindow) {
+                    $windowDicts += (Get-WindowData `
+                        -WindowElement $foregroundWindow `
+                        -MaxDepth $Depth -IsFocused $true)
+                }
+                $appData["windows"] = $windowDicts
+            }
+
+            $output["apps"] += $appData
+        }
+    }
+} catch {
+    $output["error"] = $_.Exception.Message  # failures are reported inside the JSON document
+}
+
+$output | ConvertTo-Json -Depth 50 -Compress
diff --git a/src/openchronicle/capture/ax_capture.py b/src/openchronicle/capture/ax_capture.py
index f5c297f..3deaac1 100644
--- a/src/openchronicle/capture/ax_capture.py
+++ b/src/openchronicle/capture/ax_capture.py
@@ -1,7 +1,9 @@
-"""Cross-platform-stub AX Tree capture (macOS only in v1).
+"""Cross-platform AX / UI Automation tree capture.
-Wraps the vendored `mac-ax-helper` Swift binary. Ported from Einsia-Partner's
-backend/core/capture/ax_capture_service.py with Windows branch removed and
+macOS:   vendored ``mac-ax-helper`` Swift binary
+Windows: vendored ``win-uia-helper.ps1`` PowerShell script (uses .NET UIAutomation)
+
+Ported from Einsia-Partner's backend/core/capture/ax_capture_service.py with
 resource resolution adapted for a uv/pip-installable package.
 """
 
@@ -20,6 +22,39 @@ logger = get("openchronicle.capture")
 
 
 _SUBPROCESS_TIMEOUT = 10  # seconds (covers --timeout 3 + overhead)
+_WIN_SUBPROCESS_TIMEOUT = 15  # PowerShell startup overhead
+
+
+def _foreground_hwnd_pid() -> tuple[int, int]:
+    """Resolve (hwnd, pid) of the current foreground window.
+
+    Used on Windows to anchor the win-uia-helper subprocess to the user's
+    actual foreground app (the helper itself can't see the desktop —
+    see _run() for the why). On non-Windows or if the call fails we
+    return (0, 0); the helper falls back to its own GetForegroundWindow.
+    """
+    if platform.system() != "Windows":
+        return (0, 0)
+    try:
+        import ctypes  # imported lazily, only on the Windows path
+        import ctypes.wintypes as wt
+
+        user32 = ctypes.windll.user32  # type: ignore[attr-defined]
+        user32.GetForegroundWindow.argtypes = []
+        user32.GetForegroundWindow.restype = wt.HWND
+        user32.GetWindowThreadProcessId.argtypes = [
+            wt.HWND, ctypes.POINTER(wt.DWORD)
+        ]
+        user32.GetWindowThreadProcessId.restype = wt.DWORD
+        hwnd = user32.GetForegroundWindow() or 0  # normalise a falsy handle to 0
+        if not hwnd:
+            return (0, 0)
+        pid = wt.DWORD(0)  # out-parameter filled in by GetWindowThreadProcessId
+        user32.GetWindowThreadProcessId(hwnd, ctypes.byref(pid))
+        return (int(hwnd), int(pid.value))
+    except Exception as exc:  # noqa: BLE001
+        logger.debug("foreground HWND query failed: %s", exc)
+        return (0, 0)
 
 
 def _strip_frame_fields(value: Any) -> Any:
@@ -123,7 +158,13 @@ class AXProvider(Protocol):
     @property
     def available(self) -> bool: ...
 
-    def capture_frontmost(self, *, focused_window_only: bool = True) -> AXCaptureResult | None: ...
+ def capture_frontmost( + self, + *, + focused_window_only: bool = True, + anchor_hwnd: int = 0, + anchor_pid: int = 0, + ) -> AXCaptureResult | None: ... def capture_all_visible(self) -> AXCaptureResult | None: ... @@ -140,7 +181,13 @@ def __init__(self, reason: str) -> None: def available(self) -> bool: return False - def capture_frontmost(self, *, focused_window_only: bool = True) -> AXCaptureResult | None: + def capture_frontmost( + self, + *, + focused_window_only: bool = True, + anchor_hwnd: int = 0, + anchor_pid: int = 0, + ) -> AXCaptureResult | None: return None def capture_all_visible(self) -> AXCaptureResult | None: @@ -165,7 +212,16 @@ def __init__(self, *, helper_path: Path, depth: int, timeout: int, raw: bool = F def available(self) -> bool: return True - def capture_frontmost(self, *, focused_window_only: bool = True) -> AXCaptureResult | None: + def capture_frontmost( + self, + *, + focused_window_only: bool = True, + anchor_hwnd: int = 0, + anchor_pid: int = 0, + ) -> AXCaptureResult | None: + # mac-ax-helper resolves the frontmost app via NSWorkspace; the + # anchor_hwnd / anchor_pid hints are Windows-only and ignored here. + del anchor_hwnd, anchor_pid return self._run(all_visible=False, focused_window_only=focused_window_only) def capture_all_visible(self) -> AXCaptureResult | None: @@ -237,13 +293,178 @@ def _run( ) -def create_provider(*, depth: int = 8, timeout: int = 3, raw: bool = False) -> AXProvider: - if platform.system() != "Darwin": - return UnavailableAXProvider(f"unsupported platform: {platform.system()}") - helper = _resolve_helper_path() - if helper is None: - return UnavailableAXProvider( - "mac-ax-helper not found. 
class WinUIAutomationProvider:
    """Subprocess wrapper around the vendored ``win-uia-helper.ps1`` script.

    Each capture launches ``powershell -File <script>`` with switches that
    select the capture mode, parses the helper's stdout as JSON and wraps it
    in an ``AXCaptureResult``. Every failure mode (timeout, launch error,
    non-zero exit, undecodable or malformed output) is logged and reported
    to the caller as ``None``.
    """

    def __init__(self, *, script_path: Path, depth: int, timeout: int, raw: bool = False) -> None:
        self._script_path = str(script_path)
        self._depth = depth
        # NOTE(review): stored but not consulted by _run(), which applies the
        # module-level _WIN_SUBPROCESS_TIMEOUT instead — confirm whether the
        # helper script is meant to receive this value.
        self._timeout = timeout
        self._raw = raw

    @property
    def available(self) -> bool:
        """Always ``True``: this provider is only built once the script exists."""
        return True

    def capture_frontmost(
        self,
        *,
        focused_window_only: bool = True,
        anchor_hwnd: int = 0,
        anchor_pid: int = 0,
    ) -> AXCaptureResult | None:
        """Capture the foreground app, optionally anchored to a known HWND/PID."""
        return self._run(
            all_visible=False,
            focused_window_only=focused_window_only,
            anchor_hwnd=anchor_hwnd,
            anchor_pid=anchor_pid,
        )

    def capture_all_visible(self) -> AXCaptureResult | None:
        """Capture every visible app (helper switch ``-AllVisible``)."""
        return self._run(all_visible=True)

    def capture_app(
        self, app_name: str, *, focused_window_only: bool = True
    ) -> AXCaptureResult | None:
        """Capture a single app selected by name (helper switch ``-AppName``)."""
        return self._run(
            all_visible=False, app_name=app_name, focused_window_only=focused_window_only
        )

    def _run(
        self,
        *,
        all_visible: bool,
        app_name: str | None = None,
        focused_window_only: bool = False,
        anchor_hwnd: int = 0,
        anchor_pid: int = 0,
    ) -> AXCaptureResult | None:
        """Invoke the helper once and convert its output.

        Args:
            all_visible: Pass ``-AllVisible`` (ignored when ``app_name`` is set).
            app_name: When non-empty, pass ``-AppName <app_name>``.
            focused_window_only: Pass ``-FocusedWindowOnly``.
            anchor_hwnd: Foreground window handle supplied by the caller;
                ``0`` means "query it here".
            anchor_pid: Process id that accompanies ``anchor_hwnd``.

        Returns:
            An ``AXCaptureResult`` on success, otherwise ``None``.
        """
        args: list[str] = [
            "powershell", "-ExecutionPolicy", "Bypass", "-NoProfile",
            "-File", self._script_path,
        ]
        if app_name:
            args.extend(["-AppName", app_name])
        elif all_visible:
            args.append("-AllVisible")
        if focused_window_only:
            args.append("-FocusedWindowOnly")
        if self._raw:
            args.append("-Raw")
        args.extend(["-Depth", str(self._depth)])

        # Anchor the helper to the user's foreground window. Source priority:
        #   1. the caller-supplied anchor (the HWND of the triggering event);
        #   2. GetForegroundWindow queried from this process, for captures
        #      that have no triggering event.
        # Only frontmost captures are anchored; -AppName / -AllVisible are not.
        if not all_visible and not app_name:
            hwnd, pid = anchor_hwnd, anchor_pid
            if not hwnd:
                hwnd, pid = _foreground_hwnd_pid()
            if hwnd:
                args.extend(["-ForegroundHwnd", str(hwnd)])
                args.extend(["-ForegroundPid", str(pid)])

        # CREATE_NO_WINDOW keeps powershell.exe from opening a console window
        # on every capture. The constant only exists on Windows, hence getattr.
        creationflags = getattr(subprocess, "CREATE_NO_WINDOW", 0)
        try:
            proc = subprocess.run(
                args,
                capture_output=True,
                text=True,
                # Undecodable bytes become U+FFFD instead of raising
                # UnicodeDecodeError, which none of the handlers below catch.
                errors="replace",
                timeout=_WIN_SUBPROCESS_TIMEOUT,
                creationflags=creationflags,
            )
        except subprocess.TimeoutExpired:
            logger.warning("win-uia-helper timed out after %ds", _WIN_SUBPROCESS_TIMEOUT)
            return None
        except OSError as exc:
            logger.error("Failed to run win-uia-helper: %s", exc)
            return None

        if proc.returncode != 0:
            logger.warning(
                "win-uia-helper exited %d: %s", proc.returncode, proc.stderr.strip()[:200]
            )
            return None

        try:
            data = json.loads(proc.stdout)
        except json.JSONDecodeError as exc:
            logger.warning("Failed to parse win-uia-helper JSON: %s", exc)
            return None

        # The fields read below require a JSON object; a list/str/number root
        # would otherwise fail with AttributeError on ``data.get``.
        if not isinstance(data, dict):
            logger.warning(
                "win-uia-helper returned non-object JSON: %s", type(data).__name__
            )
            return None

        data = _strip_frame_fields(data)
        mode = "all-visible" if all_visible else "frontmost"
        return AXCaptureResult(
            raw_json=data,
            timestamp=data.get("timestamp", ""),
            apps=data.get("apps", []),
            metadata={"mode": mode, "depth": self._depth, "platform": "windows", "raw": self._raw},
        )


def _resolve_win_helper_path() -> Path | None:
    """Locate ``win-uia-helper.ps1``.

    Search order:
      1. the ``OPENCHRONICLE_WIN_UIA_HELPER`` environment variable (a warning
         is logged and the search continues if it does not name a file);
      2. ``_bundled/win-uia-helper.ps1`` inside the ``openchronicle`` package;
      3. ``resources/win-uia-helper.ps1`` three directories above this file.

    Returns:
        The first candidate that is an existing file, or ``None``.
    """
    override = os.environ.get("OPENCHRONICLE_WIN_UIA_HELPER")
    if override:
        p = Path(override).expanduser().resolve()
        if p.is_file():
            return p
        logger.warning("OPENCHRONICLE_WIN_UIA_HELPER set but not found: %s", p)

    candidates: list[Path] = []

    try:
        from importlib.resources import files as _pkg_files

        bundled_dir = Path(str(_pkg_files("openchronicle").joinpath("_bundled")))
        candidates.append(bundled_dir / "win-uia-helper.ps1")
    except (ModuleNotFoundError, ValueError):
        # Package not importable in this environment; rely on the dev layout.
        pass

    dev_root = Path(__file__).resolve().parents[3]
    candidates.append(dev_root / "resources" / "win-uia-helper.ps1")

    for p in candidates:
        if p.is_file():
            return p

    return None
+ if helper is None: + return UnavailableAXProvider( + "mac-ax-helper not found. Build it: bash resources/build-mac-ax-helper.sh" + ) + logger.info("AX capture initialized (macOS): %s", helper) + return MacAXHelperProvider(helper_path=helper, depth=depth, timeout=timeout, raw=raw) + if system == "Windows": + script = _resolve_win_helper_path() + if script is None: + return UnavailableAXProvider( + "win-uia-helper.ps1 not found. Reinstall OpenChronicle." + ) + logger.info("UI Automation capture initialized (Windows): %s", script) + return WinUIAutomationProvider(script_path=script, depth=depth, timeout=timeout, raw=raw) + return UnavailableAXProvider(f"unsupported platform: {system}") diff --git a/src/openchronicle/capture/event_dispatcher.py b/src/openchronicle/capture/event_dispatcher.py index a092c04..8f9b18a 100644 --- a/src/openchronicle/capture/event_dispatcher.py +++ b/src/openchronicle/capture/event_dispatcher.py @@ -102,6 +102,17 @@ def on_event(self, raw: dict[str, Any]) -> None: "bundle_id": bundle_id, "window_title": window_title, } + # Windows-only HWND/PID hints — used by the capture scheduler to + # anchor the win-uia-helper subprocess to the user's foreground + # window. Stripped before being written into the capture JSON + # (see scheduler._public_trigger). Mac's watcher doesn't emit + # these fields, so the trigger gracefully degrades. + hwnd = raw.get("hwnd") + if hwnd: + trigger["hwnd"] = int(hwnd) + pid = raw.get("pid") + if pid: + trigger["pid"] = int(pid) if event_type in _IMMEDIATE_EVENTS: self._cancel_debounce() diff --git a/src/openchronicle/capture/s1_parser.py b/src/openchronicle/capture/s1_parser.py index f846661..21af6da 100644 --- a/src/openchronicle/capture/s1_parser.py +++ b/src/openchronicle/capture/s1_parser.py @@ -15,11 +15,13 @@ import re from dataclasses import asdict, dataclass +from pathlib import PurePath from typing import Any from .ax_models import ax_app_to_markdown -_BROWSER_BUNDLES = { +# macOS bundle IDs (reverse-DNS). 
+_BROWSER_BUNDLES_MAC = { "com.google.Chrome", "com.apple.Safari", "org.mozilla.firefox", @@ -29,10 +31,41 @@ "com.operasoftware.Opera", } +# Windows executable basenames (lowercase, no extension). On Windows the +# capture pipeline stores the full ``*.exe`` path as ``bundle_id``, so we +# match on the file stem to keep parity with the macOS bundle-ID check. +_BROWSER_EXES_WIN = { + "chrome", + "msedge", + "firefox", + "brave", + "opera", + "vivaldi", + "arc", + "iexplore", +} + _URL_RE = re.compile(r"https?://\S+") -_EDITABLE_ROLES = {"AXTextField", "AXTextArea", "AXComboBox"} -_STATIC_ROLES = {"AXStaticText", "AXWebArea"} +_EDITABLE_ROLES = {"AXTextField", "AXTextArea", "AXComboBox", "AXEdit"} +_STATIC_ROLES = {"AXStaticText", "AXWebArea", "AXText"} + +# Roles that can hold a browser address bar. mac-ax-helper reports the +# Chromium/Safari address bar as ``AXTextField``; win-uia-helper.ps1 maps +# the UIA ``Edit`` control type to ``AXEdit`` (and some browsers expose +# the address bar as ``AXComboBox``). Keep all three here so ``_extract_url`` +# works on both platforms without per-OS branching. +_URL_BAR_ROLES = {"AXTextField", "AXEdit", "AXComboBox"} + +# Hints in the address-bar element's title/identifier — used to prefer the +# real address bar over an unrelated edit control on the page (e.g. a +# search input rendered as ``AXEdit``). Lowercased substring match. +_URL_BAR_NAME_HINTS = ( + "address", # "Address and search bar" (Edge/Chrome English) + "url", # "URL bar" + "location", # Firefox "location bar" + "地址", # Edge/Chrome zh-CN "地址和搜索栏" +) _VISIBLE_TEXT_MAX = 10_000 _FOCUS_TITLE_MAX = 200 @@ -56,25 +89,67 @@ def to_dict(self) -> dict[str, Any]: return d -def enrich(capture: dict[str, Any]) -> None: +def _is_browser_bundle(bundle: str) -> bool: + """Cross-platform browser detection. + + macOS: ``bundle_id`` is reverse-DNS (e.g. ``com.microsoft.edgemac``). + Windows: ``bundle_id`` is the full executable path + (e.g. 
``C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe``) + — match on the lowercase file stem. + """ + if not bundle: + return False + if bundle in _BROWSER_BUNDLES_MAC: + return True + # PurePath handles both forward- and back-slashes regardless of the + # host OS, so a Windows path passed through on macOS still parses. + stem = PurePath(bundle).stem.lower() + if not stem and "." in bundle: + # Bare exe name like ``msedge.exe`` or ``msedge`` slipping through. + stem = bundle.rsplit(".", 1)[0].lower() + return stem in _BROWSER_EXES_WIN + + +def enrich(capture: dict[str, Any], *, trigger: dict[str, Any] | None = None) -> None: """Mutate ``capture`` in place: add ``focused_element`` / ``visible_text`` / ``url``. No-op when there is no ``ax_tree`` (e.g. AX unavailable, permission denied). + + ``trigger`` (optional) is the watcher event that drove this capture. + When ax_tree fails to yield a URL we fall back to scanning + ``trigger.window_title`` for an ``https?://...`` substring, so a + GitHub-style window title that ends in ``" — Microsoft Edge"`` still + has its URL surfaced. This mirrors the macOS behaviour of pulling URL + candidates from any text the watcher already exposed. 
""" ax_tree = capture.get("ax_tree") if not isinstance(ax_tree, dict): + if trigger is not None: + capture["url"] = _extract_url_from_trigger(trigger) return app_data = _frontmost_app(ax_tree) if app_data is None: capture["focused_element"] = FocusedElement().to_dict() capture["visible_text"] = "" - capture["url"] = None + capture["url"] = _extract_url_from_trigger(trigger) if trigger is not None else None return - capture["focused_element"] = _extract_focused_element(app_data).to_dict() + focused = _extract_focused_element(app_data) + capture["focused_element"] = focused.to_dict() capture["visible_text"] = _render_visible_text(app_data) - capture["url"] = _extract_url(app_data) + + url = _extract_url(app_data) + # All URL fallbacks are gated on bundle-is-browser, so a code editor + # whose buffer contains an https URL never surfaces as a captured URL. + is_browser = _is_browser_bundle(str(app_data.get("bundle_id") or "")) + if not url and is_browser: + # Edge/Chrome on Windows sometimes only expose the address bar + # as the focused AXEdit, not as a child element of the window. + url = _extract_url_from_text(focused.value) + if not url and trigger is not None: + url = _extract_url_from_trigger(trigger) + capture["url"] = url def _frontmost_app(ax_tree: dict[str, Any]) -> dict[str, Any] | None: @@ -117,17 +192,75 @@ def _render_visible_text(app_data: dict[str, Any]) -> str: def _extract_url(app_data: dict[str, Any]) -> str | None: bundle = app_data.get("bundle_id", "") - if bundle not in _BROWSER_BUNDLES: + if not _is_browser_bundle(bundle): return None + + # Two-pass walk: prefer an element whose title/identifier looks like + # an address bar ("Address and search bar"), then fall back to any + # url-bar-shaped element. This avoids picking up an in-page search + # field that happens to be the first AXEdit on the page. 
+ candidates: list[tuple[bool, str]] = [] for window in app_data.get("windows", []): - for el in window.get("elements", []): - if el.get("role") != "AXTextField": - continue + _collect_url_bar_candidates(window.get("elements", []), candidates) + + candidates.sort(key=lambda c: 0 if c[0] else 1) + for _is_named, value in candidates: + url = _normalise_url(value) + if url: + return url + return None + + +def _collect_url_bar_candidates( + elements: list[dict[str, Any]], + out: list[tuple[bool, str]], +) -> None: + """Recursive walk: collect (is_named_addr_bar, value) tuples.""" + for el in elements: + role = el.get("role", "") or "" + if role in _URL_BAR_ROLES: value = (el.get("value") or "").strip() - if not value: - continue - if _URL_RE.search(value): - return value - if "." in value and " " not in value: - return f"https://{value}" + if value: + hint_blob = " ".join( + str(el.get(k) or "") for k in ("title", "identifier", "name") + ).lower() + is_named = any(h in hint_blob for h in _URL_BAR_NAME_HINTS) + out.append((is_named, value)) + children = el.get("children") + if children: + _collect_url_bar_candidates(children, out) + + +def _normalise_url(value: str) -> str | None: + """Turn an address-bar string into a normalised URL or ``None``.""" + if not value: + return None + match = _URL_RE.search(value) + if match: + return match.group(0) + if "." in value and " " not in value: + return f"https://{value}" return None + + +def _extract_url_from_text(text: str) -> str | None: + """Pull the first ``https?://...`` substring out of free-form text.""" + if not text: + return None + match = _URL_RE.search(text) + return match.group(0) if match else None + + +def _extract_url_from_trigger(trigger: dict[str, Any]) -> str | None: + """Last-ditch URL recovery: scan the watcher trigger for a URL. + + Browsers rarely put the full URL in the window title, but some sites + (and some browser configurations) do. 
We only fall back here when + the AX-tree path produced nothing, so the noise floor stays low. + Restricted to triggers whose ``bundle_id`` looks like a browser, so + we don't misclassify a Word document path that happens to contain + ``https://`` as a URL. + """ + if not _is_browser_bundle(str(trigger.get("bundle_id") or "")): + return None + return _extract_url_from_text(str(trigger.get("window_title") or "")) diff --git a/src/openchronicle/capture/scheduler.py b/src/openchronicle/capture/scheduler.py index bcfe00d..cf65ef5 100644 --- a/src/openchronicle/capture/scheduler.py +++ b/src/openchronicle/capture/scheduler.py @@ -20,7 +20,7 @@ from ..store import fts as fts_store from . import ax_capture, s1_parser, screenshot, window_meta from .event_dispatcher import EventDispatcher -from .watcher import AXWatcherProcess +from .watcher import EventWatcher, create_watcher logger = get("openchronicle.capture") @@ -33,6 +33,97 @@ def _safe_filename(ts: str) -> str: return ts.replace(":", "-").replace("+", "p") +def _public_trigger(trigger: dict[str, Any] | None) -> dict[str, Any]: + """Return the schema-stable trigger fields suitable for capture.json. + + Internal-only hints (windows HWND, PID — used to anchor the helper + subprocess) live on the trigger dict for plumbing convenience but + must not leak into the persisted JSON, otherwise the on-disk schema + diverges between mac and Windows. + """ + if not trigger: + return {"event_type": "heartbeat"} + return { + "event_type": trigger.get("event_type", ""), + "bundle_id": trigger.get("bundle_id", ""), + "window_title": trigger.get("window_title", ""), + } + + +def _resolve_window_meta( + primary: dict[str, str], + ax_tree: dict[str, Any] | None, + trigger: dict[str, Any] | None, +) -> dict[str, str]: + """Backfill empty ``window_meta`` fields from authoritative secondaries. + + The primary source — ``window_meta.active_window()`` — calls + ``GetForegroundWindow`` on Windows. 
From a session-isolated daemon + worker thread (a known ConPTY edge case) that returns 0, leaving the + capture's ``window_meta`` empty even when the AX helper *did* + successfully introspect the same window via the anchored HWND/PID. + Downstream stages then surface the missing app name as ``Unknown``. + + Two ranked fallbacks fill the gap, mirroring the data the macOS + osascript path would have produced: + + 1. **AX tree's frontmost app** — the helper was invoked with the + watcher's anchor hwnd/pid, so its ``apps[].name`` / + ``apps[].bundle_id`` / ``windows[].title`` describe exactly the + window the user was on at capture time. + 2. **Watcher trigger** — what `WinEventHook` saw when the event + fired. Used last because the hwnd may have changed in the few ms + before `_build_capture` ran. ``app_name`` is derived from + ``bundle_id`` (the executable path) since the trigger schema + intentionally has no app_name field. + + Mac never hits the fallbacks because ``osascript`` is reliable when + AX permission is granted, but the function is platform-agnostic so + behaviour stays uniform. + """ + out = dict(primary) + + ax_app: dict[str, Any] | None = None + ax_window: dict[str, Any] | None = None + if isinstance(ax_tree, dict): + apps = ax_tree.get("apps") or [] + for app in apps: + if app.get("is_frontmost"): + ax_app = app + break + if ax_app is None and apps: + ax_app = apps[0] + if ax_app is not None: + windows = ax_app.get("windows") or [] + for w in windows: + if w.get("focused"): + ax_window = w + break + if ax_window is None and windows: + ax_window = windows[0] + + if not out.get("app_name"): + if ax_app and ax_app.get("name"): + out["app_name"] = str(ax_app["name"]) + elif trigger and trigger.get("bundle_id"): + # Trigger has no app_name; derive it from the exe path's stem. 
+ out["app_name"] = Path(str(trigger["bundle_id"])).stem + + if not out.get("title"): + if ax_window and ax_window.get("title"): + out["title"] = str(ax_window["title"]) + elif trigger and trigger.get("window_title"): + out["title"] = str(trigger["window_title"]) + + if not out.get("bundle_id"): + if ax_app and ax_app.get("bundle_id"): + out["bundle_id"] = str(ax_app["bundle_id"]) + elif trigger and trigger.get("bundle_id"): + out["bundle_id"] = str(trigger["bundle_id"]) + + return out + + def _build_capture( cfg: CaptureConfig, provider: ax_capture.AXProvider, @@ -49,7 +140,7 @@ def _build_capture( out: dict[str, Any] = { "timestamp": ts, "schema_version": 2, - "trigger": trigger or {"event_type": "heartbeat"}, + "trigger": _public_trigger(trigger), } meta = window_meta.active_window() @@ -60,7 +151,19 @@ def _build_capture( } if provider.available: - result = provider.capture_frontmost(focused_window_only=True) + # On Windows the helper subprocess can't see the desktop, so we + # must hand it the foreground HWND/PID. The watcher includes + # ``hwnd`` in every event it emits — that's the exact window the + # event came from, eliminating any race vs polling. Heartbeat + # captures (no trigger) fall back to GetForegroundWindow inside + # the provider. Mac ignores these hints. + anchor_hwnd = int((trigger or {}).get("hwnd") or 0) + anchor_pid = int((trigger or {}).get("pid") or 0) + result = provider.capture_frontmost( + focused_window_only=True, + anchor_hwnd=anchor_hwnd, + anchor_pid=anchor_pid, + ) if result is not None: out["ax_tree"] = result.raw_json out["ax_metadata"] = result.metadata @@ -79,7 +182,14 @@ def _build_capture( "height": shot.height, } - s1_parser.enrich(out) + # Backfill empty window_meta fields from the AX tree / trigger so a + # GetForegroundWindow hiccup on Windows doesn't surface downstream as + # "Unknown". See _resolve_window_meta for ranking + rationale. 
+ out["window_meta"] = _resolve_window_meta( + out["window_meta"], out.get("ax_tree"), trigger, + ) + + s1_parser.enrich(out, trigger=trigger) return out @@ -301,7 +411,7 @@ async def run_forever( runner = _CaptureRunner(cfg, provider, pre_capture_hook=pre_capture_hook) runner.start_worker() - watcher: AXWatcherProcess | None = None + watcher: EventWatcher | None = None dispatcher: EventDispatcher | None = None def _on_capture(trigger: dict[str, Any] | None) -> None: @@ -310,7 +420,7 @@ def _on_capture(trigger: dict[str, Any] | None) -> None: runner.run_threaded(trigger) if cfg.event_driven: - watcher = AXWatcherProcess() + watcher = create_watcher() if watcher.available: dispatcher = EventDispatcher( _on_capture, diff --git a/src/openchronicle/capture/watcher.py b/src/openchronicle/capture/watcher.py index c1b1cc1..f4d60a7 100644 --- a/src/openchronicle/capture/watcher.py +++ b/src/openchronicle/capture/watcher.py @@ -1,11 +1,12 @@ -"""Long-running AX event watcher subprocess manager. +"""Long-running event watcher — cross-platform. -Wraps the vendored ``mac-ax-watcher`` Swift binary. Reads JSONL events from -stdout and dispatches them through a registered callback. Reconnects on -crash with exponential backoff. +macOS: vendored ``mac-ax-watcher`` Swift binary (subprocess, JSONL on stdout) +Windows: in-process thread using SetWinEventHook + low-level input hooks -Ported from Einsia-Partner's backend/core/memory/watcher.py — path resolution -adapted to OpenChronicle's bundled-resource layout (mirrors ax_capture.py). +Both implementations share the same public interface: + .available, .running, .on_event(callback), .start(), .stop() + +Use ``create_watcher()`` to get the right implementation for the current OS. 
class EventWatcher(Protocol):
    """Structural type satisfied by both the macOS and the Windows watcher."""

    @property
    def available(self) -> bool:
        ...

    @property
    def running(self) -> bool:
        ...

    def on_event(self, callback: Callable[[dict[str, Any]], None]) -> None:
        # Registers the callable that receives each event dict.
        ...

    def start(self) -> None:
        ...

    def stop(self, *, join_timeout: float = 5.0) -> None:
        ...


def create_watcher() -> EventWatcher:
    """Build the event watcher that matches ``platform.system()``.

    Windows gets a ``WinWatcherThread`` (imported lazily so the Windows-only
    module is never loaded elsewhere). Every other platform gets an
    ``AXWatcherProcess``; for anything other than macOS a warning is logged
    first.
    """
    os_name = platform.system()
    if os_name == "Windows":
        from .win_watcher import WinWatcherThread

        return WinWatcherThread()
    if os_name != "Darwin":
        logger.warning("No event watcher available for platform: %s", os_name)
    return AXWatcherProcess()
+Without them, ctypes assumes ``c_int`` (32-bit) for every parameter and +silently truncates HWND / HANDLE pointer values, producing garbage results +or — when a 64-bit pointer is treated as a Python int parameter — an +``OverflowError`` at call time. See the comment in ``win_watcher.py`` for +context. +""" + +from __future__ import annotations + +import ctypes +import ctypes.wintypes as wt +from pathlib import Path + +from ..logger import get + +logger = get("openchronicle.capture") + +user32 = ctypes.windll.user32 # type: ignore[attr-defined] +kernel32 = ctypes.windll.kernel32 # type: ignore[attr-defined] + +PROCESS_QUERY_LIMITED_INFORMATION = 0x1000 + +user32.GetForegroundWindow.argtypes = [] +user32.GetForegroundWindow.restype = wt.HWND + +user32.GetWindowTextLengthW.argtypes = [wt.HWND] +user32.GetWindowTextLengthW.restype = ctypes.c_int + +user32.GetWindowTextW.argtypes = [wt.HWND, wt.LPWSTR, ctypes.c_int] +user32.GetWindowTextW.restype = ctypes.c_int + +user32.GetWindowThreadProcessId.argtypes = [wt.HWND, ctypes.POINTER(wt.DWORD)] +user32.GetWindowThreadProcessId.restype = wt.DWORD + +kernel32.OpenProcess.argtypes = [wt.DWORD, wt.BOOL, wt.DWORD] +kernel32.OpenProcess.restype = wt.HANDLE + +kernel32.CloseHandle.argtypes = [wt.HANDLE] +kernel32.CloseHandle.restype = wt.BOOL + +kernel32.QueryFullProcessImageNameW.argtypes = [ + wt.HANDLE, wt.DWORD, wt.LPWSTR, ctypes.POINTER(wt.DWORD), +] +kernel32.QueryFullProcessImageNameW.restype = wt.BOOL + + +def _get_foreground_window() -> int: + return user32.GetForegroundWindow() or 0 + + +def _get_window_text(hwnd: int) -> str: + if not hwnd: + return "" + length = user32.GetWindowTextLengthW(hwnd) + if length <= 0: + return "" + buf = ctypes.create_unicode_buffer(length + 1) + user32.GetWindowTextW(hwnd, buf, length + 1) + return buf.value + + +def _get_window_pid(hwnd: int) -> int: + if not hwnd: + return 0 + pid = wt.DWORD(0) + user32.GetWindowThreadProcessId(hwnd, ctypes.byref(pid)) + return pid.value + + +def 
_get_process_exe(pid: int) -> str: + if not pid: + return "" + handle = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid) + if not handle: + return "" + try: + buf = ctypes.create_unicode_buffer(1024) + size = wt.DWORD(1024) + ok = kernel32.QueryFullProcessImageNameW(handle, 0, buf, ctypes.byref(size)) + return buf.value if ok else "" + finally: + kernel32.CloseHandle(handle) + + +def get_active_window_info() -> dict[str, str]: + """Return foreground window metadata on Windows.""" + try: + hwnd = _get_foreground_window() + if not hwnd: + return {"app_name": "", "title": "", "bundle_id": ""} + + title = _get_window_text(hwnd) + pid = _get_window_pid(hwnd) + exe_path = _get_process_exe(pid) + app_name = Path(exe_path).stem if exe_path else "" + + return { + "app_name": app_name, + "title": title, + "bundle_id": exe_path, + } + except Exception as exc: # noqa: BLE001 + logger.warning("win_meta: failed to get active window info: %s", exc) + return {"app_name": "", "title": "", "bundle_id": ""} diff --git a/src/openchronicle/capture/win_watcher.py b/src/openchronicle/capture/win_watcher.py new file mode 100644 index 0000000..5434705 --- /dev/null +++ b/src/openchronicle/capture/win_watcher.py @@ -0,0 +1,900 @@ +"""Windows event watcher using SetWinEventHook + low-level input hooks. + +Emits the same event_type names used by the macOS AX watcher so the +downstream EventDispatcher works unchanged: + + EVENT_SYSTEM_FOREGROUND → AXApplicationActivated + AXFocusedWindowChanged + EVENT_OBJECT_FOCUS → AXFocusedWindowChanged + EVENT_OBJECT_NAMECHANGE → AXTitleChanged + EVENT_OBJECT_VALUECHANGE → AXValueChanged + WH_MOUSE_LL click → UserMouseClick (with x/y/button + element details) + WH_KEYBOARD_LL key → UserTextInput (debounced, modifier/nav filtered) + +Parity with mac-ax-watcher.swift: + + * 5s typing debounce (matches kTextInputDebounceSeconds), with a 60s + safety cap that force-flushes on continuous typing. 
+ * Ctrl / Win held → treated as shortcut, does NOT reset the debounce + timer (matches mac's Cmd/Ctrl filter; Alt is allowed through for + AltGr / international layouts). + * Navigation keys (arrows, F1–F24, Home/End/PgUp/PgDn, Esc, etc.) are + filtered out so they don't count as "typing". + * Pending UserTextInput is flushed on focus change and on mouse + click, so typed text is attributed to the *outgoing* field rather + than the new one the user just moved to. + * Each emitted event carries a local-tz ISO 8601 ``timestamp``. + * UserMouseClick events carry ``details = {button, x, y, element}``, + matching the mac shape exactly. ``button`` is one of + ``left | right | other`` (mac doesn't distinguish middle clicks — + middle/X-buttons collapse to ``other``). + * UserTextInput events carry ``details = {reason, element}``. + * The ``details.element`` dict is populated best-effort via the + GUI-thread focus query (``GetGUIThreadInfo``) plus a class-name + lookup. Mac's element comes from a synchronous AX hit-test which + Windows can't do from inside a low-level hook callback (the system + disables hooks if the callback exceeds LowLevelHooksTimeout ≈ + 300ms). When we can't resolve a meaningful element we still emit + the field with empty strings so the JSON schema is identical to + mac's; downstream consumers tolerate empty values. + * Window-title and element redaction kicks in when the focused + control's class name looks like a password field (Edit + ES_PASSWORD + style), matching mac's ``isFocusedSecure`` AX-subrole check. 
+""" + +from __future__ import annotations + +import ctypes +import ctypes.wintypes as wt +import threading +import time +from collections.abc import Callable +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from ..logger import get + +logger = get("openchronicle.capture") + +user32 = ctypes.windll.user32 # type: ignore[attr-defined] +kernel32 = ctypes.windll.kernel32 # type: ignore[attr-defined] + +# ─── WinEvent constants ────────────────────────────────────────────────── +EVENT_SYSTEM_FOREGROUND = 0x0003 +EVENT_OBJECT_FOCUS = 0x8005 +EVENT_OBJECT_NAMECHANGE = 0x800C +EVENT_OBJECT_VALUECHANGE = 0x800E + +WINEVENT_OUTOFCONTEXT = 0x0000 +WINEVENT_SKIPOWNPROCESS = 0x0002 + +OBJID_WINDOW = 0 + +# Low-level input hook constants +WH_MOUSE_LL = 14 +WH_KEYBOARD_LL = 13 + +WM_LBUTTONDOWN = 0x0201 +WM_RBUTTONDOWN = 0x0204 +WM_MBUTTONDOWN = 0x0207 +WM_XBUTTONDOWN = 0x020B + +WM_KEYDOWN = 0x0100 +WM_SYSKEYDOWN = 0x0104 +WM_QUIT = 0x0012 + +PROCESS_QUERY_LIMITED_INFORMATION = 0x1000 + +# ─── Virtual-key codes used for the "is this a typing key?" filter ── +VK_BACK = 0x08 +VK_TAB = 0x09 +VK_RETURN = 0x0D +VK_SHIFT = 0x10 +VK_CONTROL = 0x11 +VK_MENU = 0x12 # Alt +VK_PAUSE = 0x13 +VK_CAPITAL = 0x14 +VK_ESCAPE = 0x1B +VK_SPACE = 0x20 +VK_PRIOR = 0x21 +VK_NEXT = 0x22 +VK_END = 0x23 +VK_HOME = 0x24 +VK_LEFT = 0x25 +VK_UP = 0x26 +VK_RIGHT = 0x27 +VK_DOWN = 0x28 +VK_SELECT = 0x29 +VK_PRINT = 0x2A +VK_EXECUTE = 0x2B +VK_SNAPSHOT = 0x2C +VK_INSERT = 0x2D +VK_DELETE = 0x2E +VK_HELP = 0x2F +VK_LWIN = 0x5B +VK_RWIN = 0x5C +VK_APPS = 0x5D +VK_F1 = 0x70 +VK_F24 = 0x87 +VK_NUMLOCK = 0x90 +VK_SCROLL = 0x91 + +# Held-modifier mask returned by GetAsyncKeyState's high bit. +_HIGH_BIT = 0x8000 + +# ─── ctypes pointer-sized aliases ─────────────────────────────────────── +# LRESULT / LONG_PTR / ULONG_PTR are pointer-sized on 64-bit Windows. 
+LRESULT = ctypes.c_ssize_t +ULONG_PTR = ctypes.c_size_t + + +class POINT(ctypes.Structure): + _fields_ = [("x", wt.LONG), ("y", wt.LONG)] + + +class MSLLHOOKSTRUCT(ctypes.Structure): + _fields_ = [ + ("pt", POINT), + ("mouseData", wt.DWORD), + ("flags", wt.DWORD), + ("time", wt.DWORD), + ("dwExtraInfo", ULONG_PTR), + ] + + +class KBDLLHOOKSTRUCT(ctypes.Structure): + _fields_ = [ + ("vkCode", wt.DWORD), + ("scanCode", wt.DWORD), + ("flags", wt.DWORD), + ("time", wt.DWORD), + ("dwExtraInfo", ULONG_PTR), + ] + + +# ─── Win32 prototypes (argtypes/restype are mandatory on 64-bit) ───── +# Without these, ctypes assumes c_int for every parameter, silently +# truncating 64-bit handles (HWND, HHOOK, HANDLE) and pointer values. +# That's the root cause of the OverflowError previously seen on +# CallNextHookEx, but the same bug lurks in every other call. +user32.GetForegroundWindow.argtypes = [] +user32.GetForegroundWindow.restype = wt.HWND + +user32.GetWindowTextLengthW.argtypes = [wt.HWND] +user32.GetWindowTextLengthW.restype = ctypes.c_int + +user32.GetWindowTextW.argtypes = [wt.HWND, wt.LPWSTR, ctypes.c_int] +user32.GetWindowTextW.restype = ctypes.c_int + +user32.GetWindowThreadProcessId.argtypes = [wt.HWND, ctypes.POINTER(wt.DWORD)] +user32.GetWindowThreadProcessId.restype = wt.DWORD + +user32.SetWinEventHook.argtypes = [ + wt.DWORD, wt.DWORD, wt.HMODULE, + ctypes.c_void_p, # WINEVENTPROC – ctypes will accept the WINFUNCTYPE callable + wt.DWORD, wt.DWORD, wt.DWORD, +] +user32.SetWinEventHook.restype = wt.HANDLE + +user32.UnhookWinEvent.argtypes = [wt.HANDLE] +user32.UnhookWinEvent.restype = wt.BOOL + +user32.SetWindowsHookExW.argtypes = [ + ctypes.c_int, ctypes.c_void_p, wt.HINSTANCE, wt.DWORD, +] +user32.SetWindowsHookExW.restype = wt.HHOOK + +user32.UnhookWindowsHookEx.argtypes = [wt.HHOOK] +user32.UnhookWindowsHookEx.restype = wt.BOOL + +user32.CallNextHookEx.argtypes = [wt.HHOOK, ctypes.c_int, wt.WPARAM, wt.LPARAM] +user32.CallNextHookEx.restype = LRESULT + 
# Message-loop, key-state and process-query prototypes, one row per
# function: (function, parameter types, return type).
for _fn, _params, _result in (
    (
        user32.GetMessageW,
        [ctypes.POINTER(wt.MSG), wt.HWND, wt.UINT, wt.UINT],
        wt.BOOL,
    ),
    (user32.TranslateMessage, [ctypes.POINTER(wt.MSG)], wt.BOOL),
    (user32.DispatchMessageW, [ctypes.POINTER(wt.MSG)], LRESULT),
    (
        user32.PostThreadMessageW,
        [wt.DWORD, wt.UINT, wt.WPARAM, wt.LPARAM],
        wt.BOOL,
    ),
    (user32.GetAsyncKeyState, [ctypes.c_int], ctypes.c_short),
    (kernel32.OpenProcess, [wt.DWORD, wt.BOOL, wt.DWORD], wt.HANDLE),
    (kernel32.CloseHandle, [wt.HANDLE], wt.BOOL),
    (
        kernel32.QueryFullProcessImageNameW,
        [wt.HANDLE, wt.DWORD, wt.LPWSTR, ctypes.POINTER(wt.DWORD)],
        wt.BOOL,
    ),
):
    _fn.argtypes = _params
    _fn.restype = _result
# Keep the loop variables out of the module namespace.
del _fn, _params, _result


# ─── Focused-element discovery (best-effort, no UIA) ────────────────────
# Plain Win32 only (no COM, no UIA). These calls feed ``details.element``
# so the event shape lines up with the mac watcher's describeElement().

# Filled in by GetGUIThreadInfo; the caller must set ``cbSize`` first.
class GUITHREADINFO(ctypes.Structure):
    _fields_ = [
        ("cbSize", wt.DWORD), ("flags", wt.DWORD),
        ("hwndActive", wt.HWND), ("hwndFocus", wt.HWND),
        ("hwndCapture", wt.HWND), ("hwndMenuOwner", wt.HWND),
        ("hwndMoveSize", wt.HWND), ("hwndCaret", wt.HWND),
        ("rcCaret", wt.RECT),
    ]


user32.GetGUIThreadInfo.restype = wt.BOOL
user32.GetGUIThreadInfo.argtypes = [wt.DWORD, ctypes.POINTER(GUITHREADINFO)]

user32.GetClassNameW.restype = ctypes.c_int
user32.GetClassNameW.argtypes = [wt.HWND, wt.LPWSTR, ctypes.c_int]

# GetWindowLongPtrW only exists as a function on 64-bit Windows; on 32-bit
# Python (rare) it's GetWindowLongW. Use getattr so the import doesn't
# explode in the (currently unsupported) 32-bit case.
+_GetWindowLongPtr = getattr(user32, "GetWindowLongPtrW", user32.GetWindowLongW) +_GetWindowLongPtr.argtypes = [wt.HWND, ctypes.c_int] +_GetWindowLongPtr.restype = ctypes.c_ssize_t + +# WindowFromPoint / ScreenToClient — used to identify the element under +# the mouse on click, matching mac's AXUIElementCopyElementAtPosition. +user32.WindowFromPoint.argtypes = [POINT] +user32.WindowFromPoint.restype = wt.HWND + +# Style bits that indicate a password Edit control (ES_PASSWORD = 0x0020). +GWL_STYLE = -16 +ES_PASSWORD = 0x0020 +WS_EX_NOREDIRECTIONBITMAP = 0x00200000 # not used directly, kept for ref + + +# ─── Callback type for SetWinEventHook ───────────────────────────────── +WINEVENTPROC = ctypes.WINFUNCTYPE( + None, + wt.HANDLE, # hWinEventHook + wt.DWORD, # event + wt.HWND, # hwnd + ctypes.c_long, # idObject + ctypes.c_long, # idChild + wt.DWORD, # idEventThread + wt.DWORD, # dwmsEventTime +) + +# Low-level hook callback type +HOOKPROC = ctypes.WINFUNCTYPE( + LRESULT, + ctypes.c_int, + wt.WPARAM, + wt.LPARAM, +) + + +# ─── Tunables (matched to mac-ax-watcher.swift) ───────────────────── +_TEXT_INPUT_DEBOUNCE_SECONDS = 5.0 +_TEXT_INPUT_MAX_CONTINUOUS_SECONDS = 60.0 + + +def _now_iso_local() -> str: + """Local-tz ISO 8601 with milliseconds, matching mac-ax-watcher.""" + return datetime.now(timezone.utc).astimezone().isoformat(timespec="milliseconds") + + +def _is_typing_vk(vk: int) -> bool: + """Return True for keys that produce text content the user is typing. + + Modifiers, navigation keys, function keys, and lock keys are excluded + so they don't reset the debounce timer (mirrors the mac watcher's + private-use-area + modifier-flag filter). 
+ """ + if vk in (VK_SHIFT, VK_CONTROL, VK_MENU, VK_LWIN, VK_RWIN, VK_APPS): + return False + if vk in (VK_CAPITAL, VK_NUMLOCK, VK_SCROLL, VK_PAUSE): + return False + if vk == VK_ESCAPE: + return False + if VK_PRIOR <= vk <= VK_HELP: + # 0x21-0x2F: page up/down, end, home, arrows, select/print/exec, + # snapshot, insert, delete, help. None of these are "typing". + return False + if VK_F1 <= vk <= VK_F24: + return False + return True + + +def _get_window_text(hwnd: int) -> str: + if not hwnd: + return "" + length = user32.GetWindowTextLengthW(hwnd) + if length <= 0: + return "" + buf = ctypes.create_unicode_buffer(length + 1) + user32.GetWindowTextW(hwnd, buf, length + 1) + return buf.value + + +def _get_window_pid(hwnd: int) -> int: + if not hwnd: + return 0 + pid = wt.DWORD(0) + user32.GetWindowThreadProcessId(hwnd, ctypes.byref(pid)) + return pid.value + + +def _get_exe_path(pid: int) -> str: + if not pid: + return "" + handle = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid) + if not handle: + return "" + try: + buf = ctypes.create_unicode_buffer(1024) + size = wt.DWORD(1024) + ok = kernel32.QueryFullProcessImageNameW(handle, 0, buf, ctypes.byref(size)) + return buf.value if ok else "" + finally: + kernel32.CloseHandle(handle) + + +def _window_context(hwnd: int) -> tuple[int, str, str, str]: + """Resolve (pid, app_name, bundle_id, window_title) for a window. + + ``bundle_id`` is the full executable path — Windows' closest analogue + to a macOS bundle identifier. ``app_name`` is the executable basename + without extension, matching ``window_meta.active_window``. 
+ """ + title = _get_window_text(hwnd) + pid = _get_window_pid(hwnd) + exe = _get_exe_path(pid) + app_name = Path(exe).stem if exe else "" + return pid, app_name, exe, title + + +def _get_class_name(hwnd: int) -> str: + if not hwnd: + return "" + buf = ctypes.create_unicode_buffer(256) + n = user32.GetClassNameW(hwnd, buf, 256) + return buf.value if n > 0 else "" + + +# Map common Win32 window class names to mac AX role strings. The pool is +# tiny on purpose — only common controls have stable, well-known classes. +# Anything else falls back to ``AX`` so the role at least +# carries some signal for debugging without polluting the mac AX +# vocabulary with garbage. +_CLASSNAME_TO_ROLE = { + "Edit": "AXEdit", + "RICHEDIT50W": "AXTextArea", + "RICHEDIT60": "AXTextArea", + "RichEditA": "AXTextArea", + "RichEditW": "AXTextArea", + "Static": "AXStaticText", + "Button": "AXButton", + "ComboBox": "AXComboBox", + "ListBox": "AXList", + "SysListView32": "AXList", + "SysTreeView32": "AXOutline", + "SysHeader32": "AXHeading", + "SysTabControl32": "AXTabGroup", + "msctls_progress32": "AXProgressIndicator", + "msctls_trackbar32": "AXSlider", + "msctls_updown32": "AXIncrementor", + "ToolbarWindow32": "AXToolbar", + "ScrollBar": "AXScrollBar", + "#32768": "AXMenu", + "#32770": "AXGroup", +} + + +def _classname_to_role(class_name: str) -> str: + if not class_name: + return "" + if class_name in _CLASSNAME_TO_ROLE: + return _CLASSNAME_TO_ROLE[class_name] + return f"AX{class_name}" + + +def _is_secure_hwnd(hwnd: int) -> bool: + """True if ``hwnd`` is a password edit control (Edit + ES_PASSWORD).""" + if not hwnd: + return False + if _get_class_name(hwnd) != "Edit": + return False + try: + style = _GetWindowLongPtr(hwnd, GWL_STYLE) + except OSError: + return False + return bool(style & ES_PASSWORD) + + +def _focused_hwnd_for_window(hwnd: int) -> int: + """Return the GUI focus HWND owned by the same thread as ``hwnd``. 
+ + We pull the *thread*'s focus rather than ``GetFocus`` (which only + returns a focus owned by the calling thread). This lines up with + mac's ``AXUIElementCopyAttributeValue(app, kAXFocusedUIElementAttribute)``. + """ + if not hwnd: + return 0 + pid = wt.DWORD(0) + tid = user32.GetWindowThreadProcessId(hwnd, ctypes.byref(pid)) + if not tid: + return 0 + info = GUITHREADINFO() + info.cbSize = ctypes.sizeof(GUITHREADINFO) + if not user32.GetGUIThreadInfo(tid, ctypes.byref(info)): + return 0 + return int(info.hwndFocus or 0) + + +def _empty_element() -> dict[str, str]: + """Schema-stable empty element matching mac describeElement().""" + return { + "role": "", + "subrole": "", + "title": "", + "identifier": "", + "value": "", + } + + +def _describe_hwnd(hwnd: int) -> dict[str, str]: + """Describe a HWND as a mac-watcher-style element dict. + + Mirrors describeElement() in mac-ax-watcher.swift. Subrole / identifier + don't have direct Win32 equivalents — we leave them empty rather than + fabricate values, so consumers can tell "missing on Windows" apart + from "this control simply has none". + """ + if not hwnd: + return _empty_element() + class_name = _get_class_name(hwnd) + is_secure = _is_secure_hwnd(hwnd) + title = _get_window_text(hwnd) + if len(title) > 200: + title = title[:200] + "…" + return { + "role": _classname_to_role(class_name), + "subrole": "AXSecureTextField" if is_secure else "", + "title": title, + "identifier": "", + "value": "[REDACTED]" if is_secure else "", + } + + +def _is_focused_secure(window_hwnd: int) -> bool: + """True if the focused control inside the foreground window is a secure + edit. Matches mac-ax-watcher's ``isFocusedSecure``.""" + return _is_secure_hwnd(_focused_hwnd_for_window(window_hwnd)) + + +def _build_event( + event_type: str, + hwnd: int, + *, + details: dict[str, Any] | None = None, +) -> dict[str, Any]: + """Build an event dict matching mac-ax-watcher's JSONL shape. 
+ + The ``hwnd`` field is Windows-specific: it carries the foreground + HWND observed at the moment the hook fired. Downstream code uses it + to anchor the win-uia-helper subprocess (which is session-isolated + and can't see the desktop on its own). It has no mac counterpart — + on mac the helper uses NSWorkspace.frontmostApplication, which is + always available — so consumers of the cross-platform schema simply + ignore the field outside of Windows. + """ + pid, app_name, bundle_id, title = _window_context(hwnd) + if _is_focused_secure(hwnd): + title = "[REDACTED]" + event: dict[str, Any] = { + "event_type": event_type, + "pid": pid, + "app_name": app_name, + "bundle_id": bundle_id, + "window_title": title, + "timestamp": _now_iso_local(), + "hwnd": int(hwnd) if hwnd else 0, + } + if details is not None: + event["details"] = details + return event + + +def _ctrl_or_win_held() -> bool: + """True if any Ctrl or Win key is currently down (shortcut indicator).""" + return bool( + user32.GetAsyncKeyState(VK_CONTROL) & _HIGH_BIT + or user32.GetAsyncKeyState(VK_LWIN) & _HIGH_BIT + or user32.GetAsyncKeyState(VK_RWIN) & _HIGH_BIT + ) + + +class _TextInputAggregator: + """Debounces raw keystrokes into a single ``UserTextInput`` event. + + Mirrors ``InteractionTapper`` from mac-ax-watcher.swift: every typing + keyDown resets a 5s debounce timer; the first keystroke of a burst + captures the foreground window so a flush triggered by a focus + change still attributes the text to the *outgoing* field. A 60s + safety cap force-flushes during very long uninterrupted typing. + + A pending burst is flushed on focus change, mouse click, or + watcher shutdown — ensuring the order of events downstream is + "typed A, then clicked B" rather than "clicked B, typed A". 
+ """ + + def __init__( + self, + emit: Callable[[dict[str, Any]], None], + ) -> None: + self._emit = emit + self._lock = threading.Lock() + self._timer: threading.Timer | None = None + self._typing_started_at: float | None = None + self._typing_hwnd: int = 0 + + def on_keystroke(self) -> None: + """Record one keystroke. Resets the debounce timer.""" + force_flush = False + with self._lock: + if self._typing_started_at is None: + self._typing_started_at = time.monotonic() + # Capture the target window on the first keystroke of a + # burst. We deliberately don't re-capture mid-burst — a + # transient focus blip shouldn't reattribute the text. + self._typing_hwnd = user32.GetForegroundWindow() or 0 + else: + elapsed = time.monotonic() - self._typing_started_at + if elapsed >= _TEXT_INPUT_MAX_CONTINUOUS_SECONDS: + force_flush = True + + if not force_flush: + self._cancel_timer_locked() + t = threading.Timer( + _TEXT_INPUT_DEBOUNCE_SECONDS, + lambda: self.flush("debounce"), + ) + t.daemon = True + self._timer = t + t.start() + + if force_flush: + self.flush("max_duration") + + def flush(self, reason: str) -> None: + """Emit a pending UserTextInput. Idempotent if nothing buffered.""" + with self._lock: + if self._typing_started_at is None: + return + hwnd = self._typing_hwnd + self._typing_started_at = None + self._typing_hwnd = 0 + self._cancel_timer_locked() + + # Resolve the focused HWND of the burst-target window. This is + # the closest Win32 analogue to mac's ``focusedElement`` AX call. + # When the control is a password edit, _describe_hwnd returns + # role=AXEdit + subrole=AXSecureTextField + value=[REDACTED] + # so the schema matches mac's secure-field redaction exactly. 
+ focus_hwnd = _focused_hwnd_for_window(hwnd) or hwnd + element = _describe_hwnd(focus_hwnd) + details = {"reason": reason, "element": element} + try: + self._emit(_build_event("UserTextInput", hwnd, details=details)) + except Exception as exc: # noqa: BLE001 + logger.warning("UserTextInput emit failed: %s", exc) + + def shutdown(self) -> None: + with self._lock: + self._cancel_timer_locked() + self._typing_started_at = None + self._typing_hwnd = 0 + + def _cancel_timer_locked(self) -> None: + if self._timer is not None: + self._timer.cancel() + self._timer = None + + +class WinWatcherThread: + """Windows event watcher running in a dedicated message-loop thread. + + Mirrors the AXWatcherProcess interface (``available``, ``running``, + ``on_event``, ``start``, ``stop``) so the scheduler can use either + interchangeably. + """ + + def __init__(self) -> None: + self._callback: Callable[[dict[str, Any]], None] | None = None + self._thread: threading.Thread | None = None + self._stop_event = threading.Event() + self._hooks: list[int] = [] + self._ll_hooks: list[int] = [] + + # Must hold strong refs to the C callback objects so they're not + # garbage-collected while a Win32 hook still has their pointer. 
+ self._winevent_proc: WINEVENTPROC | None = None + self._mouse_proc: HOOKPROC | None = None + self._keyboard_proc: HOOKPROC | None = None + + self._text_input = _TextInputAggregator(self._dispatch_safe) + + @property + def available(self) -> bool: + return True + + @property + def running(self) -> bool: + return self._thread is not None and self._thread.is_alive() + + def on_event(self, callback: Callable[[dict[str, Any]], None]) -> None: + self._callback = callback + + def start(self) -> None: + if self.running: + return + self._stop_event.clear() + self._thread = threading.Thread( + target=self._run_loop, daemon=True, name="win-watcher", + ) + self._thread.start() + logger.info("Windows event watcher started") + + def stop(self, *, join_timeout: float = 5.0) -> None: + self._stop_event.set() + # Flush any pending typing before tearing down so no UserTextInput + # is silently dropped on shutdown. + self._text_input.flush("shutdown") + + if self._thread and self._thread.is_alive(): + tid = self._thread.ident + if tid: + # Posting WM_QUIT unblocks GetMessageW; without it the + # message loop sits forever and join() times out. + user32.PostThreadMessageW(wt.DWORD(tid), WM_QUIT, 0, 0) + self._thread.join(timeout=join_timeout) + if self._thread.is_alive(): + logger.warning( + "Windows watcher thread did not exit within %.1fs", + join_timeout, + ) + self._thread = None + logger.info("Windows event watcher stopped") + + # ─── Internal: message loop & dispatch ──────────────────────────── + + def _dispatch_safe(self, event: dict[str, Any]) -> None: + cb = self._callback + if cb is None: + return + try: + cb(event) + except Exception as exc: # noqa: BLE001 + logger.warning("watcher event callback error: %s", exc) + + def _emit_internal(self, event_type: str) -> None: + """Log a watcher-internal status event. 
+ + Mirrors mac-ax-watcher's ``_*``-prefixed events: useful for + diagnostics, but never forwarded to the dispatcher (which only + understands semantic AX/Interaction events). + """ + logger.debug("Watcher internal event: %s", event_type) + + def _run_loop(self) -> None: + try: + self._install_hooks() + self._emit_internal("_watcher_started") + + msg = wt.MSG() + while not self._stop_event.is_set(): + # Pump messages. WM_QUIT (posted by stop()) makes + # GetMessageW return 0; -1 indicates an error. + result = user32.GetMessageW(ctypes.byref(msg), None, 0, 0) + if result == 0 or result == -1: + break + user32.TranslateMessage(ctypes.byref(msg)) + user32.DispatchMessageW(ctypes.byref(msg)) + except Exception as exc: # noqa: BLE001 + logger.error("Windows watcher loop error: %s", exc) + finally: + self._text_input.shutdown() + self._remove_hooks() + self._emit_internal("_watcher_stopped") + + # ─── Hook install / remove ──────────────────────────────────────── + + def _install_hooks(self) -> None: + # WinEvent hooks for window / focus events. ``OUTOFCONTEXT`` + # delivers events on this thread via the message loop. + self._winevent_proc = WINEVENTPROC(self._winevent_callback) + winevent_proc_ptr = ctypes.cast(self._winevent_proc, ctypes.c_void_p) + events = ( + EVENT_SYSTEM_FOREGROUND, + EVENT_OBJECT_FOCUS, + EVENT_OBJECT_NAMECHANGE, + EVENT_OBJECT_VALUECHANGE, + ) + for ev in events: + hook = user32.SetWinEventHook( + ev, ev, + None, + winevent_proc_ptr, + 0, 0, + WINEVENT_OUTOFCONTEXT | WINEVENT_SKIPOWNPROCESS, + ) + if hook: + self._hooks.append(hook) + else: + logger.warning("SetWinEventHook failed for event 0x%04X", ev) + + # Low-level mouse hook for click detection. 
+ self._mouse_proc = HOOKPROC(self._mouse_callback) + mhook = user32.SetWindowsHookExW( + WH_MOUSE_LL, + ctypes.cast(self._mouse_proc, ctypes.c_void_p), + None, 0, + ) + if mhook: + self._ll_hooks.append(mhook) + else: + logger.warning("SetWindowsHookExW(WH_MOUSE_LL) failed") + + # Low-level keyboard hook for typing detection. + self._keyboard_proc = HOOKPROC(self._keyboard_callback) + khook = user32.SetWindowsHookExW( + WH_KEYBOARD_LL, + ctypes.cast(self._keyboard_proc, ctypes.c_void_p), + None, 0, + ) + if khook: + self._ll_hooks.append(khook) + else: + logger.warning("SetWindowsHookExW(WH_KEYBOARD_LL) failed") + + def _remove_hooks(self) -> None: + for hook in self._hooks: + try: + user32.UnhookWinEvent(hook) + except OSError as exc: + logger.debug("UnhookWinEvent error: %s", exc) + self._hooks.clear() + for hook in self._ll_hooks: + try: + user32.UnhookWindowsHookEx(hook) + except OSError as exc: + logger.debug("UnhookWindowsHookEx error: %s", exc) + self._ll_hooks.clear() + # Drop the C callbacks last so they outlive the hooks they're + # attached to. (Order matters: an in-flight callback that fires + # between Unhook and refcount-drop would otherwise crash.) + self._winevent_proc = None + self._mouse_proc = None + self._keyboard_proc = None + + # ─── Callbacks ──────────────────────────────────────────────────── + + def _winevent_callback( + self, + hook: int, event: int, hwnd: int, + id_object: int, id_child: int, + thread_id: int, event_time: int, + ) -> None: + if id_object != OBJID_WINDOW: + return + + # Mac flushes any pending UserTextInput on focus / app + # activation so typed text is attributed to the *outgoing* + # field. We do the same here. 
+ if event in (EVENT_SYSTEM_FOREGROUND, EVENT_OBJECT_FOCUS): + self._text_input.flush("focus_change") + + if event == EVENT_SYSTEM_FOREGROUND: + self._dispatch_safe(_build_event("AXApplicationActivated", hwnd)) + self._dispatch_safe(_build_event("AXFocusedWindowChanged", hwnd)) + elif event == EVENT_OBJECT_FOCUS: + self._dispatch_safe(_build_event("AXFocusedWindowChanged", hwnd)) + elif event == EVENT_OBJECT_NAMECHANGE: + self._dispatch_safe(_build_event("AXTitleChanged", hwnd)) + elif event == EVENT_OBJECT_VALUECHANGE: + self._dispatch_safe(_build_event("AXValueChanged", hwnd)) + + def _mouse_callback(self, ncode: int, wparam: int, lparam: int) -> int: + # Always chain to the next hook, no matter what — failing to do + # so makes mouse input feel laggy / dropped system-wide. + try: + if ncode >= 0 and wparam in ( + WM_LBUTTONDOWN, WM_RBUTTONDOWN, + WM_MBUTTONDOWN, WM_XBUTTONDOWN, + ): + self._handle_mouse_down(wparam, lparam) + except Exception as exc: # noqa: BLE001 + logger.debug("mouse callback error: %s", exc) + return user32.CallNextHookEx(None, ncode, wparam, lparam) + + def _handle_mouse_down(self, wparam: int, lparam: int) -> None: + # Flush pending typing first so order is "typed, then clicked" + # (matches mac-ax-watcher's flushText(reason: "mouse_click")). + self._text_input.flush("mouse_click") + + # mac collapses middle/X-button into "other" via .otherMouseDown. + # Match that exactly so downstream code doesn't need per-OS branches. + button = { + WM_LBUTTONDOWN: "left", + WM_RBUTTONDOWN: "right", + WM_MBUTTONDOWN: "other", + WM_XBUTTONDOWN: "other", + }.get(wparam, "other") + + try: + data = ctypes.cast( + ctypes.c_void_p(lparam), + ctypes.POINTER(MSLLHOOKSTRUCT), + ).contents + x, y = data.pt.x, data.pt.y + except (ValueError, OSError): + x = y = 0 + + # Resolve the HWND under the cursor; that's the click target for + # the mac equivalent (AXUIElementCopyElementAtPosition). Fall back + # to the foreground window when WindowFromPoint can't resolve. 
+ try: + click_hwnd = user32.WindowFromPoint(POINT(x, y)) or 0 + except (ValueError, OSError): + click_hwnd = 0 + hwnd = user32.GetForegroundWindow() or 0 + target_hwnd = click_hwnd or hwnd + element = _describe_hwnd(target_hwnd) + details: dict[str, Any] = { + "button": button, + "x": x, + "y": y, + "element": element, + } + self._dispatch_safe( + _build_event("UserMouseClick", hwnd, details=details) + ) + + def _keyboard_callback(self, ncode: int, wparam: int, lparam: int) -> int: + try: + if ncode >= 0 and wparam in (WM_KEYDOWN, WM_SYSKEYDOWN): + self._handle_key_down(lparam) + except Exception as exc: # noqa: BLE001 + logger.debug("keyboard callback error: %s", exc) + return user32.CallNextHookEx(None, ncode, wparam, lparam) + + def _handle_key_down(self, lparam: int) -> None: + try: + data = ctypes.cast( + ctypes.c_void_p(lparam), + ctypes.POINTER(KBDLLHOOKSTRUCT), + ).contents + vk = data.vkCode + except (ValueError, OSError): + return + + # Ctrl / Cmd-equivalent (Win) held = shortcut, not typing. + # Alt is allowed through because AltGr (right Alt) on + # international layouts produces real characters. + if _ctrl_or_win_held(): + return + + if not _is_typing_vk(vk): + return + + self._text_input.on_keystroke() diff --git a/src/openchronicle/capture/window_meta.py b/src/openchronicle/capture/window_meta.py index 96e1faa..8effc9d 100644 --- a/src/openchronicle/capture/window_meta.py +++ b/src/openchronicle/capture/window_meta.py @@ -1,6 +1,7 @@ -"""Foreground app / window metadata via osascript. macOS only in v1. +"""Foreground app / window metadata — cross-platform. -Extracted from Einsia-Partner's capture_service.get_active_window_macos(). 
+macOS: osascript (AppleScript via System Events) +Windows: ctypes (user32 / kernel32) """ from __future__ import annotations @@ -13,7 +14,7 @@ logger = get("openchronicle.capture") -_SCRIPT = """ +_MACOS_SCRIPT = """ tell application "System Events" set frontProc to first application process whose frontmost is true set appName to name of frontProc @@ -39,12 +40,10 @@ class WindowMeta: bundle_id: str = "" -def active_window() -> WindowMeta: - if platform.system() != "Darwin": - return WindowMeta() +def _active_window_darwin() -> WindowMeta: try: proc = subprocess.run( - ["osascript", "-e", _SCRIPT], capture_output=True, text=True, timeout=5 + ["osascript", "-e", _MACOS_SCRIPT], capture_output=True, text=True, timeout=5 ) except (FileNotFoundError, subprocess.TimeoutExpired) as exc: logger.warning("osascript failed: %s", exc) @@ -60,3 +59,29 @@ def active_window() -> WindowMeta: title=parts[1] if len(parts) > 1 else "", bundle_id=parts[2] if len(parts) > 2 else "", ) + + +def _active_window_windows() -> WindowMeta: + try: + from .win_meta import get_active_window_info + + info = get_active_window_info() + return WindowMeta( + app_name=info.get("app_name", ""), + title=info.get("title", ""), + bundle_id=info.get("bundle_id", ""), + ) + except Exception as exc: # noqa: BLE001 + logger.warning("win_meta failed: %s", exc) + return WindowMeta() + + +_SYSTEM = platform.system() + + +def active_window() -> WindowMeta: + if _SYSTEM == "Darwin": + return _active_window_darwin() + if _SYSTEM == "Windows": + return _active_window_windows() + return WindowMeta() diff --git a/src/openchronicle/cli.py b/src/openchronicle/cli.py index 6e7f880..3751e00 100644 --- a/src/openchronicle/cli.py +++ b/src/openchronicle/cli.py @@ -5,9 +5,11 @@ import contextlib import json import os +import platform import shutil import signal import subprocess +import sys from datetime import datetime from pathlib import Path @@ -21,6 +23,8 @@ from .store import entries as entries_mod from .store import 
fts, index_md +_IS_WINDOWS = platform.system() == "Windows" + app = typer.Typer( add_completion=False, no_args_is_help=True, @@ -39,6 +43,16 @@ def _init() -> config_mod.Config: def _is_pid_alive(pid: int) -> bool: + if _IS_WINDOWS: + import ctypes + + kernel32 = ctypes.windll.kernel32 # type: ignore[attr-defined] + PROCESS_QUERY_LIMITED_INFORMATION = 0x1000 + handle = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid) + if not handle: + return False + kernel32.CloseHandle(handle) + return True try: os.kill(pid, 0) except ProcessLookupError: @@ -77,7 +91,39 @@ def start( daemon.run(cfg, capture_only=capture_only) return - # Background: double-fork + if _IS_WINDOWS: + _start_background_windows(capture_only=capture_only) + else: + _start_background_unix(cfg, capture_only=capture_only) + + +def _start_background_windows(*, capture_only: bool) -> None: + """Launch the daemon as a detached subprocess on Windows.""" + import subprocess + + CREATE_NO_WINDOW = 0x08000000 + DETACHED_PROCESS = 0x00000008 + python_exe = sys.executable + cmd = [python_exe, "-m", "openchronicle.daemon"] + if capture_only: + cmd.append("--capture-only") + + proc = subprocess.Popen( + cmd, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + creationflags=CREATE_NO_WINDOW | DETACHED_PROCESS, + close_fds=True, + ) + console.print(f"[green]OpenChronicle started in background (pid {proc.pid}).[/green]") + console.print(f"Logs: {paths.logs_dir()}") + + +def _start_background_unix(cfg: "config_mod.Config", *, capture_only: bool) -> None: + """Launch the daemon via double-fork on Unix/macOS.""" + from . import daemon + if os.fork() != 0: console.print("[green]OpenChronicle started in background.[/green]") console.print(f"Logs: {paths.logs_dir()}") @@ -85,8 +131,6 @@ def start( os.setsid() if os.fork() != 0: os._exit(0) - # Redirect stdio to /dev/null. 
After dup2 the original fd is no longer - # needed; closing it avoids leaking one descriptor per daemon start. devnull = os.open(os.devnull, os.O_RDWR) for fd in (0, 1, 2): os.dup2(devnull, fd) @@ -104,8 +148,34 @@ def stop() -> None: if not pid: console.print("[yellow]Daemon not running.[/yellow]") raise typer.Exit(1) - os.kill(pid, signal.SIGTERM) - console.print(f"[green]Sent SIGTERM to pid {pid}.[/green]") + if _IS_WINDOWS: + _stop_windows(pid) + else: + os.kill(pid, signal.SIGTERM) + console.print(f"[green]Sent SIGTERM to pid {pid}.[/green]") + + +def _stop_windows(pid: int) -> None: + """Terminate a daemon process on Windows.""" + import ctypes + + kernel32 = ctypes.windll.kernel32 # type: ignore[attr-defined] + PROCESS_TERMINATE = 0x0001 + handle = kernel32.OpenProcess(PROCESS_TERMINATE, False, pid) + if not handle: + console.print(f"[red]Could not open process {pid}.[/red]") + raise typer.Exit(1) + try: + ok = kernel32.TerminateProcess(handle, 1) + if ok: + console.print(f"[green]Terminated process {pid}.[/green]") + else: + console.print(f"[red]Failed to terminate process {pid}.[/red]") + raise typer.Exit(1) + finally: + kernel32.CloseHandle(handle) + with contextlib.suppress(FileNotFoundError): + paths.pid_file().unlink() @app.command() @@ -255,6 +325,11 @@ def install_claude_code( def _claude_desktop_config_path() -> Path: + if _IS_WINDOWS: + appdata = os.environ.get("APPDATA", "") + if appdata: + return Path(appdata) / "Claude" / "claude_desktop_config.json" + return Path.home() / "AppData" / "Roaming" / "Claude" / "claude_desktop_config.json" return ( Path.home() / "Library" @@ -282,8 +357,9 @@ def _load_claude_desktop_config(path: Path) -> dict: def _restart_reminder(action: str) -> None: + quit_hint = "Ctrl+Q or right-click tray → Quit" if _IS_WINDOWS else "Cmd+Q" console.print( - f"[yellow]Claude Desktop must be fully quit (Cmd+Q) and reopened to {action}.[/yellow]" + f"[yellow]Claude Desktop must be fully quit ({quit_hint}) and reopened to 
{action}.[/yellow]" ) console.print( "[dim]The app only reads claude_desktop_config.json at launch. You won't need to " diff --git a/src/openchronicle/config.py b/src/openchronicle/config.py index 21794c8..d1b5cda 100644 --- a/src/openchronicle/config.py +++ b/src/openchronicle/config.py @@ -23,7 +23,7 @@ class ModelConfig: @dataclass class CaptureConfig: # Event-driven capture knobs - event_driven: bool = True # consume mac-ax-watcher events + event_driven: bool = True # consume AX/UIA watcher events (cross-platform) heartbeat_minutes: int = 10 # periodic capture even without events debounce_seconds: float = 3.0 # for AXValueChanged bursts min_capture_gap_seconds: float = 2.0 # between consecutive captures @@ -228,7 +228,7 @@ def load(path: Path | None = None) -> Config: # Accuracy-sensitive — pick a capable model. [capture] -event_driven = true # capture on window/app/typing events via mac-ax-watcher +event_driven = true # capture on window/app/typing events (macOS: AX watcher, Windows: SetWinEventHook) heartbeat_minutes = 10 # periodic capture even when nothing happens debounce_seconds = 3.0 # for AXValueChanged bursts min_capture_gap_seconds = 2.0 # minimum gap between consecutive captures @@ -240,7 +240,7 @@ def load(path: Path | None = None) -> Config: include_screenshot = true screenshot_max_width = 1920 screenshot_jpeg_quality = 80 -ax_depth = 100 # Electron apps (Claude Desktop, VS Code, Slack) have deep DOM; 8 only reaches the chrome +ax_depth = 100 # Electron apps have deep DOM; 8 only reaches the chrome (macOS: AX tree, Windows: UIA tree) ax_timeout_seconds = 3 [timeline] diff --git a/src/openchronicle/daemon.py b/src/openchronicle/daemon.py index de0af57..c5a0950 100644 --- a/src/openchronicle/daemon.py +++ b/src/openchronicle/daemon.py @@ -4,13 +4,18 @@ (wired in ``session/tick.py``) spawns the S2 reducer on a daemon thread, and the reducer's success callback kicks the classifier. 
No periodic writer loop is needed — each session produces exactly one reducer + classifier pass. + +Can also be invoked directly via ``python -m openchronicle.daemon`` (used by +the Windows background launcher in cli.py). """ from __future__ import annotations import asyncio import os +import platform import signal +import sys from contextlib import suppress from . import paths @@ -22,6 +27,8 @@ logger = get("openchronicle.daemon") +_IS_WINDOWS = platform.system() == "Windows" + async def _mcp_loop(cfg: Config) -> None: """Host the MCP server inside the daemon. On crash, back off and restart.""" @@ -95,9 +102,14 @@ def _handle_stop() -> None: stop.set() loop = asyncio.get_running_loop() - for sig in (signal.SIGTERM, signal.SIGINT): - with suppress(NotImplementedError): - loop.add_signal_handler(sig, _handle_stop) + if _IS_WINDOWS: + # Windows does not support add_signal_handler; use signal.signal instead + signal.signal(signal.SIGINT, lambda *_: loop.call_soon_threadsafe(_handle_stop)) + signal.signal(signal.SIGTERM, lambda *_: loop.call_soon_threadsafe(_handle_stop)) + else: + for sig in (signal.SIGTERM, signal.SIGINT): + with suppress(NotImplementedError): + loop.add_signal_handler(sig, _handle_stop) done_task = asyncio.create_task(stop.wait()) await asyncio.wait( @@ -123,3 +135,15 @@ def _handle_stop() -> None: def run(cfg: Config, *, capture_only: bool = False) -> None: asyncio.run(_run(cfg, capture_only=capture_only)) + + +if __name__ == "__main__": + from . import config as config_mod + from . 
import logger as logger_mod + + paths.ensure_dirs() + config_mod.write_default_if_missing() + logger_mod.setup(console=False) + cfg = config_mod.load() + capture_only = "--capture-only" in sys.argv + run(cfg, capture_only=capture_only) diff --git a/src/openchronicle/writer/llm.py b/src/openchronicle/writer/llm.py index 6e4317b..3aa245d 100644 --- a/src/openchronicle/writer/llm.py +++ b/src/openchronicle/writer/llm.py @@ -5,12 +5,41 @@ import json import os from typing import Any +from urllib.parse import urlparse from ..config import Config, resolve_api_key from ..logger import get logger = get("openchronicle.writer") +_LOCAL_OLLAMA_NO_PROXY = ("localhost", "127.0.0.1", "::1") + + +def _ensure_local_ollama_proxy_bypass(model: str, api_base: str) -> None: + """Ensure local Ollama calls don't get routed through corporate proxies. + + The daemon inherits environment variables only at process start. If the + user sets NO_PROXY later, an already-running daemon still lacks it and + litellm/httpx can send http://localhost:11434 to HTTP_PROXY, producing + proxy HTML like "403 Forbidden: incorrect proxy service was requested". + This makes the local-Ollama invariant explicit inside the process. 
+ """ + if not model.startswith("ollama/"): + return + + host = urlparse(api_base).hostname if api_base else "localhost" + if host not in _LOCAL_OLLAMA_NO_PROXY: + return + + for key in ("NO_PROXY", "no_proxy"): + existing = os.environ.get(key, "") + parts = [p.strip() for p in existing.split(",") if p.strip()] + lower_parts = {p.lower() for p in parts} + for token in _LOCAL_OLLAMA_NO_PROXY: + if token.lower() not in lower_parts: + parts.append(token) + os.environ[key] = ",".join(parts) + def call_llm( cfg: Config, @@ -30,6 +59,7 @@ def call_llm( import litellm # imported lazily to keep CLI startup fast model_cfg = cfg.model_for(stage) + _ensure_local_ollama_proxy_bypass(model_cfg.model, model_cfg.base_url) kwargs: dict[str, Any] = { "model": model_cfg.model, "messages": messages, diff --git a/tests/test_s1_parser.py b/tests/test_s1_parser.py deleted file mode 100644 index 8772644..0000000 --- a/tests/test_s1_parser.py +++ /dev/null @@ -1,206 +0,0 @@ -from __future__ import annotations - -from openchronicle.capture import s1_parser - - -def _ax_tree(*apps: dict) -> dict: - return {"apps": list(apps), "timestamp": "2026-04-21T10:00:00+08:00"} - - -def test_enrich_noop_without_ax_tree() -> None: - capture = {"timestamp": "x", "window_meta": {"app_name": "A"}} - s1_parser.enrich(capture) - assert "focused_element" not in capture - assert "visible_text" not in capture - - -def test_enrich_picks_frontmost_app() -> None: - capture = { - "ax_tree": _ax_tree( - {"name": "Background", "bundle_id": "b", "is_frontmost": False, "windows": []}, - { - "name": "Cursor", - "bundle_id": "com.todesktop.230313mzl4w4u92", - "is_frontmost": True, - "windows": [ - { - "title": "s1_parser.py", - "focused": True, - "elements": [ - { - "role": "AXTextArea", - "title": "editor", - "value": "def enrich(capture):\n ...", - } - ], - } - ], - }, - ) - } - s1_parser.enrich(capture) - assert capture["focused_element"]["role"] == "AXTextArea" - assert capture["focused_element"]["is_editable"] is 
True - assert capture["focused_element"]["has_value"] is True - assert capture["focused_element"]["value_length"] > 0 - assert "s1_parser.py" in capture["visible_text"] - assert capture["url"] is None - - -def test_enrich_extracts_browser_url() -> None: - capture = { - "ax_tree": _ax_tree( - { - "name": "Chrome", - "bundle_id": "com.google.Chrome", - "is_frontmost": True, - "windows": [ - { - "title": "Anthropic", - "focused": True, - "elements": [ - { - "role": "AXTextField", - "title": "Address and search bar", - "value": "https://www.anthropic.com/news", - } - ], - } - ], - } - ) - } - s1_parser.enrich(capture) - assert capture["url"] == "https://www.anthropic.com/news" - assert capture["focused_element"]["role"] == "AXTextField" - - -def test_enrich_prefixes_bare_url() -> None: - capture = { - "ax_tree": _ax_tree( - { - "name": "Safari", - "bundle_id": "com.apple.Safari", - "is_frontmost": True, - "windows": [ - { - "title": "", - "focused": True, - "elements": [ - { - "role": "AXTextField", - "value": "anthropic.com", - } - ], - } - ], - } - ) - } - s1_parser.enrich(capture) - assert capture["url"] == "https://anthropic.com" - - -def test_enrich_non_browser_has_no_url() -> None: - capture = { - "ax_tree": _ax_tree( - { - "name": "Cursor", - "bundle_id": "com.todesktop.230313mzl4w4u92", - "is_frontmost": True, - "windows": [ - { - "title": "file.py", - "focused": True, - "elements": [ - { - "role": "AXTextField", - "value": "https://example.com", - } - ], - } - ], - } - ) - } - s1_parser.enrich(capture) - assert capture["url"] is None - - -def test_enrich_visible_text_truncation() -> None: - huge_value = "x" * 20_000 - capture = { - "ax_tree": _ax_tree( - { - "name": "App", - "bundle_id": "b", - "is_frontmost": True, - "windows": [ - { - "title": "T", - "focused": True, - "elements": [ - {"role": "AXStaticText", "title": "header", "value": huge_value} - ], - } - ], - } - ) - } - s1_parser.enrich(capture) - assert len(capture["visible_text"]) <= 10_000 + 
len("\n...(truncated)") - assert capture["visible_text"].endswith("(truncated)") - - -def test_enrich_no_focused_window_returns_empty_element() -> None: - capture = { - "ax_tree": _ax_tree( - { - "name": "App", - "bundle_id": "b", - "is_frontmost": True, - "windows": [ - { - "title": "unfocused", - "focused": False, - "elements": [ - {"role": "AXTextField", "value": "something"} - ], - } - ], - } - ) - } - s1_parser.enrich(capture) - fe = capture["focused_element"] - assert fe["role"] == "" - assert fe["value"] == "" - assert fe["is_editable"] is False - - -def test_enrich_empty_ax_tree() -> None: - capture = {"ax_tree": {"apps": []}} - s1_parser.enrich(capture) - assert capture["focused_element"]["role"] == "" - assert capture["visible_text"] == "" - assert capture["url"] is None - - -def test_enrich_falls_back_to_first_app_when_no_frontmost() -> None: - capture = { - "ax_tree": _ax_tree( - { - "name": "OnlyApp", - "bundle_id": "b", - "windows": [ - { - "title": "T", - "focused": True, - "elements": [{"role": "AXStaticText", "value": "hello"}], - } - ], - } - ) - } - s1_parser.enrich(capture) - assert "hello" in capture["visible_text"]