From 971de2b352ccaab16c16729f2954132618af1225 Mon Sep 17 00:00:00 2001
From: Harold Ship <harold@il.ibm.com>
Date: Sun, 10 May 2026 12:08:35 +0300
Subject: [PATCH 1/5] fix: pin shell scripts to LF line endings via
 .gitattributes

Prevents Windows clones (with core.autocrlf=true) from checking out
*.sh files with CRLF, which breaks bash under WSL.

Closes #74

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .gitattributes | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 .gitattributes

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..f80f339
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,4 @@
+# Force LF line endings for shell scripts so they run correctly under
+# bash/WSL even when the repo is cloned on Windows with core.autocrlf=true.
+*.sh    text eol=lf
+*.bash  text eol=lf

From 4e7a6fbe05de77d7c56e910422d5f50346fbdfc6 Mon Sep 17 00:00:00 2001
From: Harold Ship <harold@il.ibm.com>
Date: Mon, 11 May 2026 12:47:03 +0300
Subject: [PATCH 2/5] fix: add Windows .bat to strip CRLF from *.sh and *.env

Expands the CRLF fix beyond the .gitattributes pin so existing Windows
clones can recover without re-cloning:

* .gitattributes now also pins *.env, *.yaml, *.yml, *.toml to LF
  (only *.sh and *.bash were covered before, so *.env files like
  appworld.env were still checked out as CRLF on Windows).
* Adds fix_line_endings.bat at repo root: a self-contained Windows
  batch + PowerShell script that walks the repo and strips CRLF from
  *.sh and *.env files, skipping .git/.venv/vendor/node_modules.
* Pins *.bat/*.cmd/*.ps1 to CRLF (cmd.exe expects CRLF in batch files).
* README: short Windows/WSL note pointing users at the .bat.

A bash self-heal prelude was attempted but is fundamentally unworkable:
bash cannot parse a script whose own keywords end in \r ("fi\r" != "fi"),
so the heal code never gets a chance to run.

Refs #74

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .gitattributes       | 15 +++++++++++++--
 .secrets.baseline    | 11 ++++++++++-
 README.md            |  6 ++++++
 fix_line_endings.bat | 37 +++++++++++++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 3 deletions(-)
 create mode 100644 fix_line_endings.bat

diff --git a/.gitattributes b/.gitattributes
index f80f339..7210b43 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,4 +1,15 @@
-# Force LF line endings for shell scripts so they run correctly under
-# bash/WSL even when the repo is cloned on Windows with core.autocrlf=true.
+# Force LF line endings for shell scripts and config text files so they
+# work correctly under bash/WSL even when the repo is cloned on Windows
+# with core.autocrlf=true. CRLF in *.env files breaks `source`, and CRLF
+# in *.sh files breaks the bash interpreter ("$'\r': command not found").
 *.sh    text eol=lf
 *.bash  text eol=lf
+*.env   text eol=lf
+*.yaml  text eol=lf
+*.yml   text eol=lf
+*.toml  text eol=lf
+
+# Windows batch scripts must keep CRLF so cmd.exe parses them reliably.
+*.bat   text eol=crlf
+*.cmd   text eol=crlf
+*.ps1   text eol=crlf
diff --git a/.secrets.baseline b/.secrets.baseline
index f33da84..3bd68e3 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -3,7 +3,7 @@
     "files": "^.secrets.baseline$",
     "lines": null
   },
-  "generated_at": "2026-05-10T11:36:08Z",
+  "generated_at": "2026-05-11T09:46:48Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -107,6 +107,15 @@
         "verified_result": null
       }
     ],
+    "README.md": [
+      {
+        "hashed_secret": "b45fc270bb9e9ddf4829b9124321ce244d38668e",
+        "is_verified": false,
+        "line_number": 72,
+        "type": "Secret Keyword",
+        "verified_result": null
+      }
+    ],
     "benchmarks/appworld/debug/example.py": [
       {
         "hashed_secret": "c18006fc138809314751cd1991f1e0b820fabd37",
diff --git a/README.md b/README.md
index b2a5b1c..6ea7d17 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,12 @@ git clone https://github.com/cuga-project/cuga-eval.git
 cd cuga-eval
 ```
 
+> **Windows users (WSL):** if you cloned with Windows git (default `core.autocrlf=true`),
+> the `*.sh` and `*.env` files are checked out with CRLF line endings. CRLF breaks bash
+> under WSL (`$'\r': command not found`) and breaks env files that are `source`d.
+> Run `fix_line_endings.bat` (double-click in Explorer, or run from cmd.exe / PowerShell)
+> once before running any setup scripts under WSL.
+
 ### 2. Ensure CUGA Agent is in parent directory
 The `cuga-agent` repository must be located at `../cuga-agent` (one directory up from this repository).
 
diff --git a/fix_line_endings.bat b/fix_line_endings.bat
new file mode 100644
index 0000000..5f5f6ff
--- /dev/null
+++ b/fix_line_endings.bat
@@ -0,0 +1,37 @@
+@echo off
+REM ============================================================================
+REM  fix_line_endings.bat
+REM
+REM  Strips CRLF line endings from *.sh and *.env files in this repo so they
+REM  work under WSL bash. Run this once on Windows after cloning the repo (or
+REM  after pulling, if you have stale CRLF files), BEFORE running setup_cuga.sh
+REM  or setup_m3.sh under WSL.
+REM
+REM  Usage:  double-click, or from cmd.exe / PowerShell:  fix_line_endings.bat
+REM ============================================================================
+
+setlocal
+cd /d "%~dp0"
+
+echo.
+echo Normalizing *.sh and *.env line endings (CRLF -^> LF) under:
+echo   %CD%
+echo.
+
+powershell -NoProfile -ExecutionPolicy Bypass -Command "$ErrorActionPreference='Stop'; $root = (Get-Location).Path; $count = 0; $files = Get-ChildItem -Path . -Recurse -File -Include *.sh,*.env | Where-Object { $_.FullName -notmatch '\\(\.git|\.venv|vendor|node_modules)\\' }; foreach ($f in $files) { $b = [IO.File]::ReadAllBytes($f.FullName); if ($b -contains 13) { $c = New-Object Collections.Generic.List[byte]; foreach ($x in $b) { if ($x -ne 13) { $c.Add($x) } }; [IO.File]::WriteAllBytes($f.FullName, $c.ToArray()); Write-Host ('  normalized: ' + $f.FullName.Substring($root.Length + 1)); $count++ } }; Write-Host ''; Write-Host ('Normalized ' + $count + ' file(s).')"
+
+if errorlevel 1 (
+    echo.
+    echo ERROR: normalization failed. See PowerShell error above.
+    exit /b 1
+)
+
+echo.
+echo Done. You can now run setup_cuga.sh / setup_m3.sh under WSL.
+echo.
+
+REM Pause so the window stays open if double-clicked from Explorer. Explorer starts
+REM "cmd /c <this file>", so the script name then appears in CMDCMDLINE. PROMPT cannot
+REM be used for this test: cmd.exe defines it in every session, so it never paused.
+echo %CMDCMDLINE% | findstr /i /l /c:"%~nx0" >nul && pause
+endlocal

From b71e183157db0e4d5afd82c06768a7239b167823 Mon Sep 17 00:00:00 2001
From: Harold Ship <harold@il.ibm.com>
Date: Mon, 11 May 2026 13:13:41 +0300
Subject: [PATCH 3/5] fix: make fix_line_endings.bat exclusions cross-platform

The exclusion regex used \\(...)\\ which only matches Windows
backslash separators. Verified with pwsh on macOS: vendor/
and node_modules/ paths were silently being normalized
because the regex never matched their forward-slash paths.

Switch to [\\/](...)[\\/] so exclusions work whether the script
is invoked under Windows cmd.exe (backslash paths) or pwsh on
any OS (which is how this was discovered).

Tested via pwsh on macOS against a fixture with .git/, .venv/,
vendor/, node_modules/ paths -- all correctly skipped now.

Refs #74

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 fix_line_endings.bat | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fix_line_endings.bat b/fix_line_endings.bat
index 5f5f6ff..4bfb851 100644
--- a/fix_line_endings.bat
+++ b/fix_line_endings.bat
@@ -18,7 +18,7 @@ echo Normalizing *.sh and *.env line endings (CRLF -^> LF) under:
 echo   %CD%
 echo.
 
-powershell -NoProfile -ExecutionPolicy Bypass -Command "$ErrorActionPreference='Stop'; $root = (Get-Location).Path; $count = 0; $files = Get-ChildItem -Path . -Recurse -File -Include *.sh,*.env | Where-Object { $_.FullName -notmatch '\\(\.git|\.venv|vendor|node_modules)\\' }; foreach ($f in $files) { $b = [IO.File]::ReadAllBytes($f.FullName); if ($b -contains 13) { $c = New-Object Collections.Generic.List[byte]; foreach ($x in $b) { if ($x -ne 13) { $c.Add($x) } }; [IO.File]::WriteAllBytes($f.FullName, $c.ToArray()); Write-Host ('  normalized: ' + $f.FullName.Substring($root.Length + 1)); $count++ } }; Write-Host ''; Write-Host ('Normalized ' + $count + ' file(s).')"
+powershell -NoProfile -ExecutionPolicy Bypass -Command "$ErrorActionPreference='Stop'; $root = (Get-Location).Path; $count = 0; $files = Get-ChildItem -Path . -Recurse -File -Include *.sh,*.env | Where-Object { $_.FullName -notmatch '[\\/](\.git|\.venv|vendor|node_modules)[\\/]' }; foreach ($f in $files) { $b = [IO.File]::ReadAllBytes($f.FullName); if ($b -contains 13) { $c = New-Object Collections.Generic.List[byte]; foreach ($x in $b) { if ($x -ne 13) { $c.Add($x) } }; [IO.File]::WriteAllBytes($f.FullName, $c.ToArray()); Write-Host ('  normalized: ' + $f.FullName.Substring($root.Length + 1)); $count++ } }; Write-Host ''; Write-Host ('Normalized ' + $count + ' file(s).')"
 
 if errorlevel 1 (
     echo.

From 7493f57b1c509449dfb2f6e533206a4157880992 Mon Sep 17 00:00:00 2001
From: Harold Ship <harold@il.ibm.com>
Date: Sun, 17 May 2026 16:37:54 +0300
Subject: [PATCH 4/5] feat: add Windows .bat equivalents for all .sh scripts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Every .sh in the repo now has a sibling .bat (35 scripts + 1 shared
_delegate_to_bash.bat helper) so Windows users have a path that doesn't
require Git Bash for the simple cases.

Two translation styles:

- Pure cmd.exe ports (15 files): the simple wrappers and env-loaders
  — setup_cuga, setup_m3, setup_appworld, load_env, run_registry
  (helper + 4 per-benchmark stubs), model_profiles, viz, the analyze
  thin-stubs, all run_app/run_eval wrappers. These are usable on a
  vanilla Windows install.

- bash-delegate shims (19 files): for the heavy scripts that use
  POSIX-only features (lsof, pkill, signal traps, process substitution,
  sourceable function libraries, embedded Python heredocs, mktemp,
  comm, find -mindepth/-maxdepth). Each shim is ~6 lines and calls
  _delegate_to_bash.bat, which tries Git Bash (well-known install
  paths) -> bash on PATH -> WSL -> friendly install instructions.

benchmarks/helpers/common.bat is a placeholder noting that the bash
function library it mirrors can't be sourced into cmd.exe; callers
that delegate to bash source the .sh version directly.

.secrets.baseline gets one new entry for the openai/gpt-oss-120b model
name in scripts/model_profiles.bat (false positive — same string is
already in scripts/model_profiles.sh; cmd.exe's lack of inline
comment syntax means the standard pragma can't be embedded on the
line, so the baseline is the cleaner workaround).

The longer-term cleanup — move logic into Python so .sh and .bat both
become ~5-line wrappers around `uv run python -m ...` — is tracked
in research-rpa/cuga-internal-evaluation#88.
---
 .secrets.baseline                             |  11 +-
 benchmarks/appworld/analyze.bat               |   9 +
 benchmarks/appworld/compare.bat               |   7 +
 benchmarks/appworld/eval.bat                  |   8 +
 benchmarks/appworld/run_app.bat               |  19 ++
 benchmarks/appworld/run_eval.bat              |  13 +
 benchmarks/appworld/run_registry.bat          |   8 +
 benchmarks/bpo/compare.bat                    |   7 +
 benchmarks/bpo/eval.bat                       |   7 +
 benchmarks/bpo/run_app.bat                    |  19 ++
 benchmarks/bpo/run_registry.bat               |   7 +
 benchmarks/helpers/_delegate_to_bash.bat      |  60 +++++
 benchmarks/helpers/common.bat                 |  24 ++
 benchmarks/helpers/load_env.bat               |  55 +++++
 benchmarks/helpers/run_registry.bat           |  28 +++
 benchmarks/m3/analyze.bat                     |   9 +
 benchmarks/m3/clean.bat                       |   8 +
 benchmarks/m3/compare.bat                     |   7 +
 benchmarks/m3/eval.bat                        |  11 +
 benchmarks/m3/eval/scripts/monitor_eval.bat   |   7 +
 .../m3/eval/scripts/run_eval_background.bat   |   8 +
 benchmarks/m3/eval/scripts/setup_m3_eval.bat  |   8 +
 benchmarks/m3/run_registry.bat                |   7 +
 benchmarks/m3/run_with_container.bat          |   7 +
 benchmarks/oak_health_insurance/compare.bat   |   7 +
 benchmarks/oak_health_insurance/eval.bat      |   7 +
 benchmarks/oak_health_insurance/run_app.bat   |  19 ++
 .../oak_health_insurance/run_registry.bat     |   7 +
 scripts/analyze.bat                           |   8 +
 scripts/compare.bat                           |   7 +
 scripts/eval.bat                              |   9 +
 scripts/m3_pad_to_cap_verify.bat              |  11 +
 scripts/model_profiles.bat                    |  56 +++++
 scripts/viz.bat                               |  27 +++
 setup_appworld.bat                            |  58 +++++
 setup_cuga.bat                                |  93 +++++++
 setup_m3.bat                                  | 229 ++++++++++++++++++
 37 files changed, 891 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/appworld/analyze.bat
 create mode 100644 benchmarks/appworld/compare.bat
 create mode 100644 benchmarks/appworld/eval.bat
 create mode 100644 benchmarks/appworld/run_app.bat
 create mode 100644 benchmarks/appworld/run_eval.bat
 create mode 100644 benchmarks/appworld/run_registry.bat
 create mode 100644 benchmarks/bpo/compare.bat
 create mode 100644 benchmarks/bpo/eval.bat
 create mode 100644 benchmarks/bpo/run_app.bat
 create mode 100644 benchmarks/bpo/run_registry.bat
 create mode 100644 benchmarks/helpers/_delegate_to_bash.bat
 create mode 100644 benchmarks/helpers/common.bat
 create mode 100644 benchmarks/helpers/load_env.bat
 create mode 100644 benchmarks/helpers/run_registry.bat
 create mode 100644 benchmarks/m3/analyze.bat
 create mode 100644 benchmarks/m3/clean.bat
 create mode 100644 benchmarks/m3/compare.bat
 create mode 100644 benchmarks/m3/eval.bat
 create mode 100644 benchmarks/m3/eval/scripts/monitor_eval.bat
 create mode 100644 benchmarks/m3/eval/scripts/run_eval_background.bat
 create mode 100644 benchmarks/m3/eval/scripts/setup_m3_eval.bat
 create mode 100644 benchmarks/m3/run_registry.bat
 create mode 100644 benchmarks/m3/run_with_container.bat
 create mode 100644 benchmarks/oak_health_insurance/compare.bat
 create mode 100644 benchmarks/oak_health_insurance/eval.bat
 create mode 100644 benchmarks/oak_health_insurance/run_app.bat
 create mode 100644 benchmarks/oak_health_insurance/run_registry.bat
 create mode 100644 scripts/analyze.bat
 create mode 100644 scripts/compare.bat
 create mode 100644 scripts/eval.bat
 create mode 100644 scripts/m3_pad_to_cap_verify.bat
 create mode 100644 scripts/model_profiles.bat
 create mode 100644 scripts/viz.bat
 create mode 100644 setup_appworld.bat
 create mode 100644 setup_cuga.bat
 create mode 100644 setup_m3.bat

diff --git a/.secrets.baseline b/.secrets.baseline
index 3bd68e3..2e4c4fd 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -3,7 +3,7 @@
     "files": "^.secrets.baseline$",
     "lines": null
   },
-  "generated_at": "2026-05-11T09:46:48Z",
+  "generated_at": "2026-05-17T13:36:51Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -315,6 +315,15 @@
         "type": "Hex High Entropy String",
         "verified_result": null
       }
+    ],
+    "scripts/model_profiles.bat": [
+      {
+        "hashed_secret": "af89b35ce32cfc9eaf4c102325da47616e6eff93",
+        "is_verified": false,
+        "line_number": 18,
+        "type": "Base64 High Entropy String",
+        "verified_result": null
+      }
     ]
   },
   "version": "0.13.1+ibm.64.dss",
diff --git a/benchmarks/appworld/analyze.bat b/benchmarks/appworld/analyze.bat
new file mode 100644
index 0000000..f3b679b
--- /dev/null
+++ b/benchmarks/appworld/analyze.bat
@@ -0,0 +1,9 @@
+@echo off
+REM Windows equivalent of benchmarks/appworld/analyze.sh.
+REM Calls scripts\analyze.bat with "--benchmark appworld" followed by every
+REM argument given to this wrapper, and exits with that script's exit code.
+
+setlocal
+for %%D in ("%~dp0.") do set "SCRIPT_DIR=%%~fD"
+call "%SCRIPT_DIR%\..\..\scripts\analyze.bat" --benchmark appworld %*
+exit /b %errorlevel%
diff --git a/benchmarks/appworld/compare.bat b/benchmarks/appworld/compare.bat
new file mode 100644
index 0000000..fddd539
--- /dev/null
+++ b/benchmarks/appworld/compare.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows launcher for benchmarks/appworld/compare.sh. Hands the script and every
+REM argument to _delegate_to_bash.bat and exits with the code that helper returns.
+setlocal
+for %%D in ("%~dp0.") do set "_THIS=%%~fD"
+call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\compare.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/appworld/eval.bat b/benchmarks/appworld/eval.bat
new file mode 100644
index 0000000..4f355c8
--- /dev/null
+++ b/benchmarks/appworld/eval.bat
@@ -0,0 +1,8 @@
+@echo off
+REM Windows launcher for benchmarks/appworld/eval.sh. The script relies on bash
+REM features (traps, kill -0, lsof, process substitution, find with -mindepth), so
+REM it is run through _delegate_to_bash.bat; exits with the helper's exit code.
+setlocal
+for %%D in ("%~dp0.") do set "_THIS=%%~fD"
+call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\eval.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/appworld/run_app.bat b/benchmarks/appworld/run_app.bat
new file mode 100644
index 0000000..dc1567c
--- /dev/null
+++ b/benchmarks/appworld/run_app.bat
@@ -0,0 +1,19 @@
+@echo off
+REM Windows equivalent of benchmarks/appworld/run_app.sh
+REM Loads the AppWorld environment through load_env.bat, then runs
+REM "uv run cuga start appworld" from the project root and exits with its code.
+setlocal
+set "SCRIPT_DIR=%~dp0"
+if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
+pushd "%SCRIPT_DIR%\..\.." >nul
+set "PROJECT_ROOT=%CD%"
+popd >nul
+REM PROJECT_ROOT is now the absolute path two levels above this script's folder.
+echo Loading AppWorld configuration...
+call "%PROJECT_ROOT%\benchmarks\helpers\load_env.bat" "appworld"
+
+echo.
+echo Starting AppWorld...
+cd /d "%PROJECT_ROOT%"
+uv run cuga start appworld
+exit /b %errorlevel%
diff --git a/benchmarks/appworld/run_eval.bat b/benchmarks/appworld/run_eval.bat
new file mode 100644
index 0000000..a14a943
--- /dev/null
+++ b/benchmarks/appworld/run_eval.bat
@@ -0,0 +1,13 @@
+@echo off
+REM Windows equivalent of benchmarks/appworld/run_eval.sh
+REM Loads the AppWorld environment through load_env.bat, then runs
+REM "cuga-eval appworld" with every argument given to this wrapper.
+setlocal
+set "SCRIPT_DIR=%~dp0"
+if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
+pushd "%SCRIPT_DIR%\..\.." >nul
+set "PROJECT_ROOT=%CD%"
+popd >nul
+call "%PROJECT_ROOT%\benchmarks\helpers\load_env.bat" "appworld"
+cuga-eval appworld %*
+exit /b %errorlevel%
diff --git a/benchmarks/appworld/run_registry.bat b/benchmarks/appworld/run_registry.bat
new file mode 100644
index 0000000..bac0aab
--- /dev/null
+++ b/benchmarks/appworld/run_registry.bat
@@ -0,0 +1,8 @@
+@echo off
+REM Windows equivalent of benchmarks/appworld/run_registry.sh.
+REM Calls the generic helpers\run_registry.bat with the benchmark name
+REM "appworld" and exits with that helper's exit code.
+setlocal
+for %%D in ("%~dp0.") do set "SCRIPT_DIR=%%~fD"
+call "%SCRIPT_DIR%\..\helpers\run_registry.bat" "appworld"
+exit /b %errorlevel%
diff --git a/benchmarks/bpo/compare.bat b/benchmarks/bpo/compare.bat
new file mode 100644
index 0000000..e1eb1bd
--- /dev/null
+++ b/benchmarks/bpo/compare.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows launcher for benchmarks/bpo/compare.sh. Hands the script and every
+REM argument to _delegate_to_bash.bat and exits with the code that helper returns.
+setlocal
+for %%D in ("%~dp0.") do set "_THIS=%%~fD"
+call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\compare.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/bpo/eval.bat b/benchmarks/bpo/eval.bat
new file mode 100644
index 0000000..b6d0e05
--- /dev/null
+++ b/benchmarks/bpo/eval.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows launcher for benchmarks/bpo/eval.sh. Hands the script and every
+REM argument to _delegate_to_bash.bat and exits with the code that helper returns.
+setlocal
+for %%D in ("%~dp0.") do set "_THIS=%%~fD"
+call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\eval.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/bpo/run_app.bat b/benchmarks/bpo/run_app.bat
new file mode 100644
index 0000000..f3c414b
--- /dev/null
+++ b/benchmarks/bpo/run_app.bat
@@ -0,0 +1,19 @@
+@echo off
+REM Windows equivalent of benchmarks/bpo/run_app.sh
+REM Loads the BPO environment through load_env.bat, then serves
+REM benchmarks.bpo.main:app with uvicorn (auto-reload) on port 8095.
+setlocal
+set "SCRIPT_DIR=%~dp0"
+if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
+pushd "%SCRIPT_DIR%\..\.." >nul
+set "PROJECT_ROOT=%CD%"
+popd >nul
+REM PROJECT_ROOT is now the absolute path two levels above this script's folder.
+echo Loading BPO configuration...
+call "%PROJECT_ROOT%\benchmarks\helpers\load_env.bat" "bpo"
+
+echo.
+echo Starting BPO FastAPI app on port 8095...
+cd /d "%PROJECT_ROOT%"
+uv run uvicorn benchmarks.bpo.main:app --reload --port 8095
+exit /b %errorlevel%
diff --git a/benchmarks/bpo/run_registry.bat b/benchmarks/bpo/run_registry.bat
new file mode 100644
index 0000000..7787dc8
--- /dev/null
+++ b/benchmarks/bpo/run_registry.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows equivalent of benchmarks/bpo/run_registry.sh. Calls the generic
+REM helpers\run_registry.bat with the benchmark name "bpo"; exits with its code.
+setlocal
+for %%D in ("%~dp0.") do set "SCRIPT_DIR=%%~fD"
+call "%SCRIPT_DIR%\..\helpers\run_registry.bat" "bpo"
+exit /b %errorlevel%
diff --git a/benchmarks/helpers/_delegate_to_bash.bat b/benchmarks/helpers/_delegate_to_bash.bat
new file mode 100644
index 0000000..aedb26e
--- /dev/null
+++ b/benchmarks/helpers/_delegate_to_bash.bat
@@ -0,0 +1,60 @@
+@echo off
+REM Shared helper: runs a .sh script under a real bash, forwarding all other args.
+REM Tries Git Bash (well-known install paths), then a non-WSL bash on PATH, then WSL.
+REM Exits with the script's code; 2 if the script path is missing; 1 if no bash is found.
+REM Usage (from another .bat):
+REM   call "<path-to-helpers>\_delegate_to_bash.bat" "<absolute-or-relative-path-to-script.sh>" %*
+REM Rationale: many .sh scripts here use POSIX-only features (process substitution,
+REM traps, lsof, pkill, ...) with no clean cmd.exe equivalent, so we run a real bash.
+
+setlocal enabledelayedexpansion
+
+if "%~1"=="" (
+    echo [ERROR] _delegate_to_bash.bat called without a script path
+    exit /b 2
+)
+set "_SCRIPT=%~1"
+REM SHIFT never changes the all-arguments string, so cut the first argument off it by hand.
+set _ARGS=%*
+set _ARGS=!_ARGS:*%1=!
+if not exist "%_SCRIPT%" (
+    echo [ERROR] Script not found: %_SCRIPT%
+    exit /b 2
+)
+
+REM Try Git Bash in well-known install locations
+for %%G in (
+    "%ProgramFiles%\Git\bin\bash.exe"
+    "%ProgramFiles(x86)%\Git\bin\bash.exe"
+    "%LocalAppData%\Programs\Git\bin\bash.exe"
+) do (
+    if exist %%G (
+        %%G "%_SCRIPT%" !_ARGS!
+        exit /b !errorlevel!
+    )
+)
+
+REM Then any bash on PATH (msys2, cygwin), skipping the WSL launcher in the Windows directory (it cannot open Windows paths)
+for /f "delims=" %%B in ('where bash 2^>nul') do (
+    set "_BASH=%%B"
+    if /i "!_BASH:%SystemRoot%\=!"=="!_BASH!" (
+        "%%B" "%_SCRIPT%" !_ARGS!
+        exit /b !errorlevel!
+    )
+)
+
+REM Finally WSL
+where wsl >nul 2>&1
+if not errorlevel 1 (
+    for /f "delims=" %%P in ('wsl wslpath -u "%_SCRIPT%" 2^>nul') do set "_WSL_SCRIPT=%%P"
+    if not "!_WSL_SCRIPT!"=="" (
+        wsl bash "!_WSL_SCRIPT!" !_ARGS!
+        exit /b !errorlevel!
+    )
+)
+
+echo [ERROR] No bash interpreter found on this system.
+echo This script requires bash. Install one of:
+echo   - Git for Windows ^(provides Git Bash^): https://git-scm.com/download/win
+echo   - WSL ^(Windows Subsystem for Linux^):   wsl --install
+exit /b 1
diff --git a/benchmarks/helpers/common.bat b/benchmarks/helpers/common.bat
new file mode 100644
index 0000000..95913a3
--- /dev/null
+++ b/benchmarks/helpers/common.bat
@@ -0,0 +1,24 @@
+@echo off
+REM Stand-in for benchmarks/helpers/common.sh.
+REM
+REM common.sh is a bash function library (port_in_use, wait_for_server,
+REM parse_common_args, cleanup_pids, etc.) that other .sh scripts source.
+REM cmd.exe has no way to source function definitions, so the library is
+REM not ported here.
+REM
+REM Nothing is expected to call this file: the heavy .bat files in this
+REM repo (eval.bat, compare.bat, etc.) hand their work to bash through
+REM _delegate_to_bash.bat, and bash sources common.sh on its own.
+REM
+REM If you did run this file directly, you probably want one of:
+REM   - call _delegate_to_bash.bat ".\common.sh" [args]
+REM   - Git Bash or WSL, sourcing common.sh the usual way
+REM
+REM With no argument this prints a notice and exits 0; with any argument it
+REM prints a warning and exits 1.
+
+if not "%~1"=="" goto :unsupported
+echo common.bat is a placeholder. See comment block in this file.
+exit /b 0
+:unsupported
+echo [WARN] common.bat does not implement %~1 in cmd.exe. Use bash to source common.sh.
+exit /b 1
diff --git a/benchmarks/helpers/load_env.bat b/benchmarks/helpers/load_env.bat
new file mode 100644
index 0000000..863c4e4
--- /dev/null
+++ b/benchmarks/helpers/load_env.bat
@@ -0,0 +1,55 @@
+@echo off
+REM Windows equivalent of load_env.sh
+REM
+REM Usage: call load_env.bat [benchmark_name]
+REM
+REM Sourcing semantics: the KEY=VALUE lines of each env file become `set`
+REM commands in a temporary .bat that is called after ENDLOCAL, so the values
+REM persist into the caller's scope when this script is invoked via `call`.
+
+setlocal disabledelayedexpansion
+
+set "BENCHMARK_NAME=%~1"
+
+set "HELPERS_DIR=%~dp0"
+if "%HELPERS_DIR:~-1%"=="\" set "HELPERS_DIR=%HELPERS_DIR:~0,-1%"
+pushd "%HELPERS_DIR%\..\.." >nul
+set "PROJECT_ROOT=%CD%"
+popd >nul
+set "CONFIG_DIR=%PROJECT_ROOT%\config"
+
+REM Temp file holds the set-commands we'll call from the caller's scope
+set "_SETS=%TEMP%\cuga_loadenv_%RANDOM%_%RANDOM%.bat"
+echo @echo off> "%_SETS%"
+
+call :emit_env_file "%PROJECT_ROOT%\.env"            ".env (secrets)"
+call :emit_env_file "%CONFIG_DIR%\global.env"        "global.env"
+if not "%BENCHMARK_NAME%"=="" (
+    call :emit_env_file "%PROJECT_ROOT%\benchmarks\%BENCHMARK_NAME%\config\%BENCHMARK_NAME%.env" "%BENCHMARK_NAME%.env"
+)
+
+REM LOGURU_LEVEL defaults are written as deferred checks so they see the values loaded from the env files
+echo if not defined LOGURU_LEVEL set "LOGURU_LEVEL=WARNING">> "%_SETS%"
+echo if /i "%%VERBOSE%%"=="true" set "LOGURU_LEVEL=DEBUG">> "%_SETS%"
+
+REM Single-line endlocal so %_SETS% is expanded at parse time (before endlocal runs)
+endlocal & call "%_SETS%" & del "%_SETS%" 2>nul
+exit /b 0
+
+:emit_env_file
+set "_FILE=%~1"
+set "_LABEL=%~2"
+REM No parenthesized IF block here: the closing paren in the message would end the block early.
+if exist "%_FILE%" goto :emit_lines
+if not "%_LABEL%"=="" echo (skipping missing %_LABEL%)
+exit /b 0
+:emit_lines
+echo [ok] Loading %_LABEL%
+REM Outer loop skips blank and #-comment lines; inner loop splits KEY=VALUE at the first
+REM equals sign. Delayed expansion is off so an exclamation mark in a value is kept as-is.
+for /f "usebackq tokens=* eol=#" %%L in ("%_FILE%") do (
+    for /f "tokens=1,* delims==" %%A in ("%%L") do (
+        echo set "%%A=%%B">> "%_SETS%"
+    )
+)
+exit /b 0
diff --git a/benchmarks/helpers/run_registry.bat b/benchmarks/helpers/run_registry.bat
new file mode 100644
index 0000000..306a64f
--- /dev/null
+++ b/benchmarks/helpers/run_registry.bat
@@ -0,0 +1,28 @@
+@echo off
+REM Windows equivalent of run_registry.sh
+REM Loads env (global + benchmark-specific) and starts the registry server.
+REM Usage: run_registry.bat ^<benchmark_name^>
+REM Exit code: 1 when no benchmark name is given, otherwise that of "uv run registry".
+setlocal
+REM The benchmark name is required: print usage and stop when it is missing.
+set "BENCHMARK_NAME=%~1"
+if "%BENCHMARK_NAME%"=="" (
+    echo Usage: %~nx0 ^<benchmark_name^>
+    echo Example: %~nx0 m3
+    exit /b 1
+)
+REM Resolve the project root (two levels above this helpers folder) as an absolute path.
+set "SCRIPT_DIR=%~dp0"
+if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
+pushd "%SCRIPT_DIR%\..\.." >nul
+set "PROJECT_ROOT=%CD%"
+popd >nul
+
+echo Loading %BENCHMARK_NAME% evaluation configuration...
+call "%SCRIPT_DIR%\load_env.bat" "%BENCHMARK_NAME%"
+
+echo.
+echo Starting registry server...
+cd /d "%PROJECT_ROOT%"
+uv run registry
+exit /b %errorlevel%
diff --git a/benchmarks/m3/analyze.bat b/benchmarks/m3/analyze.bat
new file mode 100644
index 0000000..9905eff
--- /dev/null
+++ b/benchmarks/m3/analyze.bat
@@ -0,0 +1,9 @@
+@echo off
+REM Windows equivalent of benchmarks/m3/analyze.sh.
+REM Calls scripts\analyze.bat with "--benchmark m3" followed by every
+REM argument given to this wrapper, and exits with that script's exit code.
+
+setlocal
+for %%D in ("%~dp0.") do set "SCRIPT_DIR=%%~fD"
+call "%SCRIPT_DIR%\..\..\scripts\analyze.bat" --benchmark m3 %*
+exit /b %errorlevel%
diff --git a/benchmarks/m3/clean.bat b/benchmarks/m3/clean.bat
new file mode 100644
index 0000000..757be01
--- /dev/null
+++ b/benchmarks/m3/clean.bat
@@ -0,0 +1,8 @@
+@echo off
+REM Windows launcher for benchmarks/m3/clean.sh. The script is POSIX-only
+REM (pkill, lsof, docker exec curl loops, glob removal), so it is run through
+REM _delegate_to_bash.bat; this wrapper exits with the helper's exit code.
+setlocal
+for %%D in ("%~dp0.") do set "_THIS=%%~fD"
+call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\clean.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/m3/compare.bat b/benchmarks/m3/compare.bat
new file mode 100644
index 0000000..835cf99
--- /dev/null
+++ b/benchmarks/m3/compare.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows launcher for benchmarks/m3/compare.sh. Hands the script and every
+REM argument to _delegate_to_bash.bat and exits with the code that helper returns.
+setlocal
+for %%D in ("%~dp0.") do set "_THIS=%%~fD"
+call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\compare.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/m3/eval.bat b/benchmarks/m3/eval.bat
new file mode 100644
index 0000000..04a4e4b
--- /dev/null
+++ b/benchmarks/m3/eval.bat
@@ -0,0 +1,11 @@
+@echo off
+REM Windows equivalent of benchmarks/m3/eval.sh.
+REM The script uses POSIX features that do not translate cleanly to cmd.exe
+REM (traps, lsof, process substitution, ...), so this wrapper passes it and all
+REM arguments to _delegate_to_bash.bat and exits with the helper's exit code.
+REM Tracked in the follow-up issue: migrate these to Python.
+
+setlocal
+for %%D in ("%~dp0.") do set "_THIS=%%~fD"
+call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\eval.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/m3/eval/scripts/monitor_eval.bat b/benchmarks/m3/eval/scripts/monitor_eval.bat
new file mode 100644
index 0000000..cfcaebc
--- /dev/null
+++ b/benchmarks/m3/eval/scripts/monitor_eval.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows launcher for monitor_eval.sh. Hands the script and every argument
+REM to _delegate_to_bash.bat and exits with the code that helper returns.
+setlocal
+for %%D in ("%~dp0.") do set "_THIS=%%~fD"
+call "%_THIS%\..\..\..\helpers\_delegate_to_bash.bat" "%_THIS%\monitor_eval.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/m3/eval/scripts/run_eval_background.bat b/benchmarks/m3/eval/scripts/run_eval_background.bat
new file mode 100644
index 0000000..69a18b3
--- /dev/null
+++ b/benchmarks/m3/eval/scripts/run_eval_background.bat
@@ -0,0 +1,8 @@
+@echo off
+REM Windows launcher for run_eval_background.sh. The script depends on POSIX
+REM background-job semantics (nohup, signal traps, PID files), so it is run
+REM through _delegate_to_bash.bat; this wrapper exits with the helper's code.
+setlocal
+for %%D in ("%~dp0.") do set "_THIS=%%~fD"
+call "%_THIS%\..\..\..\helpers\_delegate_to_bash.bat" "%_THIS%\run_eval_background.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/m3/eval/scripts/setup_m3_eval.bat b/benchmarks/m3/eval/scripts/setup_m3_eval.bat
new file mode 100644
index 0000000..a60b413
--- /dev/null
+++ b/benchmarks/m3/eval/scripts/setup_m3_eval.bat
@@ -0,0 +1,8 @@
+@echo off
+REM Windows launcher for setup_m3_eval.sh. The script is bash-only (interactive
+REM prompts, docker detection, file edits), so it is run through
+REM _delegate_to_bash.bat; this wrapper exits with the helper's exit code.
+setlocal
+for %%D in ("%~dp0.") do set "_THIS=%%~fD"
+call "%_THIS%\..\..\..\helpers\_delegate_to_bash.bat" "%_THIS%\setup_m3_eval.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/m3/run_registry.bat b/benchmarks/m3/run_registry.bat
new file mode 100644
index 0000000..2ebc173
--- /dev/null
+++ b/benchmarks/m3/run_registry.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows equivalent of benchmarks/m3/run_registry.sh. Calls the generic
+REM helpers\run_registry.bat with the benchmark name "m3"; exits with its code.
+setlocal
+for %%D in ("%~dp0.") do set "SCRIPT_DIR=%%~fD"
+call "%SCRIPT_DIR%\..\helpers\run_registry.bat" "m3"
+exit /b %errorlevel%
diff --git a/benchmarks/m3/run_with_container.bat b/benchmarks/m3/run_with_container.bat
new file mode 100644
index 0000000..72c8718
--- /dev/null
+++ b/benchmarks/m3/run_with_container.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows launcher for benchmarks/m3/run_with_container.sh. Hands the script and
+REM every argument to _delegate_to_bash.bat and exits with the helper's exit code.
+setlocal
+for %%D in ("%~dp0.") do set "_THIS=%%~fD"
+call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\run_with_container.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/oak_health_insurance/compare.bat b/benchmarks/oak_health_insurance/compare.bat
new file mode 100644
index 0000000..8a9558f
--- /dev/null
+++ b/benchmarks/oak_health_insurance/compare.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows equivalent of benchmarks/oak_health_insurance/compare.sh - forwards all arguments to ..\helpers\_delegate_to_bash.bat together with the sibling .sh path and returns the delegate's exit code.
+setlocal
+set "_THIS=%~dp0"
+if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%"
+call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\compare.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/oak_health_insurance/eval.bat b/benchmarks/oak_health_insurance/eval.bat
new file mode 100644
index 0000000..8a37a55
--- /dev/null
+++ b/benchmarks/oak_health_insurance/eval.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows equivalent of benchmarks/oak_health_insurance/eval.sh - forwards all arguments to ..\helpers\_delegate_to_bash.bat together with the sibling .sh path and returns the delegate's exit code.
+setlocal
+set "_THIS=%~dp0"
+if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%"
+call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\eval.sh" %*
+exit /b %errorlevel%
diff --git a/benchmarks/oak_health_insurance/run_app.bat b/benchmarks/oak_health_insurance/run_app.bat
new file mode 100644
index 0000000..8bea569
--- /dev/null
+++ b/benchmarks/oak_health_insurance/run_app.bat
@@ -0,0 +1,19 @@
+@echo off
+REM Windows equivalent of benchmarks/oak_health_insurance/run_app.sh
+REM Loads env and runs the Oak Health Insurance FastAPI app on port 8090.
+REM Arguments to this script are not used; the exit code is that of the final uv run command.
+setlocal
+set "SCRIPT_DIR=%~dp0"
+if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
+pushd "%SCRIPT_DIR%\..\.." >nul
+set "PROJECT_ROOT=%CD%"
+popd >nul
+REM The pushd/popd pair above only serves to turn ..\.. into an absolute PROJECT_ROOT.
+echo Loading Oak Health Insurance configuration...
+call "%PROJECT_ROOT%\benchmarks\helpers\load_env.bat" "oak_health_insurance"
+
+echo.
+echo Starting FastAPI app...
+cd /d "%SCRIPT_DIR%"
+uv run uvicorn main:app --reload --port 8090
+exit /b %errorlevel%
diff --git a/benchmarks/oak_health_insurance/run_registry.bat b/benchmarks/oak_health_insurance/run_registry.bat
new file mode 100644
index 0000000..203b480
--- /dev/null
+++ b/benchmarks/oak_health_insurance/run_registry.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows equivalent of benchmarks/oak_health_insurance/run_registry.sh: calls ..\helpers\run_registry.bat with the fixed benchmark name "oak_health_insurance" and returns its exit code. Arguments given to this script are not forwarded.
+setlocal
+set "SCRIPT_DIR=%~dp0"
+if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
+call "%SCRIPT_DIR%\..\helpers\run_registry.bat" "oak_health_insurance"
+exit /b %errorlevel%
diff --git a/scripts/analyze.bat b/scripts/analyze.bat
new file mode 100644
index 0000000..06dae69
--- /dev/null
+++ b/scripts/analyze.bat
@@ -0,0 +1,8 @@
+@echo off
+REM Windows equivalent of scripts/analyze.sh — delegates to bash (uses
+REM bash arrays for --bundles / --task-ids, sources config .conf via source).
+setlocal
+set "_THIS=%~dp0"
+if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%"
+call "%_THIS%\..\benchmarks\helpers\_delegate_to_bash.bat" "%_THIS%\analyze.sh" %*
+exit /b %errorlevel%
diff --git a/scripts/compare.bat b/scripts/compare.bat
new file mode 100644
index 0000000..b278bb7
--- /dev/null
+++ b/scripts/compare.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows equivalent of scripts/compare.sh - delegates to bash because the .sh sources common.sh. All arguments are forwarded and the delegate's exit code is returned.
+setlocal
+set "_THIS=%~dp0"
+if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%"
+call "%_THIS%\..\benchmarks\helpers\_delegate_to_bash.bat" "%_THIS%\compare.sh" %*
+exit /b %errorlevel%
diff --git a/scripts/eval.bat b/scripts/eval.bat
new file mode 100644
index 0000000..9005e7f
--- /dev/null
+++ b/scripts/eval.bat
@@ -0,0 +1,9 @@
+@echo off
+REM Windows equivalent of scripts/eval.sh - delegates to bash because the script
+REM sources common.sh (a bash function library: parse_common_args, apply_model_profile_if_set,
+REM check_langfuse_env, list_benchmarks). All arguments are forwarded; the exit code is the delegate's.
+setlocal
+set "_THIS=%~dp0"
+if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%"
+call "%_THIS%\..\benchmarks\helpers\_delegate_to_bash.bat" "%_THIS%\eval.sh" %*
+exit /b %errorlevel%
diff --git a/scripts/m3_pad_to_cap_verify.bat b/scripts/m3_pad_to_cap_verify.bat
new file mode 100644
index 0000000..28abb0f
--- /dev/null
+++ b/scripts/m3_pad_to_cap_verify.bat
@@ -0,0 +1,11 @@
+@echo off
+REM Windows equivalent of scripts/m3_pad_to_cap_verify.sh - delegates to bash.
+REM All arguments are forwarded; the exit code is that of _delegate_to_bash.bat.
+REM The .sh uses tee, mktemp, an embedded Python heredoc, gh api PATCH with an
+REM @file body, process substitution, and signal traps. None of these have
+REM clean cmd.exe equivalents. Use Git Bash or WSL.
+setlocal
+set "_THIS=%~dp0"
+if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%"
+call "%_THIS%\..\benchmarks\helpers\_delegate_to_bash.bat" "%_THIS%\m3_pad_to_cap_verify.sh" %*
+exit /b %errorlevel%
diff --git a/scripts/model_profiles.bat b/scripts/model_profiles.bat
new file mode 100644
index 0000000..3372835
--- /dev/null
+++ b/scripts/model_profiles.bat
@@ -0,0 +1,56 @@
+@echo off
+REM Windows equivalent of model_profiles.sh
+REM Usage: call model_profiles.bat ^<profile_name^>
+REM Sets AGENT_SETTING_CONFIG, MODEL_NAME, OPENAI_BASE_URL, OPENAI_API_VERSION.
+REM Exit code: 0 for a known or empty profile name, 1 for an unknown one.
+setlocal
+set "PROFILE=%~1"
+REM Staging variables: filled in per profile below and copied to the caller's environment at the done label.
+set "_AGENT_SETTING="
+set "_MODEL_NAME="
+set "_BASE_URL="
+set "_API_VERSION="
+set "_RC=0"
+REM Profile names are compared case-insensitively (if /i).
+if "%PROFILE%"=="" goto done
+if /i "%PROFILE%"=="gpt-oss" (
+    set "_AGENT_SETTING=settings.groq.toml"
+    set "_MODEL_NAME=openai/gpt-oss-120b"
+    echo [OK] Model profile: gpt-oss
+    goto done
+)
+if /i "%PROFILE%"=="gpt4o" (
+    set "_AGENT_SETTING=settings.openai.toml"
+    set "_MODEL_NAME=Azure/gpt-4o"
+    set "_BASE_URL=https://ete-litellm.bx.cloud9.ibm.com"
+    set "_API_VERSION=2024-08-06"
+    echo [OK] Model profile: gpt4o ^(Azure/gpt-4o^)
+    goto done
+)
+if /i "%PROFILE%"=="gpt4.1" (
+    set "_AGENT_SETTING=settings.openai.toml"
+    set "_MODEL_NAME=Azure/gpt-4.1"
+    set "_BASE_URL=https://ete-litellm.bx.cloud9.ibm.com"
+    set "_API_VERSION=2024-08-06"
+    echo [OK] Model profile: gpt4.1 ^(Azure/gpt-4.1^)
+    goto done
+)
+if /i "%PROFILE%"=="opus4.5" (
+    set "_AGENT_SETTING=settings.openai.toml"
+    set "_MODEL_NAME=claude-opus-4-5-20251101"
+    set "_BASE_URL=https://ete-litellm.bx.cloud9.ibm.com"
+    echo [OK] Model profile: opus4.5
+    goto done
+)
+echo [ERROR] Unknown model profile '%PROFILE%'
+echo Valid values: gpt-oss, gpt4o, gpt4.1, opus4.5
+set "_RC=1"
+REM NOTE(review): OPENAI_BASE_URL and OPENAI_API_VERSION are cleared whenever their staging value is empty, which includes the empty-profile and unknown-profile paths - confirm this matches model_profiles.sh.
+:done
+endlocal & (
+    if not "%_AGENT_SETTING%"=="" set "AGENT_SETTING_CONFIG=%_AGENT_SETTING%"
+    if not "%_MODEL_NAME%"=="" set "MODEL_NAME=%_MODEL_NAME%"
+    if not "%_BASE_URL%"=="" (set "OPENAI_BASE_URL=%_BASE_URL%") else (set "OPENAI_BASE_URL=")
+    if not "%_API_VERSION%"=="" (set "OPENAI_API_VERSION=%_API_VERSION%") else (set "OPENAI_API_VERSION=")
+    exit /b %_RC%
+)
diff --git a/scripts/viz.bat b/scripts/viz.bat
new file mode 100644
index 0000000..37bcb04
--- /dev/null
+++ b/scripts/viz.bat
@@ -0,0 +1,27 @@
+@echo off
+REM Windows equivalent of viz.sh
+REM Loads benchmark env and runs cuga viz against the trajectory_data dir.
+REM The trajectory path is passed quoted (safe for spaces) and without a trailing backslash, which would escape the closing quote.
+setlocal enabledelayedexpansion
+
+set "BENCHMARK_NAME=%~1"
+if "%BENCHMARK_NAME%"=="" (
+    echo Usage: %~nx0 ^<benchmark_name^>
+    echo Example: %~nx0 m3
+    exit /b 1
+)
+
+set "SCRIPT_DIR=%~dp0"
+if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
+pushd "%SCRIPT_DIR%\.." >nul
+set "PROJECT_ROOT=%CD%"
+popd >nul
+
+echo Loading %BENCHMARK_NAME% visualization configuration...
+call "%PROJECT_ROOT%\benchmarks\helpers\load_env.bat" "%BENCHMARK_NAME%"
+if not defined CUGA_LOGGING_DIR (echo [ERROR] CUGA_LOGGING_DIR is not set; cannot locate trajectory_data & exit /b 1)
+echo.
+echo Running cuga viz...
+cd /d "%PROJECT_ROOT%" || exit /b 1
+uv run cuga-viz run "%CUGA_LOGGING_DIR%\trajectory_data"
+exit /b %errorlevel%
diff --git a/setup_appworld.bat b/setup_appworld.bat
new file mode 100644
index 0000000..ea3feaa
--- /dev/null
+++ b/setup_appworld.bat
@@ -0,0 +1,58 @@
+@echo off
+REM Windows equivalent of setup_appworld.sh
+REM
+REM Sources the appworld env file (set -a equivalent), then installs AppWorld
+REM via uv. Interactive reinstall prompt preserved.
+REM All paths are relative to the current directory, so run it from the repository root.
+setlocal disabledelayedexpansion
+
+set "APPWORLD_DIR=benchmarks\appworld"
+set "APPWORLD_ENV_FILE=benchmarks\appworld\config\appworld.env"
+set "APPWORLD_REPO_DIR=%APPWORLD_DIR%\appworld"
+set "APPWORLD_DATA_DIR=%APPWORLD_REPO_DIR%\data"
+
+if not exist "%APPWORLD_DIR%\" (
+    echo Error: '%APPWORLD_DIR%' directory not found!
+    echo Please clone the repository first
+    exit /b 1
+)
+
+if not exist "%APPWORLD_ENV_FILE%" (
+    echo Error: '%APPWORLD_ENV_FILE%' file not found!
+    exit /b 1
+)
+
+REM Load env file: each non-comment KEY=VALUE line becomes a variable.
+REM Delayed expansion is off here so an exclamation mark inside a value is kept verbatim.
+REM The tilde modifier drops surrounding double quotes from the value, as sourcing in bash would.
+for /f "usebackq tokens=* eol=#" %%L in ("%APPWORLD_ENV_FILE%") do (
+    for /f "tokens=1,* delims==" %%A in ("%%L") do set "%%A=%%~B"
+)
+
+if not exist "%APPWORLD_REPO_DIR%\" (
+    echo Error: '%APPWORLD_REPO_DIR%' directory not found!
+    echo Please clone the AppWorld repository into '%APPWORLD_REPO_DIR%' first
+    exit /b 1
+)
+
+REM Delayed expansion is only needed to read the prompt answer safely; REINSTALL is cleared so Enter means "no".
+setlocal enabledelayedexpansion
+set "REINSTALL="
+if exist "%APPWORLD_DATA_DIR%\" (
+    echo AppWorld repository already present at '%APPWORLD_REPO_DIR%'.
+    echo AppWorld data already exists at '%APPWORLD_DATA_DIR%'.
+    set /p REINSTALL="Would you like to reinstall AppWorld and re-download the data? [y/N] "
+    if /i not "!REINSTALL!"=="y" if /i not "!REINSTALL!"=="yes" (
+        echo Keeping existing AppWorld installation and data. Skipping setup.
+        exit /b 0
+    )
+    echo Reinstalling AppWorld and downloading data...
+)
+
+pushd "%APPWORLD_REPO_DIR%" || exit /b 1
+uv pip install .                || (popd & exit /b 1)
+uv run -m appworld.cli install  || (popd & exit /b 1)
+uv run appworld install --repo  || (popd & exit /b 1)
+uv run appworld download data   || (popd & exit /b 1)
+popd
+exit /b 0
diff --git a/setup_cuga.bat b/setup_cuga.bat
new file mode 100644
index 0000000..9515269
--- /dev/null
+++ b/setup_cuga.bat
@@ -0,0 +1,93 @@
+@echo off
+REM Windows equivalent of setup_cuga.sh
+REM Clones cuga-agent next to this repo (matches pyproject.toml path "../cuga-agent")
+REM and sets up environment variables for the current session.
+REM
+REM Note: env vars set here only persist for this cmd.exe session.
+REM See the follow-up issue tracking conversion of these scripts to Python.
+REM Exit code: 0 on success; 1 when git is missing, a directory cannot be entered, cloning fails or AppWorld setup fails.
+setlocal
+
+set "REPO_URL=https://github.com/cuga-project/cuga-agent.git"
+set "REPO_BRANCH=main"
+set "SCRIPT_DIR=%~dp0"
+REM strip trailing backslash from %~dp0
+if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
+pushd "%SCRIPT_DIR%\.." >nul || (echo [ERROR] Could not access parent of %SCRIPT_DIR% & exit /b 1)
+set "PARENT_DIR=%CD%"
+popd >nul
+set "REPO_NAME=cuga-agent"
+set "REPO_PATH=%PARENT_DIR%\%REPO_NAME%"
+
+echo ============================================================
+echo                    CUGA Agent Setup Script
+echo ============================================================
+echo.
+
+where git >nul 2>&1
+if errorlevel 1 (
+    echo [ERROR] Git is not installed. Please install git and try again.
+    exit /b 1
+)
+REM Three cases: an existing clone is updated (failure is only a warning); a non-git directory is deleted and re-cloned; otherwise a fresh clone.
+if exist "%REPO_PATH%\.git" (
+    echo [INFO] Repository already exists at %REPO_PATH%
+    echo [INFO] Pulling latest changes from branch: %REPO_BRANCH%...
+    pushd "%REPO_PATH%" || exit /b 1
+    git fetch origin && git checkout "%REPO_BRANCH%" && git pull origin "%REPO_BRANCH%"
+    if errorlevel 1 echo [WARNING] Could not update repository. You may need to resolve conflicts manually.
+    popd
+) else if exist "%REPO_PATH%\" (
+    echo [WARNING] Directory exists but is not a git repository. Removing and cloning fresh...
+    rmdir /s /q "%REPO_PATH%"
+    call :clone_repo || exit /b 1
+) else (
+    call :clone_repo || exit /b 1
+)
+REM endlocal closes the setlocal scope so the three variables below reach the caller; script-local variables such as REPO_PATH are discarded from here on.
+echo [INFO] Exporting environment variables...
+endlocal & (
+    set "ENV_FILE=.\.env"
+    set "MCP_SERVERS_FILE=.\mcp_servers.yaml"
+    set "CUGA_LOGGING_DIR=.\logging"
+    echo [INFO] Exported ENV_FILE=.\.env
+    echo [INFO] Exported MCP_SERVERS_FILE=.\mcp_servers.yaml
+    echo [INFO] Exported CUGA_LOGGING_DIR=.\logging
+)
+
+if not exist ".\logging" (
+    echo [INFO] Creating logging directory...
+    mkdir ".\logging"
+)
+
+REM Optionally run AppWorld setup if the script exists next to this one; its failure aborts with exit code 1.
+if exist "%~dp0setup_appworld.bat" (
+    echo [INFO] Running AppWorld setup...
+    call "%~dp0setup_appworld.bat"
+    if errorlevel 1 (
+        echo [ERROR] AppWorld setup failed
+        exit /b 1
+    )
+) else (
+    echo [WARNING] AppWorld setup script not found. Skipping.
+)
+REM REPO_PATH was discarded by the endlocal above, so the clone location (sibling directory cuga-agent) is rebuilt from the script directory.
+echo.
+echo [SUCCESS] Setup completed successfully!
+echo.
+echo Next steps:
+for %%I in ("%~dp0..\cuga-agent") do echo   1. Check the cloned repository at: %%~fI
+echo   2. Environment variables are now available in this terminal session
+echo   3. Note: Variables will only persist for this terminal session
+echo.
+exit /b 0
+REM Subroutine: clones branch REPO_BRANCH of REPO_URL into REPO_PATH. Returns 1 when git clone fails, 0 otherwise.
+:clone_repo
+echo [INFO] Cloning %REPO_URL% (branch: %REPO_BRANCH%)...
+git clone -b "%REPO_BRANCH%" "%REPO_URL%" "%REPO_PATH%"
+if errorlevel 1 (
+    echo [ERROR] Failed to clone repository. Please check your SSH keys and network connection.
+    exit /b 1
+)
+echo [SUCCESS] Repository cloned successfully to %REPO_PATH%
+exit /b 0
diff --git a/setup_m3.bat b/setup_m3.bat
new file mode 100644
index 0000000..2592a3c
--- /dev/null
+++ b/setup_m3.bat
@@ -0,0 +1,229 @@
+@echo off
+REM Windows equivalent of setup_m3.sh
+REM
+REM Clones vakra into vendor\, sets up Python venv, installs deps, downloads
+REM benchmark data, builds Docker image, starts containers.
+REM
+REM Requires: git, docker or podman, python (with venv), HF_TOKEN env var (checked only when data is downloaded).
+REM Optional: make - used for download, build and start when it is found on PATH. Paths are relative to the current directory.
+setlocal enabledelayedexpansion
+
+set "REPO_URL=https://github.com/IBM/vakra.git"
+set "VENDOR_DIR=.\vendor"
+set "REPO_NAME=vakra"
+set "REPO_PATH=%VENDOR_DIR%\%REPO_NAME%"
+set "DATA_DIR=%REPO_PATH%\data"
+
+REM Mode flags; each one is switched to true by the matching command-line option.
+set "DOWNLOAD_ONLY=false"
+set "BUILD_ONLY=false"
+set "START_ONLY=false"
+set "VERIFY_ONLY=false"
+set "SKIP_DOWNLOAD=false"
+
+:parse_args
+if "%~1"=="" goto args_done
+if "%~1"=="--download-only" (set "DOWNLOAD_ONLY=true" & shift /1 & goto parse_args)
+if "%~1"=="--build-only"    (set "BUILD_ONLY=true"    & shift /1 & goto parse_args)
+if "%~1"=="--start-only"    (set "START_ONLY=true"    & shift /1 & goto parse_args)
+if "%~1"=="--verify"        (set "VERIFY_ONLY=true"   & shift /1 & goto parse_args)
+if "%~1"=="--skip-download" (set "SKIP_DOWNLOAD=true" & shift /1 & goto parse_args)
+if "%~1"=="--help" goto show_usage
+echo [ERROR] Unknown option: %~1
+call :show_usage & exit /b 1
+REM shift /1 above leaves argument 0 (the script name printed below) untouched. show_usage exits 0 for --help; the unknown-option path calls it and then exits 1.
+:show_usage
+echo Usage: %~nx0 [OPTIONS]
+echo.
+echo Options:
+echo   --download-only Download data only (no build/start)
+echo   --build-only    Only build image, don't start containers
+echo   --start-only    Only start containers (assumes already built)
+echo   --verify        Only verify containers are running
+echo   --skip-download Skip data download step
+echo   --help          Show this help message
+exit /b 0
+
+:args_done
+echo ============================================================
+echo               Vakra Benchmark Setup Script
+echo ============================================================
+echo.
+
+REM Check prerequisites: git is mandatory; docker is preferred and podman is only probed when docker is absent.
+where git >nul 2>&1
+if errorlevel 1 (
+    echo [ERROR] Missing required dependency: git
+    exit /b 1
+)
+set "RUNTIME="
+where docker >nul 2>&1 && set "RUNTIME=docker"
+if "%RUNTIME%"=="" where podman >nul 2>&1 && set "RUNTIME=podman"
+if "%RUNTIME%"=="" (
+    echo [ERROR] Missing required dependency: docker or podman
+    exit /b 1
+)
+echo [INFO] Using container runtime: %RUNTIME%
+REM --verify: only report container status and exit with the verification result.
+if "%VERIFY_ONLY%"=="true" (
+    call :verify_containers
+    exit /b !errorlevel!
+)
+REM --start-only: start the containers, then verify; clone, install, download and build are skipped.
+if "%START_ONLY%"=="true" (
+    call :start_containers || exit /b 1
+    call :verify_containers
+    exit /b !errorlevel!
+)
+
+REM Step 1: Clone or update repo (pull tries branch main, then master; a failed pull is only a warning)
+if not exist "%VENDOR_DIR%\" (
+    echo [INFO] Creating vendor directory...
+    mkdir "%VENDOR_DIR%"
+)
+if exist "%REPO_PATH%\.git" (
+    echo [INFO] Repository already exists at %REPO_PATH%, pulling latest...
+    pushd "%REPO_PATH%" || exit /b 1
+    git pull origin main 2>nul || git pull origin master 2>nul
+    if errorlevel 1 echo [WARNING] Could not update repository
+    popd
+) else if exist "%REPO_PATH%\" (
+    echo [WARNING] Directory exists but is not a git repository. Removing and cloning fresh...
+    rmdir /s /q "%REPO_PATH%"
+    call :clone_repo || exit /b 1
+) else (
+    call :clone_repo || exit /b 1
+)
+
+REM Step 2: Python env + install deps (the venv is created inside REPO_PATH on first run; any pip failure aborts)
+echo [INFO] Step 2: Installing Python dependencies...
+pushd "%REPO_PATH%" || exit /b 1
+if not exist ".venv\" (
+    echo [INFO] Creating Python virtual environment...
+    python -m venv .venv || (echo [ERROR] Failed to create venv & popd & exit /b 1)
+)
+echo [INFO] Activating virtual environment and installing vakra...
+call .venv\Scripts\activate.bat
+pip install -e ".[init]" || (echo [ERROR] vakra install failed & popd & exit /b 1)
+pip install -r requirements_benchmark.txt || (echo [ERROR] benchmark deps install failed & popd & exit /b 1)
+popd
+
+REM Step 3: Download data unless --skip-download was given; a missing DATA_DIR or a failing dir /b /a (treated as empty) triggers the download
+if "%SKIP_DOWNLOAD%"=="false" (
+    if not exist "%DATA_DIR%\" (
+        call :download_data || exit /b 1
+    ) else (
+        dir /b /a "%DATA_DIR%" >nul 2>&1
+        if errorlevel 1 (
+            call :download_data || exit /b 1
+        ) else (
+            echo [INFO] Data directory exists and is not empty, skipping download
+        )
+    )
+)
+REM --download-only stops here, before any image build or container start.
+if "%DOWNLOAD_ONLY%"=="true" (
+    echo [SUCCESS] Setup and data download completed!
+    exit /b 0
+)
+
+REM Build + start: a failed build or start aborts with exit code 1 (as in the --start-only path); a failed verify only prints its warning
+call :build_image || exit /b 1
+if "%BUILD_ONLY%"=="false" (
+    call :start_containers || exit /b 1
+    call :verify_containers
+)
+
+echo.
+echo [SUCCESS] Vakra setup completed successfully!
+echo.
+echo Container Information:
+echo   * capability_1_bi_apis - Tool Chaining MCP Server
+echo   * capability_2_dashboard_apis - Tool Selection MCP Server
+echo   * capability_3_multihop_reasoning - Multi-hop Reasoning MCP Server
+echo   * capability_4_multiturn - Multi-hop Multi-Source MCP Server
+exit /b 0
+REM Subroutine: clones REPO_URL into REPO_PATH. Returns 1 when git clone fails, 0 otherwise.
+:clone_repo
+echo [INFO] Cloning %REPO_URL%...
+git clone "%REPO_URL%" "%REPO_PATH%"
+if errorlevel 1 (
+    echo [ERROR] Failed to clone repository. Check SSH keys / network.
+    exit /b 1
+)
+echo [SUCCESS] Repository cloned successfully
+exit /b 0
+REM Subroutine: downloads benchmark data from inside REPO_PATH, using make download when make is on PATH and python benchmark_setup.py otherwise. Returns 1 when HF_TOKEN is unset or the download fails.
+:download_data
+echo [INFO] Downloading benchmark data from HuggingFace (~30 GB)...
+if "%HF_TOKEN%"=="" (
+    echo [ERROR] HF_TOKEN environment variable is not set
+    echo Set it with: set HF_TOKEN=hf_your_token_here
+    echo Get your token from: https://huggingface.co/settings/tokens
+    exit /b 1
+)
+pushd "%REPO_PATH%" || exit /b 1
+if exist ".venv\Scripts\activate.bat" call .venv\Scripts\activate.bat
+where make >nul 2>&1
+if not errorlevel 1 (
+    make download
+) else (
+    python benchmark_setup.py --download-data
+)
+if errorlevel 1 (
+    echo [ERROR] Failed to download data
+    popd
+    exit /b 1
+)
+popd
+echo [SUCCESS] Data downloaded successfully
+exit /b 0
+REM Subroutine: builds the image from inside REPO_PATH, via make build (with DOCKER set to the detected runtime) or, without make, a direct build of docker/Dockerfile.unified tagged m3_environ. Returns 1 on failure.
+:build_image
+echo [INFO] Building vakra Docker image using %RUNTIME%...
+pushd "%REPO_PATH%" || exit /b 1
+where make >nul 2>&1
+if not errorlevel 1 (
+    set "DOCKER=%RUNTIME%" && make build
+) else (
+    %RUNTIME% build -t m3_environ -f docker/Dockerfile.unified .
+)
+if errorlevel 1 (
+    echo [ERROR] Failed to build image
+    popd
+    exit /b 1
+)
+popd
+echo [SUCCESS] Image built successfully
+exit /b 0
+REM Subroutine: starts the containers from inside REPO_PATH, via make start (with DOCKER set to the detected runtime) or, without make, the runtime's compose up -d. Returns 1 on failure.
+:start_containers
+echo [INFO] Starting containers using %RUNTIME% compose...
+pushd "%REPO_PATH%" || exit /b 1
+where make >nul 2>&1
+if not errorlevel 1 (
+    set "DOCKER=%RUNTIME%" && make start
+) else (
+    %RUNTIME% compose up -d
+)
+if errorlevel 1 (
+    echo [ERROR] Failed to start containers
+    popd
+    exit /b 1
+)
+popd
+echo [SUCCESS] Containers started successfully
+exit /b 0
+
+:verify_containers
+REM Subroutine: prints the container table, then counts running containers whose name contains capability_.
+REM Returns 0 when at least 4 are running, 1 otherwise.
+echo [INFO] Verifying containers...
+%RUNTIME% ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}"
+REM find /c prints only the number of matching lines; RUNNING stays 0 if the pipeline prints nothing.
+set "RUNNING=0"
+for /f %%C in ('%RUNTIME% ps --format "{{.Names}}" ^| find /c "capability_"') do set "RUNNING=%%C"
+if %RUNNING% GEQ 4 (echo [SUCCESS] Found %RUNNING% capability containers running & exit /b 0)
+REM The warning is printed outside any parenthesized block so the ")" in its text is taken literally.
+echo [WARNING] Only %RUNNING% capability containers running (expected 4)
+exit /b 1

From 51ffa9d87d15fad79c5c1da645b7dbd79564e210 Mon Sep 17 00:00:00 2001
From: Harold Ship <harold@il.ibm.com>
Date: Sun, 17 May 2026 16:48:28 +0300
Subject: [PATCH 5/5] docs: document Windows .bat usage + add pwsh smoke test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

README changes:
- Expand the Windows note in §Installation to mention that every .sh
  has a .bat sibling, and that the simple wrappers run on stock cmd.exe
  (no WSL/Git Bash needed) while the heavy scripts delegate to bash.
- New §Running on Windows in Quick Start with cmd.exe/PowerShell
  examples mirroring the bash examples above it.
- Reference the smoke test and the long-term Python migration (#88).

scripts/test_bat_scripts.ps1 — new pwsh 7 smoke test that verifies
structural invariants of the .bat layer:
  1. every in-scope .sh has a sibling .bat
  2. every .bat starts with `@echo off`
  3. every .bat has an `exit /b` terminator
  4. every delegate shim points to an existing .sh
  5. every shim's `_delegate_to_bash.bat` exists where expected
It doesn't execute the .bat files (cmd.exe isn't available on
mac/linux), but it caught a real bug on its first run — three orphan
`analyze.bat` files delegating to .sh files that don't exist on this
branch (they were added on master after this branch was cut). Test
runs on any host with pwsh:  `pwsh scripts/test_bat_scripts.ps1`.

Orphan deletions: benchmarks/appworld/analyze.bat,
benchmarks/m3/analyze.bat, scripts/analyze.bat. Their .sh counterparts
will arrive when master is merged in; the .bat files can come back at
that point.

.secrets.baseline auto-updated by the detect-secrets hook to refresh
the timestamp after the line-number shift in scripts/model_profiles.bat.
---
 .secrets.baseline               |   4 +-
 README.md                       |  62 ++++++++++++++-
 benchmarks/appworld/analyze.bat |   9 ---
 benchmarks/m3/analyze.bat       |   9 ---
 scripts/analyze.bat             |   8 --
 scripts/test_bat_scripts.ps1    | 131 ++++++++++++++++++++++++++++++++
 6 files changed, 193 insertions(+), 30 deletions(-)
 delete mode 100644 benchmarks/appworld/analyze.bat
 delete mode 100644 benchmarks/m3/analyze.bat
 delete mode 100644 scripts/analyze.bat
 create mode 100644 scripts/test_bat_scripts.ps1

diff --git a/.secrets.baseline b/.secrets.baseline
index 2e4c4fd..2fd95b3 100644
--- a/.secrets.baseline
+++ b/.secrets.baseline
@@ -3,7 +3,7 @@
     "files": "^.secrets.baseline$",
     "lines": null
   },
-  "generated_at": "2026-05-17T13:36:51Z",
+  "generated_at": "2026-05-17T13:48:18Z",
   "plugins_used": [
     {
       "name": "AWSKeyDetector"
@@ -111,7 +111,7 @@
       {
         "hashed_secret": "b45fc270bb9e9ddf4829b9124321ce244d38668e",
         "is_verified": false,
-        "line_number": 72,
+        "line_number": 79,
         "type": "Secret Keyword",
         "verified_result": null
       }
diff --git a/README.md b/README.md
index 6ea7d17..9a7983c 100644
--- a/README.md
+++ b/README.md
@@ -39,10 +39,17 @@ git clone https://github.com/cuga-project/cuga-eval.git
 cd cuga-eval
 ```
 
-> **Windows users (WSL):** if you cloned with Windows git (default `core.autocrlf=true`),
+> **Windows users:** every `.sh` script in this repo has a sibling `.bat`. You don't need
+> WSL or Git Bash for the simple wrappers (`setup_cuga.bat`, `run_app.bat`, `run_registry.bat`,
+> `viz.bat`, `model_profiles.bat`, `setup_m3.bat`, `setup_appworld.bat`, etc.) — they run on
+> stock `cmd.exe`. The heavier scripts (eval/compare/clean and the `m3_pad_to_cap_verify`
+> helper) delegate to bash via Git Bash or WSL because they use POSIX-only features. See
+> [Running on Windows](#running-on-windows) below.
+>
+> If you're using WSL and cloned with Windows git (default `core.autocrlf=true`),
 > the `*.sh` and `*.env` files end up with CRLF line endings, which break bash under WSL
 > (`$'\r': command not found`) and `source`d env files. Run `fix_line_endings.bat`
-> (double-click in Explorer, or run from cmd.exe / PowerShell) once before running any
+> (double-click in Explorer, or run from `cmd.exe` / PowerShell) once before running any
 > setup scripts under WSL.
 
 ### 2. Ensure CUGA Agent is in parent directory
@@ -128,6 +135,57 @@ cd benchmarks/m3 && ./eval.sh
 cd benchmarks/appworld && ./eval.sh
 ```
 
+### Running on Windows
+
+Every script has a `.bat` sibling. Same flags, same semantics; just substitute the
+extension and use `\` instead of `/`:
+
+```bat
+:: Top-level dispatcher (these scripts delegate to bash — see note below)
+scripts\eval.bat --benchmark bpo
+scripts\eval.bat --benchmark m3 --model-profile gpt-oss
+scripts\compare.bat --benchmark bpo --runs 3
+
+:: Setup (pure cmd.exe — no bash required)
+setup_cuga.bat
+setup_m3.bat --verify
+setup_appworld.bat
+
+:: Per-benchmark, from the benchmark dir
+cd benchmarks\bpo && eval.bat
+cd benchmarks\m3 && run_registry.bat
+
+:: Run from PowerShell the same way — pwsh launches .bat via cmd.exe
+.\setup_cuga.bat
+.\scripts\eval.bat --benchmark bpo
+```
+
+The `.bat` files fall into two groups:
+
+- **Pure `cmd.exe` ports** — setup scripts, env loaders, registry runners, app
+  launchers, model profiles, and the viz thin-wrapper. Work on a vanilla
+  Windows install with `cmd.exe` or PowerShell. No bash needed.
+- **Bash-delegate shims** — the heavy eval/compare/clean scripts and
+  `m3_pad_to_cap_verify`. These use POSIX features (signal traps, `lsof`, `pkill`,
+  process substitution, sourceable function libraries, embedded `python3` here-docs)
+  that don't have clean `cmd.exe` equivalents, so each shim calls
+  [`benchmarks\helpers\_delegate_to_bash.bat`](benchmarks/helpers/_delegate_to_bash.bat),
+  which finds a `bash` in this order: Git Bash (well-known install paths) →
+  `bash` on `PATH` → WSL. Install [Git for Windows](https://git-scm.com/download/win)
+  (provides Git Bash) or run `wsl --install` if neither is present.
+
+A smoke test for the `.bat` scripts ships at `scripts/test_bat_scripts.ps1`. It
+runs on any platform with PowerShell 7+:
+
+```bash
+pwsh scripts/test_bat_scripts.ps1
+```
+
+It validates that every `.sh` has a `.bat` sibling, that each `.bat` is well-formed,
+and that the delegate shims point to existing `.sh` files. Long-term, this whole
+layer will move to Python (one entrypoint instead of two parallel script trees) —
+tracked in [issue #88](../../issues/88).
+
 ### Model profiles
 
 Available profiles: `gpt-oss`, `gpt4o`, `gpt4.1`, `opus4.5`
diff --git a/benchmarks/appworld/analyze.bat b/benchmarks/appworld/analyze.bat
deleted file mode 100644
index f3b679b..0000000
--- a/benchmarks/appworld/analyze.bat
+++ /dev/null
@@ -1,9 +0,0 @@
-@echo off
-REM Windows equivalent of benchmarks/appworld/analyze.sh
-REM Thin wrapper around scripts/analyze.bat with --benchmark appworld.
-
-setlocal
-set "SCRIPT_DIR=%~dp0"
-if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
-call "%SCRIPT_DIR%\..\..\scripts\analyze.bat" --benchmark appworld %*
-exit /b %errorlevel%
diff --git a/benchmarks/m3/analyze.bat b/benchmarks/m3/analyze.bat
deleted file mode 100644
index 9905eff..0000000
--- a/benchmarks/m3/analyze.bat
+++ /dev/null
@@ -1,9 +0,0 @@
-@echo off
-REM Windows equivalent of benchmarks/m3/analyze.sh
-REM Thin wrapper around scripts/analyze.bat with --benchmark m3.
-
-setlocal
-set "SCRIPT_DIR=%~dp0"
-if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
-call "%SCRIPT_DIR%\..\..\scripts\analyze.bat" --benchmark m3 %*
-exit /b %errorlevel%
diff --git a/scripts/analyze.bat b/scripts/analyze.bat
deleted file mode 100644
index 06dae69..0000000
--- a/scripts/analyze.bat
+++ /dev/null
@@ -1,8 +0,0 @@
-@echo off
-REM Windows equivalent of scripts/analyze.sh — delegates to bash (uses
-REM bash arrays for --bundles / --task-ids, sources config .conf via source).
-setlocal
-set "_THIS=%~dp0"
-if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%"
-call "%_THIS%\..\benchmarks\helpers\_delegate_to_bash.bat" "%_THIS%\analyze.sh" %*
-exit /b %errorlevel%
diff --git a/scripts/test_bat_scripts.ps1 b/scripts/test_bat_scripts.ps1
new file mode 100644
index 0000000..98ee32c
--- /dev/null
+++ b/scripts/test_bat_scripts.ps1
@@ -0,0 +1,131 @@
+#!/usr/bin/env pwsh
+# Smoke-test the .bat scripts that mirror the .sh scripts in this repo.
+#
+# Runs on any platform with PowerShell 7+ (`pwsh`). Does NOT actually execute
+# the .bat files — cmd.exe isn't available on macOS/Linux — but verifies
+# structural invariants that catch typical authoring mistakes:
+#   1. Every in-scope .sh has a sibling .bat
+#   2. Every .bat starts with `@echo off`
+#   3. Every .bat terminates the main flow with `exit /b`
+#   4. Delegate shims reference an actually-existing .sh
+#   5. `_delegate_to_bash.bat` exists where delegates expect it
+#
+# On Windows, this same script can be extended to actually invoke each .bat
+# with --help (where supported) and check the exit code.
+#
+# Usage:  pwsh scripts/test_bat_scripts.ps1
+
+$ErrorActionPreference = 'Stop'   # stop on the first cmdlet error instead of continuing
+$repoRoot = Resolve-Path -Path "$PSScriptRoot/.." | Select-Object -ExpandProperty Path   # parent directory of this script's folder
+# Result bookkeeping: a pass counter and the list of failure messages.
+$passes   = 0
+$failures = New-Object 'System.Collections.Generic.List[string]'
+function Fail([string]$msg) { Write-Host "  FAIL  $msg" -ForegroundColor Red; $script:failures.Add($msg) }        # report a failed check and remember its message
+function Pass([string]$msg) { Write-Host "  ok    $msg" -ForegroundColor DarkGreen; $script:passes += 1 }         # report a passed check and bump the counter
+
+$excludedDirs = 'vendor', 'node_modules', '.venv', '.git', 'site-packages'   # directory names skipped at any depth
+$excludedPathFrag = 'benchmarks', 'appworld', 'appworld' -join [IO.Path]::DirectorySeparatorChar   # path fragment skipped wherever it occurs
+
+function IsExcluded([string]$path) {
+    # True when $path has one of $excludedDirs as a directory component or contains $excludedPathFrag.
+    $sep = [IO.Path]::DirectorySeparatorChar
+    $dirHits = @($excludedDirs | Where-Object { $path -match ([regex]::Escape(('{0}{1}{0}' -f $sep, $_))) })
+    if ($dirHits.Count -gt 0) { return $true }
+    return ($path -like "*$excludedPathFrag*")
+}
+
+function RelPath([string]$full) { $full.Remove(0, $repoRoot.Length + 1) }   # drop the repo-root prefix plus one separator
+
+# Gather in-scope scripts: recurse from the repo root and drop anything IsExcluded rejects.
+$shFiles  = Get-ChildItem -Path $repoRoot -Recurse -File -Filter '*.sh'  | ForEach-Object { if (-not (IsExcluded $_.FullName)) { $_ } }
+$batFiles = Get-ChildItem -Path $repoRoot -Recurse -File -Filter '*.bat' | ForEach-Object { if (-not (IsExcluded $_.FullName)) { $_ } }
+Write-Host ("Repo root: {0}" -f $repoRoot)
+Write-Host ("Found {0} .sh files and {1} .bat files in scope." -f $shFiles.Count, $batFiles.Count)
+
+# ---- [1] every .sh has a sibling .bat -----------------------------------
+Write-Host "`n[1] every .sh has a sibling .bat" -ForegroundColor Cyan
+foreach ($shFile in $shFiles) {
+    $rel = RelPath $shFile.FullName
+    # The sibling is the same path with its extension swapped to .bat.
+    if (-not (Test-Path -LiteralPath ([IO.Path]::ChangeExtension($shFile.FullName, '.bat')))) { Fail "missing .bat sibling for $rel"; continue }
+    Pass $rel
+}
+
+# ---- [2] every .bat starts with @echo off --------------------------------
+Write-Host "`n[2] every .bat starts with '@echo off'" -ForegroundColor Cyan
+foreach ($bat in $batFiles) {
+    # An empty file makes Get-Content return $null; cast to [string] so .Trim() yields '' and the file is reported as a failure instead of aborting the run.
+    $first = ([string](Get-Content -LiteralPath $bat.FullName -TotalCount 1)).Trim()
+    if ($first -eq '@echo off') { Pass (RelPath $bat.FullName) } else { Fail "$(RelPath $bat.FullName) first line is '$first', expected '@echo off'" }
+}
+
+# ---- [3] every .bat has an `exit /b` terminator --------------------------
+# common.bat is intentionally a placeholder and uses `exit /b 0` early; ok.
+Write-Host "`n[3] every .bat contains 'exit /b' somewhere" -ForegroundColor Cyan
+foreach ($bat in $batFiles) {
+    $rel = RelPath $bat.FullName
+    # Presence anywhere in the file is enough; the position is not checked.
+    if ((Get-Content -LiteralPath $bat.FullName -Raw) -notmatch 'exit\s+/b') { Fail "$rel has no 'exit /b' terminator" } else { Pass $rel }
+}
+
+# ---- [4] delegate shims reference a real .sh -----------------------------
+# Skip _delegate_to_bash.bat itself — its REM comments contain example
+# placeholders like "<absolute-or-relative-path-to-script.sh>" that aren't
+# actual code paths.
+Write-Host "`n[4] delegate shims reference an existing .sh" -ForegroundColor Cyan
+$delegateRegex = [regex]'_delegate_to_bash\.bat"\s+"([^"]+)"'
+foreach ($bat in $batFiles) {
+    if ($bat.Name -eq '_delegate_to_bash.bat') { continue }
+    # Strip REM-prefixed lines so example syntax in comment blocks is ignored.
+    $codeLines = (Get-Content -LiteralPath $bat.FullName) | Where-Object { $_ -notmatch '^\s*(REM|::|@REM)\s' }
+    $content = $codeLines -join "`n"
+    $m = $delegateRegex.Match($content)
+    if (-not $m.Success) { continue }  # not a delegate shim
+    $target = $m.Groups[1].Value
+    # Expand %_THIS% to the .bat's own directory, normalise separators.
+    # '$' is doubled first: in a -replace substitution string '$_', '$1', '$&'
+    # etc. are regex substitutions and would corrupt a directory containing '$'.
+    $batDir = Split-Path -Parent $bat.FullName
+    $resolved = $target -replace '%_THIS%', $batDir.Replace('$', '$$')
+    $resolved = $resolved -replace '\\', ([IO.Path]::DirectorySeparatorChar)
+    # Collapse parent traversals (Resolve-Path errors if the file is missing)
+    try { $abs = [IO.Path]::GetFullPath($resolved) } catch { $abs = $resolved }
+    if (Test-Path -LiteralPath $abs) {
+        Pass "$(RelPath $bat.FullName) -> $(Split-Path -Leaf $abs)"
+    } else {
+        Fail "$(RelPath $bat.FullName) delegates to missing $target (resolved: $abs)"
+    }
+}
+
+# ---- [5] every delegate shim's _delegate_to_bash.bat actually exists -----
+# Skip the helper itself; its own REM block shows an example `call` statement.
+Write-Host "`n[5] _delegate_to_bash.bat exists where shims expect it" -ForegroundColor Cyan
+$delegateHelperRegex = [regex]'call\s+"([^"]*_delegate_to_bash\.bat)"'
+foreach ($bat in $batFiles) {
+    if ($bat.Name -eq '_delegate_to_bash.bat') { continue }
+    $content = ((Get-Content -LiteralPath $bat.FullName) | Where-Object { $_ -notmatch '^\s*(REM|::|@REM)\s' }) -join "`n"
+    $m = $delegateHelperRegex.Match($content)
+    if (-not $m.Success) { continue }  # file does not call the helper
+    $target = $m.Groups[1].Value
+    $batDir = Split-Path -Parent $bat.FullName
+    # Double '$' so -replace inserts the directory literally instead of treating '$_', '$1', ... as regex substitutions.
+    $resolved = $target -replace '%_THIS%', $batDir.Replace('$', '$$')
+    $resolved = $resolved -replace '\\', ([IO.Path]::DirectorySeparatorChar)
+    try { $abs = [IO.Path]::GetFullPath($resolved) } catch { $abs = $resolved }
+    if (Test-Path -LiteralPath $abs) {
+        Pass (RelPath $bat.FullName)
+    } else {
+        Fail "$(RelPath $bat.FullName) calls missing $target (resolved: $abs)"
+    }
+}
+
+# ---- summary -------------------------------------------------------------
+$failedCount = $failures.Count
+$failedColor = if ($failedCount -eq 0) { 'Green' } else { 'Red' }
+Write-Host "`n=== Summary ===" -ForegroundColor Cyan
+Write-Host "checks passed: $passes" -ForegroundColor Green
+Write-Host "checks failed: $failedCount" -ForegroundColor $failedColor
+# Exit 0 when nothing failed; otherwise list every failure and exit 1.
+if ($failedCount -eq 0) { Write-Host "`nAll structural checks passed."; exit 0 }
+Write-Host "`nFailures:" -ForegroundColor Red
+foreach ($failure in $failures) { Write-Host "  - $failure" -ForegroundColor Red }
+exit 1