From 971de2b352ccaab16c16729f2954132618af1225 Mon Sep 17 00:00:00 2001 From: Harold Ship Date: Sun, 10 May 2026 12:08:35 +0300 Subject: [PATCH 1/5] fix: pin shell scripts to LF line endings via .gitattributes Prevents Windows clones (with core.autocrlf=true) from checking out *.sh files with CRLF, which breaks bash under WSL. Closes #74 Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitattributes | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..f80f339 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,4 @@ +# Force LF line endings for shell scripts so they run correctly under +# bash/WSL even when the repo is cloned on Windows with core.autocrlf=true. +*.sh text eol=lf +*.bash text eol=lf From 4e7a6fbe05de77d7c56e910422d5f50346fbdfc6 Mon Sep 17 00:00:00 2001 From: Harold Ship Date: Mon, 11 May 2026 12:47:03 +0300 Subject: [PATCH 2/5] fix: add Windows .bat to strip CRLF from *.sh and *.env Expands the CRLF fix beyond the .gitattributes pin so existing Windows clones can recover without re-cloning: * .gitattributes now also pins *.env, *.yaml, *.yml, *.toml to LF (only *.sh and *.bash were covered before, so *.env files like appworld.env were still checked out as CRLF on Windows). * Adds fix_line_endings.bat at repo root: a self-contained Windows batch + PowerShell script that walks the repo and strips CRLF from *.sh and *.env files, skipping .git/.venv/vendor/node_modules. * Pins *.bat/*.cmd/*.ps1 to CRLF (cmd.exe expects CRLF in batch files). * README: short Windows/WSL note pointing users at the .bat. A bash self-heal prelude was attempted but is fundamentally unworkable: bash cannot parse a script whose own keywords end in \r ("fi\r" != "fi"), so the heal code never gets a chance to run. 
Refs #74 Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitattributes | 15 +++++++++++++-- .secrets.baseline | 11 ++++++++++- README.md | 6 ++++++ fix_line_endings.bat | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 fix_line_endings.bat diff --git a/.gitattributes b/.gitattributes index f80f339..7210b43 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,15 @@ -# Force LF line endings for shell scripts so they run correctly under -# bash/WSL even when the repo is cloned on Windows with core.autocrlf=true. +# Force LF line endings for shell scripts and config text files so they +# work correctly under bash/WSL even when the repo is cloned on Windows +# with core.autocrlf=true. CRLF in *.env files breaks `source`, and CRLF +# in *.sh files breaks the bash interpreter ("$'\r': command not found"). *.sh text eol=lf *.bash text eol=lf +*.env text eol=lf +*.yaml text eol=lf +*.yml text eol=lf +*.toml text eol=lf + +# Windows batch scripts must keep CRLF so cmd.exe parses them reliably. 
+*.bat text eol=crlf +*.cmd text eol=crlf +*.ps1 text eol=crlf diff --git a/.secrets.baseline b/.secrets.baseline index f33da84..3bd68e3 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": "^.secrets.baseline$", "lines": null }, - "generated_at": "2026-05-10T11:36:08Z", + "generated_at": "2026-05-11T09:46:48Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -107,6 +107,15 @@ "verified_result": null } ], + "README.md": [ + { + "hashed_secret": "b45fc270bb9e9ddf4829b9124321ce244d38668e", + "is_verified": false, + "line_number": 72, + "type": "Secret Keyword", + "verified_result": null + } + ], "benchmarks/appworld/debug/example.py": [ { "hashed_secret": "c18006fc138809314751cd1991f1e0b820fabd37", diff --git a/README.md b/README.md index b2a5b1c..6ea7d17 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,12 @@ git clone https://github.com/cuga-project/cuga-eval.git cd cuga-eval ``` +> **Windows users (WSL):** if you cloned with Windows git (default `core.autocrlf=true`), +> the `*.sh` and `*.env` files end up with CRLF line endings, which break bash under WSL +> (`$'\r': command not found`) and `source`d env files. Run `fix_line_endings.bat` +> (double-click in Explorer, or run from cmd.exe / PowerShell) once before running any +> setup scripts under WSL. + ### 2. Ensure CUGA Agent is in parent directory The `cuga-agent` repository must be located at `../cuga-agent` (one directory up from this repository). diff --git a/fix_line_endings.bat b/fix_line_endings.bat new file mode 100644 index 0000000..5f5f6ff --- /dev/null +++ b/fix_line_endings.bat @@ -0,0 +1,37 @@ +@echo off +REM ============================================================================ +REM fix_line_endings.bat +REM +REM Strips CRLF line endings from *.sh and *.env files in this repo so they +REM work under WSL bash. 
Run this once on Windows after cloning the repo (or +REM after pulling, if you have stale CRLF files), BEFORE running setup_cuga.sh +REM or setup_m3.sh under WSL. +REM +REM Usage: double-click, or from cmd.exe / PowerShell: fix_line_endings.bat +REM ============================================================================ + +setlocal +cd /d "%~dp0" + +echo. +echo Normalizing *.sh and *.env line endings (CRLF -^> LF) under: +echo %CD% +echo. + +powershell -NoProfile -ExecutionPolicy Bypass -Command "$ErrorActionPreference='Stop'; $root = (Get-Location).Path; $count = 0; $files = Get-ChildItem -Path . -Recurse -File -Include *.sh,*.env | Where-Object { $_.FullName -notmatch '\\(\.git|\.venv|vendor|node_modules)\\' }; foreach ($f in $files) { $b = [IO.File]::ReadAllBytes($f.FullName); if ($b -contains 13) { $c = New-Object Collections.Generic.List[byte]; foreach ($x in $b) { if ($x -ne 13) { $c.Add($x) } }; [IO.File]::WriteAllBytes($f.FullName, $c.ToArray()); Write-Host (' normalized: ' + $f.FullName.Substring($root.Length + 1)); $count++ } }; Write-Host ''; Write-Host ('Normalized ' + $count + ' file(s).')" + +if errorlevel 1 ( + echo. + echo ERROR: normalization failed. See PowerShell error above. + exit /b 1 +) + +echo. +echo Done. You can now run setup_cuga.sh / setup_m3.sh under WSL. +echo. + +REM Pause so the window stays open if double-clicked from Explorer. +if defined PROMPT goto :end +pause +:end +endlocal From b71e183157db0e4d5afd82c06768a7239b167823 Mon Sep 17 00:00:00 2001 From: Harold Ship Date: Mon, 11 May 2026 13:13:41 +0300 Subject: [PATCH 3/5] fix: make fix_line_endings.bat exclusions cross-platform The exclusion regex used \\(...)\\ which only matches Windows backslash separators. Verified with pwsh on macOS: vendor/ and node_modules/ paths were silently being normalized because the regex never matched their forward-slash paths. 
Switch to [\\/](...)[\\/] so exclusions work whether the script is invoked under Windows cmd.exe (backslash paths) or pwsh on any OS (which is how this was discovered). Tested via pwsh on macOS against a fixture with .git/, .venv/, vendor/, node_modules/ paths -- all correctly skipped now. Refs #74 Co-Authored-By: Claude Opus 4.7 (1M context) --- fix_line_endings.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fix_line_endings.bat b/fix_line_endings.bat index 5f5f6ff..4bfb851 100644 --- a/fix_line_endings.bat +++ b/fix_line_endings.bat @@ -18,7 +18,7 @@ echo Normalizing *.sh and *.env line endings (CRLF -^> LF) under: echo %CD% echo. -powershell -NoProfile -ExecutionPolicy Bypass -Command "$ErrorActionPreference='Stop'; $root = (Get-Location).Path; $count = 0; $files = Get-ChildItem -Path . -Recurse -File -Include *.sh,*.env | Where-Object { $_.FullName -notmatch '\\(\.git|\.venv|vendor|node_modules)\\' }; foreach ($f in $files) { $b = [IO.File]::ReadAllBytes($f.FullName); if ($b -contains 13) { $c = New-Object Collections.Generic.List[byte]; foreach ($x in $b) { if ($x -ne 13) { $c.Add($x) } }; [IO.File]::WriteAllBytes($f.FullName, $c.ToArray()); Write-Host (' normalized: ' + $f.FullName.Substring($root.Length + 1)); $count++ } }; Write-Host ''; Write-Host ('Normalized ' + $count + ' file(s).')" +powershell -NoProfile -ExecutionPolicy Bypass -Command "$ErrorActionPreference='Stop'; $root = (Get-Location).Path; $count = 0; $files = Get-ChildItem -Path . 
-Recurse -File -Include *.sh,*.env | Where-Object { $_.FullName -notmatch '[\\/](\.git|\.venv|vendor|node_modules)[\\/]' }; foreach ($f in $files) { $b = [IO.File]::ReadAllBytes($f.FullName); if ($b -contains 13) { $c = New-Object Collections.Generic.List[byte]; foreach ($x in $b) { if ($x -ne 13) { $c.Add($x) } }; [IO.File]::WriteAllBytes($f.FullName, $c.ToArray()); Write-Host (' normalized: ' + $f.FullName.Substring($root.Length + 1)); $count++ } }; Write-Host ''; Write-Host ('Normalized ' + $count + ' file(s).')" if errorlevel 1 ( echo. From 7493f57b1c509449dfb2f6e533206a4157880992 Mon Sep 17 00:00:00 2001 From: Harold Ship Date: Sun, 17 May 2026 16:37:54 +0300 Subject: [PATCH 4/5] feat: add Windows .bat equivalents for all .sh scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every .sh in the repo now has a sibling .bat (35 scripts + 1 shared _delegate_to_bash.bat helper) so Windows users have a path that doesn't require Git Bash for the simple cases. Two translation styles: - Pure cmd.exe ports (15 files): the simple wrappers and env-loaders — setup_cuga, setup_m3, setup_appworld, load_env, run_registry (helper + 4 per-benchmark stubs), model_profiles, viz, the analyze thin-stubs, all run_app/run_eval wrappers. These are usable on a vanilla Windows install. - bash-delegate shims (19 files): for the heavy scripts that use POSIX-only features (lsof, pkill, signal traps, process substitution, sourceable function libraries, embedded Python heredocs, mktemp, comm, find -mindepth/-maxdepth). Each shim is ~6 lines and calls _delegate_to_bash.bat, which tries Git Bash (well-known install paths) -> bash on PATH -> WSL -> friendly install instructions. benchmarks/helpers/common.bat is a placeholder noting that the bash function library it mirrors can't be sourced into cmd.exe; callers that delegate to bash source the .sh version directly. 
.secrets.baseline gets one new entry for the openai/gpt-oss-120b model name in scripts/model_profiles.bat (false positive — same string is already in scripts/model_profiles.sh; cmd.exe's lack of inline comment syntax means the standard pragma can't be embedded on the line, so the baseline is the cleaner workaround). The longer-term cleanup — move logic into Python so .sh and .bat both become ~5-line wrappers around `uv run python -m ...` — is tracked in research-rpa/cuga-internal-evaluation#88. --- .secrets.baseline | 11 +- benchmarks/appworld/analyze.bat | 9 + benchmarks/appworld/compare.bat | 7 + benchmarks/appworld/eval.bat | 8 + benchmarks/appworld/run_app.bat | 19 ++ benchmarks/appworld/run_eval.bat | 13 + benchmarks/appworld/run_registry.bat | 8 + benchmarks/bpo/compare.bat | 7 + benchmarks/bpo/eval.bat | 7 + benchmarks/bpo/run_app.bat | 19 ++ benchmarks/bpo/run_registry.bat | 7 + benchmarks/helpers/_delegate_to_bash.bat | 60 +++++ benchmarks/helpers/common.bat | 24 ++ benchmarks/helpers/load_env.bat | 55 +++++ benchmarks/helpers/run_registry.bat | 28 +++ benchmarks/m3/analyze.bat | 9 + benchmarks/m3/clean.bat | 8 + benchmarks/m3/compare.bat | 7 + benchmarks/m3/eval.bat | 11 + benchmarks/m3/eval/scripts/monitor_eval.bat | 7 + .../m3/eval/scripts/run_eval_background.bat | 8 + benchmarks/m3/eval/scripts/setup_m3_eval.bat | 8 + benchmarks/m3/run_registry.bat | 7 + benchmarks/m3/run_with_container.bat | 7 + benchmarks/oak_health_insurance/compare.bat | 7 + benchmarks/oak_health_insurance/eval.bat | 7 + benchmarks/oak_health_insurance/run_app.bat | 19 ++ .../oak_health_insurance/run_registry.bat | 7 + scripts/analyze.bat | 8 + scripts/compare.bat | 7 + scripts/eval.bat | 9 + scripts/m3_pad_to_cap_verify.bat | 11 + scripts/model_profiles.bat | 56 +++++ scripts/viz.bat | 27 +++ setup_appworld.bat | 58 +++++ setup_cuga.bat | 93 +++++++ setup_m3.bat | 229 ++++++++++++++++++ 37 files changed, 891 insertions(+), 1 deletion(-) create mode 100644 
benchmarks/appworld/analyze.bat create mode 100644 benchmarks/appworld/compare.bat create mode 100644 benchmarks/appworld/eval.bat create mode 100644 benchmarks/appworld/run_app.bat create mode 100644 benchmarks/appworld/run_eval.bat create mode 100644 benchmarks/appworld/run_registry.bat create mode 100644 benchmarks/bpo/compare.bat create mode 100644 benchmarks/bpo/eval.bat create mode 100644 benchmarks/bpo/run_app.bat create mode 100644 benchmarks/bpo/run_registry.bat create mode 100644 benchmarks/helpers/_delegate_to_bash.bat create mode 100644 benchmarks/helpers/common.bat create mode 100644 benchmarks/helpers/load_env.bat create mode 100644 benchmarks/helpers/run_registry.bat create mode 100644 benchmarks/m3/analyze.bat create mode 100644 benchmarks/m3/clean.bat create mode 100644 benchmarks/m3/compare.bat create mode 100644 benchmarks/m3/eval.bat create mode 100644 benchmarks/m3/eval/scripts/monitor_eval.bat create mode 100644 benchmarks/m3/eval/scripts/run_eval_background.bat create mode 100644 benchmarks/m3/eval/scripts/setup_m3_eval.bat create mode 100644 benchmarks/m3/run_registry.bat create mode 100644 benchmarks/m3/run_with_container.bat create mode 100644 benchmarks/oak_health_insurance/compare.bat create mode 100644 benchmarks/oak_health_insurance/eval.bat create mode 100644 benchmarks/oak_health_insurance/run_app.bat create mode 100644 benchmarks/oak_health_insurance/run_registry.bat create mode 100644 scripts/analyze.bat create mode 100644 scripts/compare.bat create mode 100644 scripts/eval.bat create mode 100644 scripts/m3_pad_to_cap_verify.bat create mode 100644 scripts/model_profiles.bat create mode 100644 scripts/viz.bat create mode 100644 setup_appworld.bat create mode 100644 setup_cuga.bat create mode 100644 setup_m3.bat diff --git a/.secrets.baseline b/.secrets.baseline index 3bd68e3..2e4c4fd 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": "^.secrets.baseline$", "lines": null }, - "generated_at": 
"2026-05-11T09:46:48Z", + "generated_at": "2026-05-17T13:36:51Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -315,6 +315,15 @@ "type": "Hex High Entropy String", "verified_result": null } + ], + "scripts/model_profiles.bat": [ + { + "hashed_secret": "af89b35ce32cfc9eaf4c102325da47616e6eff93", + "is_verified": false, + "line_number": 18, + "type": "Base64 High Entropy String", + "verified_result": null + } ] }, "version": "0.13.1+ibm.64.dss", diff --git a/benchmarks/appworld/analyze.bat b/benchmarks/appworld/analyze.bat new file mode 100644 index 0000000..f3b679b --- /dev/null +++ b/benchmarks/appworld/analyze.bat @@ -0,0 +1,9 @@ +@echo off +REM Windows equivalent of benchmarks/appworld/analyze.sh +REM Thin wrapper around scripts/analyze.bat with --benchmark appworld. + +setlocal +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" +call "%SCRIPT_DIR%\..\..\scripts\analyze.bat" --benchmark appworld %* +exit /b %errorlevel% diff --git a/benchmarks/appworld/compare.bat b/benchmarks/appworld/compare.bat new file mode 100644 index 0000000..fddd539 --- /dev/null +++ b/benchmarks/appworld/compare.bat @@ -0,0 +1,7 @@ +@echo off +REM Windows equivalent of benchmarks/appworld/compare.sh — delegates to bash. +setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\compare.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/appworld/eval.bat b/benchmarks/appworld/eval.bat new file mode 100644 index 0000000..4f355c8 --- /dev/null +++ b/benchmarks/appworld/eval.bat @@ -0,0 +1,8 @@ +@echo off +REM Windows equivalent of benchmarks/appworld/eval.sh — delegates to bash +REM (traps, kill -0, lsof, process substitution, find with -mindepth). 
+setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\eval.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/appworld/run_app.bat b/benchmarks/appworld/run_app.bat new file mode 100644 index 0000000..dc1567c --- /dev/null +++ b/benchmarks/appworld/run_app.bat @@ -0,0 +1,19 @@ +@echo off +REM Windows equivalent of benchmarks/appworld/run_app.sh +REM Loads env and starts AppWorld. + +setlocal +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" +pushd "%SCRIPT_DIR%\..\.." >nul +set "PROJECT_ROOT=%CD%" +popd >nul + +echo Loading AppWorld configuration... +call "%PROJECT_ROOT%\benchmarks\helpers\load_env.bat" "appworld" + +echo. +echo Starting AppWorld... +cd /d "%PROJECT_ROOT%" +uv run cuga start appworld +exit /b %errorlevel% diff --git a/benchmarks/appworld/run_eval.bat b/benchmarks/appworld/run_eval.bat new file mode 100644 index 0000000..a14a943 --- /dev/null +++ b/benchmarks/appworld/run_eval.bat @@ -0,0 +1,13 @@ +@echo off +REM Windows equivalent of benchmarks/appworld/run_eval.sh +REM Loads AppWorld env and runs cuga-eval. + +setlocal +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" +pushd "%SCRIPT_DIR%\..\.." >nul +set "PROJECT_ROOT=%CD%" +popd >nul +call "%PROJECT_ROOT%\benchmarks\helpers\load_env.bat" "appworld" +cuga-eval appworld %* +exit /b %errorlevel% diff --git a/benchmarks/appworld/run_registry.bat b/benchmarks/appworld/run_registry.bat new file mode 100644 index 0000000..bac0aab --- /dev/null +++ b/benchmarks/appworld/run_registry.bat @@ -0,0 +1,8 @@ +@echo off +REM Windows equivalent of benchmarks/appworld/run_registry.sh +REM Delegates to the generic helper. 
+setlocal +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" +call "%SCRIPT_DIR%\..\helpers\run_registry.bat" "appworld" +exit /b %errorlevel% diff --git a/benchmarks/bpo/compare.bat b/benchmarks/bpo/compare.bat new file mode 100644 index 0000000..e1eb1bd --- /dev/null +++ b/benchmarks/bpo/compare.bat @@ -0,0 +1,7 @@ +@echo off +REM Windows equivalent of benchmarks/bpo/compare.sh — delegates to bash. +setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\compare.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/bpo/eval.bat b/benchmarks/bpo/eval.bat new file mode 100644 index 0000000..b6d0e05 --- /dev/null +++ b/benchmarks/bpo/eval.bat @@ -0,0 +1,7 @@ +@echo off +REM Windows equivalent of benchmarks/bpo/eval.sh — delegates to bash. +setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\eval.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/bpo/run_app.bat b/benchmarks/bpo/run_app.bat new file mode 100644 index 0000000..f3c414b --- /dev/null +++ b/benchmarks/bpo/run_app.bat @@ -0,0 +1,19 @@ +@echo off +REM Windows equivalent of benchmarks/bpo/run_app.sh +REM Loads env and runs the BPO FastAPI app on port 8095. + +setlocal +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" +pushd "%SCRIPT_DIR%\..\.." >nul +set "PROJECT_ROOT=%CD%" +popd >nul + +echo Loading BPO configuration... +call "%PROJECT_ROOT%\benchmarks\helpers\load_env.bat" "bpo" + +echo. +echo Starting BPO FastAPI app on port 8095... 
+cd /d "%PROJECT_ROOT%"
+uv run uvicorn benchmarks.bpo.main:app --reload --port 8095
+exit /b %errorlevel%
diff --git a/benchmarks/bpo/run_registry.bat b/benchmarks/bpo/run_registry.bat
new file mode 100644
index 0000000..7787dc8
--- /dev/null
+++ b/benchmarks/bpo/run_registry.bat
@@ -0,0 +1,7 @@
+@echo off
+REM Windows equivalent of benchmarks/bpo/run_registry.sh
+setlocal
+set "SCRIPT_DIR=%~dp0"
+if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
+call "%SCRIPT_DIR%\..\helpers\run_registry.bat" "bpo"
+exit /b %errorlevel%
diff --git a/benchmarks/helpers/_delegate_to_bash.bat b/benchmarks/helpers/_delegate_to_bash.bat
new file mode 100644
index 0000000..aedb26e
--- /dev/null
+++ b/benchmarks/helpers/_delegate_to_bash.bat
@@ -0,0 +1,64 @@
+@echo off
+REM Shared helper: invokes a .sh script via Git Bash or WSL, forwarding all args.
+REM
+REM Usage (from another .bat):
+REM   call "<helpers-dir>\_delegate_to_bash.bat" "<path-to-script.sh>" %*
+REM
+REM Rationale: many of the .sh scripts in this repo use POSIX-only features
+REM (process substitution, traps, lsof, pkill, comm, find -mindepth, mktemp,
+REM heredocs, etc.) that don't have clean cmd.exe equivalents. Rather than
+REM ship subtly-broken cmd.exe ports, we delegate to a real bash. A native
+REM Python port is tracked in the follow-up issue.
+
+setlocal enabledelayedexpansion
+
+if "%~1"=="" (
+    echo [ERROR] _delegate_to_bash.bat called without a script path
+    exit /b 2
+)
+set "_SCRIPT=%~1"
+REM SHIFT has no effect on the percent-star expansion, so forwarding it
+REM unchanged would pass the script path to the script as its own first
+REM argument. Cut everything up to and including argument 1 off the front.
+set "_ARGS=%*"
+set "_ARGS=!_ARGS:*%1=!"
+
+if not exist "%_SCRIPT%" (
+    echo [ERROR] Script not found: !_SCRIPT!
+    exit /b 2
+)
+
+REM Try Git Bash in well-known install locations
+for %%G in (
+    "%ProgramFiles%\Git\bin\bash.exe"
+    "%ProgramFiles(x86)%\Git\bin\bash.exe"
+    "%LocalAppData%\Programs\Git\bin\bash.exe"
+) do (
+    if exist %%G (
+        %%G "%_SCRIPT%" !_ARGS!
+        exit /b !errorlevel!
+    )
+)
+
+REM Then any bash on PATH (e.g. msys2, cygwin)
+where bash >nul 2>&1
+if not errorlevel 1 (
+    bash "%_SCRIPT%" !_ARGS!
+    exit /b !errorlevel!
+)
+
+REM Finally WSL
+where wsl >nul 2>&1
+if not errorlevel 1 (
+    for /f "delims=" %%P in ('wsl wslpath -u "%_SCRIPT%" 2^>nul') do set "_WSL_SCRIPT=%%P"
+    if not "!_WSL_SCRIPT!"=="" (
+        wsl bash "!_WSL_SCRIPT!" !_ARGS!
+        exit /b !errorlevel!
+    )
+)
+
+echo [ERROR] No bash interpreter found on this system.
+echo This script requires bash. Install one of:
+echo   - Git for Windows ^(provides Git Bash^): https://git-scm.com/download/win
+echo   - WSL ^(Windows Subsystem for Linux^): wsl --install
+exit /b 1
diff --git a/benchmarks/helpers/common.bat b/benchmarks/helpers/common.bat
new file mode 100644
index 0000000..95913a3
--- /dev/null
+++ b/benchmarks/helpers/common.bat
@@ -0,0 +1,24 @@
+@echo off
+REM Placeholder for benchmarks/helpers/common.sh.
+REM
+REM common.sh is a bash function library (port_in_use, wait_for_server,
+REM parse_common_args, cleanup_pids, etc.) that gets sourced by other .sh
+REM scripts. There's no equivalent of `source` for function definitions in
+REM cmd.exe, so a direct port is not feasible.
+REM
+REM In practice, this file is never called directly: the heavy .bat files
+REM in this repo (eval.bat, compare.bat, etc.) delegate to bash via
+REM _delegate_to_bash.bat, and bash sources common.sh itself.
+REM
+REM If you ARE invoking this file directly, you probably want one of:
+REM   - call _delegate_to_bash.bat ".\common.sh" ^<function^>  (run from bash)
+REM   - Use Git Bash or WSL to source it the normal way
+REM
+REM See the follow-up issue for the Python migration that removes this gap.
+
+if "%~1"=="" (
+    echo common.bat is a placeholder. See comment block in this file.
+    exit /b 0
+)
+echo [WARN] common.bat does not implement %~1 in cmd.exe. Use bash to source common.sh.
+exit /b 1
diff --git a/benchmarks/helpers/load_env.bat b/benchmarks/helpers/load_env.bat
new file mode 100644
index 0000000..863c4e4
--- /dev/null
+++ b/benchmarks/helpers/load_env.bat
@@ -0,0 +1,63 @@
+@echo off
+REM Windows equivalent of load_env.sh
+REM
+REM Usage: call load_env.bat [benchmark_name]
+REM
+REM Sourcing semantics: this script writes a temporary .bat snippet of `set`
+REM commands and calls it, so env vars persist into the caller's scope when
+REM invoked via `call`.
+
+setlocal enabledelayedexpansion
+
+set "BENCHMARK_NAME=%~1"
+
+set "HELPERS_DIR=%~dp0"
+if "%HELPERS_DIR:~-1%"=="\" set "HELPERS_DIR=%HELPERS_DIR:~0,-1%"
+pushd "%HELPERS_DIR%\..\.." >nul
+set "PROJECT_ROOT=%CD%"
+popd >nul
+set "CONFIG_DIR=%PROJECT_ROOT%\config"
+
+REM Temp file holds the set-commands we'll call from the caller's scope
+set "_SETS=%TEMP%\cuga_loadenv_%RANDOM%_%RANDOM%.bat"
+echo @echo off> "%_SETS%"
+
+call :emit_env_file "%PROJECT_ROOT%\.env" ".env (secrets)"
+call :emit_env_file "%CONFIG_DIR%\global.env" "global.env"
+if not "%BENCHMARK_NAME%"=="" (
+    call :emit_env_file "%PROJECT_ROOT%\benchmarks\%BENCHMARK_NAME%\config\%BENCHMARK_NAME%.env" "%BENCHMARK_NAME%.env"
+)
+
+REM Default LOGURU_LEVEL handling. Both checks are written into the snippet
+REM (not evaluated here) so they see the values loaded from the env files.
+echo if not defined LOGURU_LEVEL set "LOGURU_LEVEL=WARNING">> "%_SETS%"
+echo if /i "%%VERBOSE%%"=="true" set "LOGURU_LEVEL=DEBUG">> "%_SETS%"
+
+REM Single-line endlocal so %_SETS% is expanded at parse time (before endlocal runs)
+endlocal & call "%_SETS%" & del "%_SETS%" 2>nul
+exit /b 0
+
+REM Subroutine: emit_env_file FILE LABEL
+REM Appends one `set "KEY=VALUE"` command to the snippet file for every
+REM KEY=VALUE line of FILE (eol=# comment lines are skipped). A missing FILE
+REM is reported with LABEL and is not an error.
+:emit_env_file
+set "_FILE=%~1"
+set "_LABEL=%~2"
+REM The echo below runs inside a parenthesised block, so its literal parentheses
+REM are caret-escaped and the label (which may itself contain parentheses) is
+REM expanded with delayed expansion; otherwise the first closing parenthesis
+REM would end the IF block early and the exit would run unconditionally.
+if not exist "%_FILE%" (
+    if not "%_LABEL%"=="" echo ^(skipping missing !_LABEL!^)
+    exit /b 0
+)
+echo [ok] Loading %_LABEL%
+for /f "usebackq tokens=* eol=#" %%L in ("%_FILE%") do (
+    set "_line=%%L"
+    if not "!_line!"=="" (
+        for /f "tokens=1,* delims==" %%A in ("!_line!") do (
+            echo set "%%A=%%B">> "%_SETS%"
+        )
+    )
+)
+exit /b 0
diff --git a/benchmarks/helpers/run_registry.bat b/benchmarks/helpers/run_registry.bat
new file
mode 100644
index 0000000..306a64f
--- /dev/null
+++ b/benchmarks/helpers/run_registry.bat
@@ -0,0 +1,28 @@
+@echo off
+REM Windows equivalent of run_registry.sh
+REM Loads env (global + benchmark-specific) and starts the registry server.
+REM Usage: run_registry.bat ^<benchmark_name^>
+
+setlocal
+
+set "BENCHMARK_NAME=%~1"
+if "%BENCHMARK_NAME%"=="" (
+    echo Usage: %~nx0 ^<benchmark_name^>
+    echo Example: %~nx0 m3
+    exit /b 1
+)
+
+set "SCRIPT_DIR=%~dp0"
+if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
+pushd "%SCRIPT_DIR%\..\.." >nul
+set "PROJECT_ROOT=%CD%"
+popd >nul
+
+echo Loading %BENCHMARK_NAME% evaluation configuration...
+call "%SCRIPT_DIR%\load_env.bat" "%BENCHMARK_NAME%"
+
+echo.
+echo Starting registry server...
+cd /d "%PROJECT_ROOT%"
+uv run registry
+exit /b %errorlevel%
diff --git a/benchmarks/m3/analyze.bat b/benchmarks/m3/analyze.bat
new file mode 100644
index 0000000..9905eff
--- /dev/null
+++ b/benchmarks/m3/analyze.bat
@@ -0,0 +1,9 @@
+@echo off
+REM Windows equivalent of benchmarks/m3/analyze.sh
+REM Thin wrapper around scripts/analyze.bat with --benchmark m3.
+
+setlocal
+set "SCRIPT_DIR=%~dp0"
+if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%"
+call "%SCRIPT_DIR%\..\..\scripts\analyze.bat" --benchmark m3 %*
+exit /b %errorlevel%
diff --git a/benchmarks/m3/clean.bat b/benchmarks/m3/clean.bat
new file mode 100644
index 0000000..757be01
--- /dev/null
+++ b/benchmarks/m3/clean.bat
@@ -0,0 +1,8 @@
+@echo off
+REM Windows equivalent of benchmarks/m3/clean.sh — delegates to bash.
+REM (Uses pkill, lsof, docker exec curl loops, glob removal — POSIX-only.)
+setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\clean.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/m3/compare.bat b/benchmarks/m3/compare.bat new file mode 100644 index 0000000..835cf99 --- /dev/null +++ b/benchmarks/m3/compare.bat @@ -0,0 +1,7 @@ +@echo off +REM Windows equivalent of benchmarks/m3/compare.sh — delegates to bash. +setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\compare.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/m3/eval.bat b/benchmarks/m3/eval.bat new file mode 100644 index 0000000..04a4e4b --- /dev/null +++ b/benchmarks/m3/eval.bat @@ -0,0 +1,11 @@ +@echo off +REM Windows equivalent of benchmarks/m3/eval.sh +REM Delegates to bash (Git Bash / WSL) because the script uses POSIX features +REM that don't translate cleanly to cmd.exe (traps, lsof, process subs, ...). +REM Tracked in the follow-up issue: migrate these to Python. + +setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\eval.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/m3/eval/scripts/monitor_eval.bat b/benchmarks/m3/eval/scripts/monitor_eval.bat new file mode 100644 index 0000000..cfcaebc --- /dev/null +++ b/benchmarks/m3/eval/scripts/monitor_eval.bat @@ -0,0 +1,7 @@ +@echo off +REM Windows equivalent of monitor_eval.sh — delegates to bash. 
+setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\..\..\helpers\_delegate_to_bash.bat" "%_THIS%\monitor_eval.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/m3/eval/scripts/run_eval_background.bat b/benchmarks/m3/eval/scripts/run_eval_background.bat new file mode 100644 index 0000000..69a18b3 --- /dev/null +++ b/benchmarks/m3/eval/scripts/run_eval_background.bat @@ -0,0 +1,8 @@ +@echo off +REM Windows equivalent of run_eval_background.sh — delegates to bash +REM (nohup, &, signal traps, PID files — POSIX background-job semantics). +setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\..\..\helpers\_delegate_to_bash.bat" "%_THIS%\run_eval_background.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/m3/eval/scripts/setup_m3_eval.bat b/benchmarks/m3/eval/scripts/setup_m3_eval.bat new file mode 100644 index 0000000..a60b413 --- /dev/null +++ b/benchmarks/m3/eval/scripts/setup_m3_eval.bat @@ -0,0 +1,8 @@ +@echo off +REM Windows equivalent of setup_m3_eval.sh — delegates to bash (uses +REM interactive prompts, docker detection, file edits — bash-only). 
+setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\..\..\helpers\_delegate_to_bash.bat" "%_THIS%\setup_m3_eval.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/m3/run_registry.bat b/benchmarks/m3/run_registry.bat new file mode 100644 index 0000000..2ebc173 --- /dev/null +++ b/benchmarks/m3/run_registry.bat @@ -0,0 +1,7 @@ +@echo off +REM Windows equivalent of benchmarks/m3/run_registry.sh +setlocal +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" +call "%SCRIPT_DIR%\..\helpers\run_registry.bat" "m3" +exit /b %errorlevel% diff --git a/benchmarks/m3/run_with_container.bat b/benchmarks/m3/run_with_container.bat new file mode 100644 index 0000000..72c8718 --- /dev/null +++ b/benchmarks/m3/run_with_container.bat @@ -0,0 +1,7 @@ +@echo off +REM Windows equivalent of benchmarks/m3/run_with_container.sh — delegates to bash. +setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\run_with_container.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/oak_health_insurance/compare.bat b/benchmarks/oak_health_insurance/compare.bat new file mode 100644 index 0000000..8a9558f --- /dev/null +++ b/benchmarks/oak_health_insurance/compare.bat @@ -0,0 +1,7 @@ +@echo off +REM Windows equivalent of benchmarks/oak_health_insurance/compare.sh — delegates to bash. +setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\compare.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/oak_health_insurance/eval.bat b/benchmarks/oak_health_insurance/eval.bat new file mode 100644 index 0000000..8a37a55 --- /dev/null +++ b/benchmarks/oak_health_insurance/eval.bat @@ -0,0 +1,7 @@ +@echo off +REM Windows equivalent of benchmarks/oak_health_insurance/eval.sh — delegates to bash. 
+setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\helpers\_delegate_to_bash.bat" "%_THIS%\eval.sh" %* +exit /b %errorlevel% diff --git a/benchmarks/oak_health_insurance/run_app.bat b/benchmarks/oak_health_insurance/run_app.bat new file mode 100644 index 0000000..8bea569 --- /dev/null +++ b/benchmarks/oak_health_insurance/run_app.bat @@ -0,0 +1,19 @@ +@echo off +REM Windows equivalent of benchmarks/oak_health_insurance/run_app.sh +REM Loads env and runs the Oak Health Insurance FastAPI app on port 8090. + +setlocal +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" +pushd "%SCRIPT_DIR%\..\.." >nul +set "PROJECT_ROOT=%CD%" +popd >nul + +echo Loading Oak Health Insurance configuration... +call "%PROJECT_ROOT%\benchmarks\helpers\load_env.bat" "oak_health_insurance" + +echo. +echo Starting FastAPI app... +cd /d "%SCRIPT_DIR%" +uv run uvicorn main:app --reload --port 8090 +exit /b %errorlevel% diff --git a/benchmarks/oak_health_insurance/run_registry.bat b/benchmarks/oak_health_insurance/run_registry.bat new file mode 100644 index 0000000..203b480 --- /dev/null +++ b/benchmarks/oak_health_insurance/run_registry.bat @@ -0,0 +1,7 @@ +@echo off +REM Windows equivalent of benchmarks/oak_health_insurance/run_registry.sh +setlocal +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" +call "%SCRIPT_DIR%\..\helpers\run_registry.bat" "oak_health_insurance" +exit /b %errorlevel% diff --git a/scripts/analyze.bat b/scripts/analyze.bat new file mode 100644 index 0000000..06dae69 --- /dev/null +++ b/scripts/analyze.bat @@ -0,0 +1,8 @@ +@echo off +REM Windows equivalent of scripts/analyze.sh — delegates to bash (uses +REM bash arrays for --bundles / --task-ids, sources config .conf via source). 
+setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\benchmarks\helpers\_delegate_to_bash.bat" "%_THIS%\analyze.sh" %* +exit /b %errorlevel% diff --git a/scripts/compare.bat b/scripts/compare.bat new file mode 100644 index 0000000..b278bb7 --- /dev/null +++ b/scripts/compare.bat @@ -0,0 +1,7 @@ +@echo off +REM Windows equivalent of scripts/compare.sh — delegates to bash (sources common.sh). +setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\benchmarks\helpers\_delegate_to_bash.bat" "%_THIS%\compare.sh" %* +exit /b %errorlevel% diff --git a/scripts/eval.bat b/scripts/eval.bat new file mode 100644 index 0000000..9005e7f --- /dev/null +++ b/scripts/eval.bat @@ -0,0 +1,9 @@ +@echo off +REM Windows equivalent of scripts/eval.sh — delegates to bash because the +REM script sources common.sh (a bash function library: parse_common_args, +REM apply_model_profile_if_set, check_langfuse_env, list_benchmarks). +setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\benchmarks\helpers\_delegate_to_bash.bat" "%_THIS%\eval.sh" %* +exit /b %errorlevel% diff --git a/scripts/m3_pad_to_cap_verify.bat b/scripts/m3_pad_to_cap_verify.bat new file mode 100644 index 0000000..28abb0f --- /dev/null +++ b/scripts/m3_pad_to_cap_verify.bat @@ -0,0 +1,11 @@ +@echo off +REM Windows equivalent of scripts/m3_pad_to_cap_verify.sh — delegates to bash. +REM +REM This script uses tee, mktemp, embedded Python heredoc, gh api PATCH with +REM @file body, process substitution, and signal traps. None of these have +REM clean cmd.exe equivalents. Use Git Bash or WSL. 
+setlocal +set "_THIS=%~dp0" +if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" +call "%_THIS%\..\benchmarks\helpers\_delegate_to_bash.bat" "%_THIS%\m3_pad_to_cap_verify.sh" %* +exit /b %errorlevel% diff --git a/scripts/model_profiles.bat b/scripts/model_profiles.bat new file mode 100644 index 0000000..3372835 --- /dev/null +++ b/scripts/model_profiles.bat @@ -0,0 +1,56 @@ +@echo off +REM Windows equivalent of model_profiles.sh +REM Usage: call model_profiles.bat ^ +REM Sets AGENT_SETTING_CONFIG, MODEL_NAME, OPENAI_BASE_URL, OPENAI_API_VERSION. + +setlocal +set "PROFILE=%~1" + +set "_AGENT_SETTING=" +set "_MODEL_NAME=" +set "_BASE_URL=" +set "_API_VERSION=" +set "_RC=0" + +if "%PROFILE%"=="" goto done +if /i "%PROFILE%"=="gpt-oss" ( + set "_AGENT_SETTING=settings.groq.toml" + set "_MODEL_NAME=openai/gpt-oss-120b" + echo [OK] Model profile: gpt-oss + goto done +) +if /i "%PROFILE%"=="gpt4o" ( + set "_AGENT_SETTING=settings.openai.toml" + set "_MODEL_NAME=Azure/gpt-4o" + set "_BASE_URL=https://ete-litellm.bx.cloud9.ibm.com" + set "_API_VERSION=2024-08-06" + echo [OK] Model profile: gpt4o ^(Azure/gpt-4o^) + goto done +) +if /i "%PROFILE%"=="gpt4.1" ( + set "_AGENT_SETTING=settings.openai.toml" + set "_MODEL_NAME=Azure/gpt-4.1" + set "_BASE_URL=https://ete-litellm.bx.cloud9.ibm.com" + set "_API_VERSION=2024-08-06" + echo [OK] Model profile: gpt4.1 ^(Azure/gpt-4.1^) + goto done +) +if /i "%PROFILE%"=="opus4.5" ( + set "_AGENT_SETTING=settings.openai.toml" + set "_MODEL_NAME=claude-opus-4-5-20251101" + set "_BASE_URL=https://ete-litellm.bx.cloud9.ibm.com" + echo [OK] Model profile: opus4.5 + goto done +) +echo [ERROR] Unknown model profile '%PROFILE%' +echo Valid values: gpt-oss, gpt4o, gpt4.1, opus4.5 +set "_RC=1" + +:done +endlocal & ( + if not "%_AGENT_SETTING%"=="" set "AGENT_SETTING_CONFIG=%_AGENT_SETTING%" + if not "%_MODEL_NAME%"=="" set "MODEL_NAME=%_MODEL_NAME%" + if not "%_BASE_URL%"=="" (set "OPENAI_BASE_URL=%_BASE_URL%") else (set "OPENAI_BASE_URL=") + if not 
"%_API_VERSION%"=="" (set "OPENAI_API_VERSION=%_API_VERSION%") else (set "OPENAI_API_VERSION=") + exit /b %_RC% +) diff --git a/scripts/viz.bat b/scripts/viz.bat new file mode 100644 index 0000000..37bcb04 --- /dev/null +++ b/scripts/viz.bat @@ -0,0 +1,27 @@ +@echo off +REM Windows equivalent of viz.sh +REM Loads benchmark env and runs cuga viz against the trajectory_data dir. + +setlocal enabledelayedexpansion + +set "BENCHMARK_NAME=%~1" +if "%BENCHMARK_NAME%"=="" ( + echo Usage: %~nx0 ^ + echo Example: %~nx0 m3 + exit /b 1 +) + +set "SCRIPT_DIR=%~dp0" +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" +pushd "%SCRIPT_DIR%\.." >nul +set "PROJECT_ROOT=%CD%" +popd >nul + +echo Loading %BENCHMARK_NAME% visualization configuration... +call "%PROJECT_ROOT%\benchmarks\helpers\load_env.bat" "%BENCHMARK_NAME%" + +echo. +echo Running cuga viz... +cd /d "%PROJECT_ROOT%" +uv run cuga-viz run %CUGA_LOGGING_DIR%\trajectory_data\ +exit /b %errorlevel% diff --git a/setup_appworld.bat b/setup_appworld.bat new file mode 100644 index 0000000..ea3feaa --- /dev/null +++ b/setup_appworld.bat @@ -0,0 +1,58 @@ +@echo off +REM Windows equivalent of setup_appworld.sh +REM +REM Sources the appworld env file (set -a equivalent), then installs AppWorld +REM via uv. Interactive reinstall prompt preserved. + +setlocal enabledelayedexpansion + +set "APPWORLD_DIR=benchmarks\appworld" +set "APPWORLD_ENV_FILE=benchmarks\appworld\config\appworld.env" +set "APPWORLD_REPO_DIR=%APPWORLD_DIR%\appworld" +set "APPWORLD_DATA_DIR=%APPWORLD_REPO_DIR%\data" + +if not exist "%APPWORLD_DIR%\" ( + echo Error: '%APPWORLD_DIR%' directory not found! + echo Please clone the repository first + exit /b 1 +) + +if not exist "%APPWORLD_ENV_FILE%" ( + echo Error: '%APPWORLD_ENV_FILE%' file not found! 
+ exit /b 1 +) + +REM Load env file: each non-comment KEY=VALUE line becomes a set +for /f "usebackq tokens=* eol=#" %%L in ("%APPWORLD_ENV_FILE%") do ( + set "_line=%%L" + if not "!_line!"=="" ( + for /f "tokens=1,* delims==" %%A in ("!_line!") do ( + set "%%A=%%B" + ) + ) +) + +if not exist "%APPWORLD_REPO_DIR%\" ( + echo Error: '%APPWORLD_REPO_DIR%' directory not found! + echo Please clone the AppWorld repository into '%APPWORLD_REPO_DIR%' first + exit /b 1 +) + +if exist "%APPWORLD_DATA_DIR%\" ( + echo AppWorld repository already present at '%APPWORLD_REPO_DIR%'. + echo AppWorld data already exists at '%APPWORLD_DATA_DIR%'. + set /p REINSTALL="Would you like to reinstall AppWorld and re-download the data? [y/N] " + if /i not "!REINSTALL!"=="y" if /i not "!REINSTALL!"=="yes" ( + echo Keeping existing AppWorld installation and data. Skipping setup. + exit /b 0 + ) + echo Reinstalling AppWorld and downloading data... +) + +pushd "%APPWORLD_REPO_DIR%" || exit /b 1 +uv pip install . || (popd & exit /b 1) +uv run -m appworld.cli install || (popd & exit /b 1) +uv run appworld install --repo || (popd & exit /b 1) +uv run appworld download data || (popd & exit /b 1) +popd +exit /b 0 diff --git a/setup_cuga.bat b/setup_cuga.bat new file mode 100644 index 0000000..9515269 --- /dev/null +++ b/setup_cuga.bat @@ -0,0 +1,93 @@ +@echo off +REM Windows equivalent of setup_cuga.sh +REM Clones cuga-agent next to this repo (matches pyproject.toml path "../cuga-agent") +REM and sets up environment variables for the current session. +REM +REM Note: env vars set here only persist for this cmd.exe session. +REM See the follow-up issue tracking conversion of these scripts to Python. + +setlocal + +set "REPO_URL=https://github.com/cuga-project/cuga-agent.git" +set "REPO_BRANCH=main" +set "SCRIPT_DIR=%~dp0" +REM strip trailing backslash from %~dp0 +if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" +pushd "%SCRIPT_DIR%\.." 
>nul || (echo [ERROR] Could not access parent of %SCRIPT_DIR% & exit /b 1) +set "PARENT_DIR=%CD%" +popd >nul +set "REPO_NAME=cuga-agent" +set "REPO_PATH=%PARENT_DIR%\%REPO_NAME%" + +echo ============================================================ +echo CUGA Agent Setup Script +echo ============================================================ +echo. + +where git >nul 2>&1 +if errorlevel 1 ( + echo [ERROR] Git is not installed. Please install git and try again. + exit /b 1 +) + +if exist "%REPO_PATH%\.git" ( + echo [INFO] Repository already exists at %REPO_PATH% + echo [INFO] Pulling latest changes from branch: %REPO_BRANCH%... + pushd "%REPO_PATH%" || exit /b 1 + git fetch origin && git checkout "%REPO_BRANCH%" && git pull origin "%REPO_BRANCH%" + if errorlevel 1 echo [WARNING] Could not update repository. You may need to resolve conflicts manually. + popd +) else if exist "%REPO_PATH%\" ( + echo [WARNING] Directory exists but is not a git repository. Removing and cloning fresh... + rmdir /s /q "%REPO_PATH%" + call :clone_repo || exit /b 1 +) else ( + call :clone_repo || exit /b 1 +) + +echo [INFO] Exporting environment variables... +endlocal & ( + set "ENV_FILE=.\.env" + set "MCP_SERVERS_FILE=.\mcp_servers.yaml" + set "CUGA_LOGGING_DIR=.\logging" + echo [INFO] Exported ENV_FILE=.\.env + echo [INFO] Exported MCP_SERVERS_FILE=.\mcp_servers.yaml + echo [INFO] Exported CUGA_LOGGING_DIR=.\logging +) + +if not exist ".\logging" ( + echo [INFO] Creating logging directory... + mkdir ".\logging" +) + +REM Optionally run AppWorld setup if the script exists +if exist "%~dp0setup_appworld.bat" ( + echo [INFO] Running AppWorld setup... + call "%~dp0setup_appworld.bat" + if errorlevel 1 ( + echo [ERROR] AppWorld setup failed + exit /b 1 + ) +) else ( + echo [WARNING] AppWorld setup script not found. Skipping. +) + +echo. +echo [SUCCESS] Setup completed successfully! +echo. +echo Next steps: +echo 1. Check the cloned repository at: %~dp0..\cuga-agent +echo 2. 
Environment variables are now available in this terminal session +echo 3. Note: Variables will only persist for this terminal session +echo. +exit /b 0 + +:clone_repo +echo [INFO] Cloning %REPO_URL% (branch: %REPO_BRANCH%)... +git clone -b "%REPO_BRANCH%" "%REPO_URL%" "%REPO_PATH%" +if errorlevel 1 ( + echo [ERROR] Failed to clone repository. Please check your SSH keys and network connection. + exit /b 1 +) +echo [SUCCESS] Repository cloned successfully to %REPO_PATH% +exit /b 0 diff --git a/setup_m3.bat b/setup_m3.bat new file mode 100644 index 0000000..2592a3c --- /dev/null +++ b/setup_m3.bat @@ -0,0 +1,229 @@ +@echo off +REM Windows equivalent of setup_m3.sh +REM +REM Clones vakra into vendor\, sets up Python venv, installs deps, downloads +REM benchmark data, builds Docker image, starts containers. +REM +REM Requires: git, docker or podman, python (with venv), HF_TOKEN env var. + +setlocal enabledelayedexpansion + +set "REPO_URL=https://github.com/IBM/vakra.git" +set "VENDOR_DIR=.\vendor" +set "REPO_NAME=vakra" +set "REPO_PATH=%VENDOR_DIR%\%REPO_NAME%" +set "DATA_DIR=%REPO_PATH%\data" + +REM Mode flags +set "DOWNLOAD_ONLY=false" +set "BUILD_ONLY=false" +set "START_ONLY=false" +set "VERIFY_ONLY=false" +set "SKIP_DOWNLOAD=false" + +:parse_args +if "%~1"=="" goto args_done +if "%~1"=="--download-only" (set "DOWNLOAD_ONLY=true" & shift & goto parse_args) +if "%~1"=="--build-only" (set "BUILD_ONLY=true" & shift & goto parse_args) +if "%~1"=="--start-only" (set "START_ONLY=true" & shift & goto parse_args) +if "%~1"=="--verify" (set "VERIFY_ONLY=true" & shift & goto parse_args) +if "%~1"=="--skip-download" (set "SKIP_DOWNLOAD=true" & shift & goto parse_args) +if "%~1"=="--help" goto show_usage +echo [ERROR] Unknown option: %~1 +call :show_usage & exit /b 1 + +:show_usage +echo Usage: %~nx0 [OPTIONS] +echo. 
+echo Options: +echo --download-only Download data only (no build/start) +echo --build-only Only build image, don't start containers +echo --start-only Only start containers (assumes already built) +echo --verify Only verify containers are running +echo --skip-download Skip data download step +echo --help Show this help message +exit /b 0 + +:args_done +echo ============================================================ +echo Vakra Benchmark Setup Script +echo ============================================================ +echo. + +REM Check prerequisites +where git >nul 2>&1 +if errorlevel 1 ( + echo [ERROR] Missing required dependency: git + exit /b 1 +) +set "RUNTIME=" +where docker >nul 2>&1 && set "RUNTIME=docker" +if "%RUNTIME%"=="" where podman >nul 2>&1 && set "RUNTIME=podman" +if "%RUNTIME%"=="" ( + echo [ERROR] Missing required dependency: docker or podman + exit /b 1 +) +echo [INFO] Using container runtime: %RUNTIME% + +if "%VERIFY_ONLY%"=="true" ( + call :verify_containers + exit /b !errorlevel! +) + +if "%START_ONLY%"=="true" ( + call :start_containers || exit /b 1 + call :verify_containers + exit /b !errorlevel! +) + +REM Step 1: Clone or update repo +if not exist "%VENDOR_DIR%\" ( + echo [INFO] Creating vendor directory... + mkdir "%VENDOR_DIR%" +) +if exist "%REPO_PATH%\.git" ( + echo [INFO] Repository already exists at %REPO_PATH%, pulling latest... + pushd "%REPO_PATH%" || exit /b 1 + git pull origin main 2>nul || git pull origin master 2>nul + if errorlevel 1 echo [WARNING] Could not update repository + popd +) else if exist "%REPO_PATH%\" ( + echo [WARNING] Directory exists but is not a git repository. Removing and cloning fresh... + rmdir /s /q "%REPO_PATH%" + call :clone_repo || exit /b 1 +) else ( + call :clone_repo || exit /b 1 +) + +REM Step 2: Python env + install deps +echo [INFO] Step 2: Installing Python dependencies... +pushd "%REPO_PATH%" || exit /b 1 +if not exist ".venv\" ( + echo [INFO] Creating Python virtual environment... 
+ python -m venv .venv || (echo [ERROR] Failed to create venv & popd & exit /b 1) +) +echo [INFO] Activating virtual environment and installing vakra... +call .venv\Scripts\activate.bat +pip install -e ".[init]" || (echo [ERROR] vakra install failed & popd & exit /b 1) +pip install -r requirements_benchmark.txt || (echo [ERROR] benchmark deps install failed & popd & exit /b 1) +popd + +REM Step 3: Download data +if "%SKIP_DOWNLOAD%"=="false" ( + if not exist "%DATA_DIR%\" ( + call :download_data || exit /b 1 + ) else ( + dir /b /a "%DATA_DIR%" >nul 2>&1 + if errorlevel 1 ( + call :download_data || exit /b 1 + ) else ( + echo [INFO] Data directory exists and is not empty, skipping download + ) + ) +) + +if "%DOWNLOAD_ONLY%"=="true" ( + echo [SUCCESS] Setup and data download completed! + exit /b 0 +) + +REM Build + start +call :build_image || exit /b 1 +if "%BUILD_ONLY%"=="false" ( + call :start_containers || exit /b 1 + call :verify_containers +) + +echo. +echo [SUCCESS] Vakra setup completed successfully! +echo. +echo Container Information: +echo * capability_1_bi_apis - Tool Chaining MCP Server +echo * capability_2_dashboard_apis - Tool Selection MCP Server +echo * capability_3_multihop_reasoning - Multi-hop Reasoning MCP Server +echo * capability_4_multiturn - Multi-hop Multi-Source MCP Server +exit /b 0 + +:clone_repo +echo [INFO] Cloning %REPO_URL%... +git clone "%REPO_URL%" "%REPO_PATH%" +if errorlevel 1 ( + echo [ERROR] Failed to clone repository. Check SSH keys / network. + exit /b 1 +) +echo [SUCCESS] Repository cloned successfully +exit /b 0 + +:download_data +echo [INFO] Downloading benchmark data from HuggingFace (~30 GB)... 
+if "%HF_TOKEN%"=="" ( + echo [ERROR] HF_TOKEN environment variable is not set + echo Set it with: set HF_TOKEN=hf_your_token_here + echo Get your token from: https://huggingface.co/settings/tokens + exit /b 1 +) +pushd "%REPO_PATH%" || exit /b 1 +if exist ".venv\Scripts\activate.bat" call .venv\Scripts\activate.bat +where make >nul 2>&1 +if not errorlevel 1 ( + make download +) else ( + python benchmark_setup.py --download-data +) +if errorlevel 1 ( + echo [ERROR] Failed to download data + popd + exit /b 1 +) +popd +echo [SUCCESS] Data downloaded successfully +exit /b 0 + +:build_image +echo [INFO] Building vakra Docker image using %RUNTIME%... +pushd "%REPO_PATH%" || exit /b 1 +where make >nul 2>&1 +if not errorlevel 1 ( + set "DOCKER=%RUNTIME%" && make build +) else ( + %RUNTIME% build -t m3_environ -f docker/Dockerfile.unified . +) +if errorlevel 1 ( + echo [ERROR] Failed to build image + popd + exit /b 1 +) +popd +echo [SUCCESS] Image built successfully +exit /b 0 + +:start_containers +echo [INFO] Starting containers using %RUNTIME% compose... +pushd "%REPO_PATH%" || exit /b 1 +where make >nul 2>&1 +if not errorlevel 1 ( + set "DOCKER=%RUNTIME%" && make start +) else ( + %RUNTIME% compose up -d +) +if errorlevel 1 ( + echo [ERROR] Failed to start containers + popd + exit /b 1 +) +popd +echo [SUCCESS] Containers started successfully +exit /b 0 + +:verify_containers +echo [INFO] Verifying containers... 
+%RUNTIME% ps --format "table {{.Names}}\t{{.Status}}\t{{.Ports}}" +for /f %%C in ('%RUNTIME% ps --format "{{.Names}}" ^| findstr /c:"capability_" /c:"" ^| find /c "capability_"') do set "RUNNING=%%C" +if "%RUNNING%"=="" set "RUNNING=0" +if %RUNNING% GEQ 4 ( + echo [SUCCESS] Found %RUNNING% capability containers running + exit /b 0 +) else ( + echo [WARNING] Only %RUNNING% capability containers running ^(expected 4^) + exit /b 1 +) From 51ffa9d87d15fad79c5c1da645b7dbd79564e210 Mon Sep 17 00:00:00 2001 From: Harold Ship Date: Sun, 17 May 2026 16:48:28 +0300 Subject: [PATCH 5/5] docs: document Windows .bat usage + add pwsh smoke test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit README changes: - Expand the Windows note in §Installation to mention that every .sh has a .bat sibling, and that the simple wrappers run on stock cmd.exe (no WSL/Git Bash needed) while the heavy scripts delegate to bash. - New §Running on Windows in Quick Start with cmd.exe/PowerShell examples mirroring the bash examples above it. - Reference the smoke test and the long-term Python migration (#88). scripts/test_bat_scripts.ps1 — new pwsh 7 smoke test that verifies structural invariants of the .bat layer: 1. every in-scope .sh has a sibling .bat 2. every .bat starts with `@echo off` 3. every .bat has an `exit /b` terminator 4. every delegate shim points to an existing .sh 5. every shim's `_delegate_to_bash.bat` exists where expected It doesn't execute the .bat files (cmd.exe isn't available on mac/linux), but it caught a real bug on its first run — three orphan `analyze.bat` files delegating to .sh files that don't exist on this branch (they were added on master after this branch was cut). Test runs on any host with pwsh: `pwsh scripts/test_bat_scripts.ps1`. Orphan deletions: benchmarks/appworld/analyze.bat, benchmarks/m3/analyze.bat, scripts/analyze.bat. 
Their .sh counterparts will arrive when master is merged in; the .bat files can come back at that point. .secrets.baseline auto-updated by the detect-secrets hook to refresh the timestamp after the line-number shift in scripts/model_profiles.bat. --- .secrets.baseline | 4 +- README.md | 62 ++++++++++++++- benchmarks/appworld/analyze.bat | 9 --- benchmarks/m3/analyze.bat | 9 --- scripts/analyze.bat | 8 -- scripts/test_bat_scripts.ps1 | 131 ++++++++++++++++++++++++++++++++ 6 files changed, 193 insertions(+), 30 deletions(-) delete mode 100644 benchmarks/appworld/analyze.bat delete mode 100644 benchmarks/m3/analyze.bat delete mode 100644 scripts/analyze.bat create mode 100644 scripts/test_bat_scripts.ps1 diff --git a/.secrets.baseline b/.secrets.baseline index 2e4c4fd..2fd95b3 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": "^.secrets.baseline$", "lines": null }, - "generated_at": "2026-05-17T13:36:51Z", + "generated_at": "2026-05-17T13:48:18Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -111,7 +111,7 @@ { "hashed_secret": "b45fc270bb9e9ddf4829b9124321ce244d38668e", "is_verified": false, - "line_number": 72, + "line_number": 79, "type": "Secret Keyword", "verified_result": null } diff --git a/README.md b/README.md index 6ea7d17..9a7983c 100644 --- a/README.md +++ b/README.md @@ -39,10 +39,17 @@ git clone https://github.com/cuga-project/cuga-eval.git cd cuga-eval ``` -> **Windows users (WSL):** if you cloned with Windows git (default `core.autocrlf=true`), +> **Windows users:** every `.sh` script in this repo has a sibling `.bat`. You don't need +> WSL or Git Bash for the simple wrappers (`setup_cuga.bat`, `run_app.bat`, `run_registry.bat`, +> `viz.bat`, `model_profiles.bat`, etc.) — they run on +> stock `cmd.exe`. The heavier scripts (eval/compare/clean and the `m3_pad_to_cap_verify` +> helper) delegate to bash via Git Bash or WSL because they use POSIX-only features. 
See +> [Running on Windows](#running-on-windows) below. +> +> If you're using WSL and cloned with Windows git (default `core.autocrlf=true`), > the `*.sh` and `*.env` files end up with CRLF line endings, which break bash under WSL > (`$'\r': command not found`) and `source`d env files. Run `fix_line_endings.bat` -> (double-click in Explorer, or run from cmd.exe / PowerShell) once before running any +> (double-click in Explorer, or run from `cmd.exe` / PowerShell) once before running any > setup scripts under WSL. ### 2. Ensure CUGA Agent is in parent directory @@ -128,6 +135,57 @@ cd benchmarks/m3 && ./eval.sh cd benchmarks/appworld && ./eval.sh ``` +### Running on Windows + +Every script has a `.bat` sibling. Same flags, same semantics; just substitute the +extension and use `\` instead of `/`: + +```bat +:: Top-level dispatcher (these scripts delegate to bash — see note below) +scripts\eval.bat --benchmark bpo +scripts\eval.bat --benchmark m3 --model-profile gpt-oss +scripts\compare.bat --benchmark bpo --runs 3 + +:: Setup (pure cmd.exe — no bash required) +setup_cuga.bat +setup_m3.bat --verify +setup_appworld.bat + +:: Per-benchmark, from the benchmark dir +cd benchmarks\bpo && eval.bat +cd benchmarks\m3 && run_registry.bat + +:: Run from PowerShell the same way — pwsh launches .bat via cmd.exe +.\setup_cuga.bat +.\scripts\eval.bat --benchmark bpo +``` + +The `.bat` files fall into two groups: + +- **Pure `cmd.exe` ports** — setup scripts, env loaders, registry runners, app + launchers, model profiles, the viz thin-wrapper. Work on a vanilla + Windows install with `cmd.exe` or PowerShell. No bash needed. +- **Bash-delegate shims** — the heavy eval/compare/clean scripts and + `m3_pad_to_cap_verify`. 
These use POSIX features (signal traps, `lsof`, `pkill`, + process substitution, sourceable function libraries, embedded `python3` here-docs) + that don't have clean `cmd.exe` equivalents, so each shim calls + [`benchmarks\helpers\_delegate_to_bash.bat`](benchmarks/helpers/_delegate_to_bash.bat), + which finds a `bash` in this order: Git Bash (well-known install paths) → + `bash` on `PATH` → WSL. Install [Git for Windows](https://git-scm.com/download/win) + (provides Git Bash) or run `wsl --install` if neither is present. + +A smoke test for the `.bat` scripts ships at `scripts/test_bat_scripts.ps1`. It +runs on any platform with PowerShell 7+: + +```bash +pwsh scripts/test_bat_scripts.ps1 +``` + +It validates that every `.sh` has a `.bat` sibling, that each `.bat` is well-formed, +and that the delegate shims point to existing `.sh` files. Long-term, this whole +layer will move to Python (one entrypoint instead of two parallel script trees) — +tracked in [issue #88](../../issues/88). + ### Model profiles Available profiles: `gpt-oss`, `gpt4o`, `gpt4.1`, `opus4.5` diff --git a/benchmarks/appworld/analyze.bat b/benchmarks/appworld/analyze.bat deleted file mode 100644 index f3b679b..0000000 --- a/benchmarks/appworld/analyze.bat +++ /dev/null @@ -1,9 +0,0 @@ -@echo off -REM Windows equivalent of benchmarks/appworld/analyze.sh -REM Thin wrapper around scripts/analyze.bat with --benchmark appworld. - -setlocal -set "SCRIPT_DIR=%~dp0" -if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" -call "%SCRIPT_DIR%\..\..\scripts\analyze.bat" --benchmark appworld %* -exit /b %errorlevel% diff --git a/benchmarks/m3/analyze.bat b/benchmarks/m3/analyze.bat deleted file mode 100644 index 9905eff..0000000 --- a/benchmarks/m3/analyze.bat +++ /dev/null @@ -1,9 +0,0 @@ -@echo off -REM Windows equivalent of benchmarks/m3/analyze.sh -REM Thin wrapper around scripts/analyze.bat with --benchmark m3. 
- -setlocal -set "SCRIPT_DIR=%~dp0" -if "%SCRIPT_DIR:~-1%"=="\" set "SCRIPT_DIR=%SCRIPT_DIR:~0,-1%" -call "%SCRIPT_DIR%\..\..\scripts\analyze.bat" --benchmark m3 %* -exit /b %errorlevel% diff --git a/scripts/analyze.bat b/scripts/analyze.bat deleted file mode 100644 index 06dae69..0000000 --- a/scripts/analyze.bat +++ /dev/null @@ -1,8 +0,0 @@ -@echo off -REM Windows equivalent of scripts/analyze.sh — delegates to bash (uses -REM bash arrays for --bundles / --task-ids, sources config .conf via source). -setlocal -set "_THIS=%~dp0" -if "%_THIS:~-1%"=="\" set "_THIS=%_THIS:~0,-1%" -call "%_THIS%\..\benchmarks\helpers\_delegate_to_bash.bat" "%_THIS%\analyze.sh" %* -exit /b %errorlevel% diff --git a/scripts/test_bat_scripts.ps1 b/scripts/test_bat_scripts.ps1 new file mode 100644 index 0000000..98ee32c --- /dev/null +++ b/scripts/test_bat_scripts.ps1 @@ -0,0 +1,131 @@ +#!/usr/bin/env pwsh +# Smoke-test the .bat scripts that mirror the .sh scripts in this repo. +# +# Runs on any platform with PowerShell 7+ (`pwsh`). Does NOT actually execute +# the .bat files — cmd.exe isn't available on macOS/Linux — but verifies +# structural invariants that catch typical authoring mistakes: +# 1. Every in-scope .sh has a sibling .bat +# 2. Every .bat starts with `@echo off` +# 3. Every .bat terminates the main flow with `exit /b` +# 4. Delegate shims reference an actually-existing .sh +# 5. `_delegate_to_bash.bat` exists where delegates expect it +# +# On Windows, this same script can be extended to actually invoke each .bat +# with --help (where supported) and check the exit code. 
+# +# Usage: pwsh scripts/test_bat_scripts.ps1 + +$ErrorActionPreference = 'Stop' +$repoRoot = (Resolve-Path "$PSScriptRoot/..").Path + +$failures = [System.Collections.Generic.List[string]]::new() +$passes = 0 +function Fail([string]$msg) { $script:failures.Add($msg); Write-Host " FAIL $msg" -ForegroundColor Red } +function Pass([string]$msg) { $script:passes++; Write-Host " ok $msg" -ForegroundColor DarkGreen } + +$excludedDirs = @('vendor', 'node_modules', '.venv', '.git', 'site-packages') +$excludedPathFrag = 'benchmarks' + [IO.Path]::DirectorySeparatorChar + 'appworld' + [IO.Path]::DirectorySeparatorChar + 'appworld' + +function IsExcluded([string]$path) { + foreach ($d in $excludedDirs) { + if ($path -match ([regex]::Escape([IO.Path]::DirectorySeparatorChar + $d + [IO.Path]::DirectorySeparatorChar))) { return $true } + } + if ($path -like "*$excludedPathFrag*") { return $true } + return $false +} + +function RelPath([string]$full) { return $full.Substring($repoRoot.Length + 1) } + +$shFiles = Get-ChildItem -Path $repoRoot -Recurse -Filter '*.sh' -File | Where-Object { -not (IsExcluded $_.FullName) } +$batFiles = Get-ChildItem -Path $repoRoot -Recurse -Filter '*.bat' -File | Where-Object { -not (IsExcluded $_.FullName) } + +Write-Host "Repo root: $repoRoot" +Write-Host "Found $($shFiles.Count) .sh files and $($batFiles.Count) .bat files in scope." 
+ +# ---- [1] every .sh has a sibling .bat ----------------------------------- +Write-Host "`n[1] every .sh has a sibling .bat" -ForegroundColor Cyan +foreach ($sh in $shFiles) { + $sibling = [IO.Path]::ChangeExtension($sh.FullName, '.bat') + if (Test-Path -LiteralPath $sibling) { Pass (RelPath $sh.FullName) } + else { Fail "missing .bat sibling for $(RelPath $sh.FullName)" } +} + +# ---- [2] every .bat starts with @echo off -------------------------------- +Write-Host "`n[2] every .bat starts with '@echo off'" -ForegroundColor Cyan +foreach ($bat in $batFiles) { + $first = ([string](Get-Content -LiteralPath $bat.FullName -TotalCount 1)).Trim() + if ($first -eq '@echo off') { Pass (RelPath $bat.FullName) } + else { Fail "$(RelPath $bat.FullName) first line is '$first', expected '@echo off'" } +} + +# ---- [3] every .bat has an `exit /b` terminator -------------------------- +# common.bat is intentionally a placeholder and uses `exit /b 0` early; ok. +Write-Host "`n[3] every .bat contains 'exit /b' somewhere" -ForegroundColor Cyan +foreach ($bat in $batFiles) { + $content = Get-Content -LiteralPath $bat.FullName -Raw + if ($content -match 'exit\s+/b') { Pass (RelPath $bat.FullName) } + else { Fail "$(RelPath $bat.FullName) has no 'exit /b' terminator" } +} + +# ---- [4] delegate shims reference a real .sh ----------------------------- +# Skip _delegate_to_bash.bat itself — its REM comments contain example +# placeholders like "" that aren't +# actual code paths. +Write-Host "`n[4] delegate shims reference an existing .sh" -ForegroundColor Cyan +$delegateRegex = [regex]'_delegate_to_bash\.bat"\s+"([^"]+)"' +foreach ($bat in $batFiles) { + if ($bat.Name -eq '_delegate_to_bash.bat') { continue } + # Strip REM-prefixed lines so example syntax in comment blocks is ignored. 
+ $codeLines = (Get-Content -LiteralPath $bat.FullName) | Where-Object { $_ -notmatch '^\s*(REM|::|@REM)\s' } + $content = $codeLines -join "`n" + $m = $delegateRegex.Match($content) + if (-not $m.Success) { continue } # not a delegate shim + $target = $m.Groups[1].Value + # Expand %_THIS% to the .bat's own directory, normalise separators. + $batDir = Split-Path -Parent $bat.FullName + $resolved = $target -replace '%_THIS%', $batDir + $resolved = $resolved -replace '\\', ([IO.Path]::DirectorySeparatorChar) + # Collapse parent traversals (Resolve-Path errors if the file is missing) + try { + $abs = [IO.Path]::GetFullPath($resolved) + } catch { $abs = $resolved } + if (Test-Path -LiteralPath $abs) { + Pass "$(RelPath $bat.FullName) -> $(Split-Path -Leaf $abs)" + } else { + Fail "$(RelPath $bat.FullName) delegates to missing $target (resolved: $abs)" + } +} + +# ---- [5] every delegate shim's _delegate_to_bash.bat actually exists ----- +# Skip the helper itself; its own REM block shows an example `call` statement. 
+Write-Host "`n[5] _delegate_to_bash.bat exists where shims expect it" -ForegroundColor Cyan +$delegateHelperRegex = [regex]'call\s+"([^"]*_delegate_to_bash\.bat)"' +foreach ($bat in $batFiles) { + if ($bat.Name -eq '_delegate_to_bash.bat') { continue } + $codeLines = (Get-Content -LiteralPath $bat.FullName) | Where-Object { $_ -notmatch '^\s*(REM|::|@REM)\s' } + $content = $codeLines -join "`n" + $m = $delegateHelperRegex.Match($content) + if (-not $m.Success) { continue } + $target = $m.Groups[1].Value + $batDir = Split-Path -Parent $bat.FullName + $resolved = $target -replace '%_THIS%', $batDir + $resolved = $resolved -replace '\\', ([IO.Path]::DirectorySeparatorChar) + try { $abs = [IO.Path]::GetFullPath($resolved) } catch { $abs = $resolved } + if (Test-Path -LiteralPath $abs) { + Pass (RelPath $bat.FullName) + } else { + Fail "$(RelPath $bat.FullName) calls missing $target (resolved: $abs)" + } +} + +# ---- summary ------------------------------------------------------------- +Write-Host "`n=== Summary ===" -ForegroundColor Cyan +Write-Host ("checks passed: {0}" -f $passes) -ForegroundColor Green +Write-Host ("checks failed: {0}" -f $failures.Count) -ForegroundColor ($(if ($failures.Count -eq 0) { 'Green' } else { 'Red' })) +if ($failures.Count -gt 0) { + Write-Host "`nFailures:" -ForegroundColor Red + $failures | ForEach-Object { Write-Host " - $_" -ForegroundColor Red } + exit 1 +} +Write-Host "`nAll structural checks passed." +exit 0