88 changes: 88 additions & 0 deletions .github/workflows/project-ci.yml
@@ -0,0 +1,88 @@
# managed by workflow os: project-ci
name: project-ci

on:
pull_request:
merge_group:
workflow_dispatch:

permissions:
contents: read

jobs:
project-ci:
runs-on: ubuntu-latest
timeout-minutes: 25
steps:
- name: Check out repo
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: Set up Node
if: ${{ hashFiles('package.json') != '' }}
uses: actions/setup-node@v4
with:
node-version: "20"

- name: Set up Go
if: ${{ hashFiles('go.mod') != '' }}
uses: actions/setup-go@v5
with:
go-version: stable

- name: Set up Rust
if: ${{ hashFiles('Cargo.toml') != '' }}
uses: dtolnay/rust-toolchain@stable

- name: Run project CI
shell: bash
run: |
set -euo pipefail
ran_any=0

if [[ -f pyproject.toml || -f setup.py || -f requirements.txt || -d tests ]]; then
python -m pip install --upgrade pip
if [[ -f requirements.txt ]]; then python -m pip install -r requirements.txt; fi
if [[ -f requirements-dev.txt ]]; then python -m pip install -r requirements-dev.txt; fi
python -m pip install pytest
if [[ -f pyproject.toml || -f setup.py ]]; then python -m pip install -e . || true; fi
if [[ -d tests ]]; then
python -m pytest tests -q --tb=short
ran_any=1
fi
fi

if [[ -f package.json ]]; then
if [[ -f package-lock.json ]]; then npm ci; else npm install; fi
if npm run | grep -qE '(^|[[:space:]])test([[:space:]]|$)'; then
npm test -- --runInBand || npm test
ran_any=1
fi
fi

if [[ -f go.mod ]]; then
go test ./...
ran_any=1
fi

if [[ -f Cargo.toml ]]; then
cargo test --all-targets --all-features
ran_any=1
fi

if [[ -f CMakeLists.txt ]]; then
sudo apt-get update
sudo apt-get install -y cmake ninja-build
cmake -S . -B build -G Ninja
cmake --build build
ctest --test-dir build --output-on-failure
ran_any=1
fi

if [[ "$ran_any" -eq 0 ]]; then
echo "No recognized project CI harness found; passing as metadata-only repo."
fi
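
The run step above picks harnesses purely by marker files in the repository root. A simplified dry-run sketch of that detection (it only prints which harnesses would be selected and executes nothing from the workflow; the real Python branch also checks `setup.py`, `requirements.txt`, and a `tests/` directory):

```bash
#!/usr/bin/env bash
# Simplified sketch of the marker-file detection in project-ci.yml above.
# It only reports which harnesses would run; it installs and tests nothing.
set -euo pipefail
found=0
for marker in pyproject.toml package.json go.mod Cargo.toml CMakeLists.txt; do
  if [[ -e "$marker" ]]; then
    echo "would run the harness keyed on $marker"
    found=1
  fi
done
[[ "$found" -eq 1 ]] || echo "no recognized harness; metadata-only repo"
```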
27 changes: 27 additions & 0 deletions .github/workflows/secret-scan.yml
@@ -0,0 +1,27 @@
# managed by workflow os: secret-scan
name: secret-scan

on:
pull_request:
push:
merge_group:
workflow_dispatch:

permissions:
contents: read

jobs:
secret-scan:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Check out repo
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Run gitleaks
uses: gitleaks/gitleaks-action@v2
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITLEAKS_CONFIG: .gitleaks.toml
46 changes: 44 additions & 2 deletions .gitignore
@@ -3,6 +3,48 @@ __pycache__/
.venv/
venv/
*.pyc

# CMake and out-of-source build trees
/build/
.cmake-test-build/
reports/
/build-*/
/build_*/
/.cmake-test-build/
/cmake-build-*/
/out/
/CMakeCache.txt
/CMakeFiles/
/CTestTestfile.cmake
/CTestCostData.txt
/DartConfiguration.tcl
/Testing/
/Makefile
/build.ninja
/.ninja_deps
/.ninja_log
/rules.ninja
/cmake_install.cmake
/compile_commands.json
/lob_engine
/lob_engine.exe
/lob_benchmark
/lob_benchmark.exe
/test_parser
/test_parser.exe
/test_order_book
/test_order_book.exe
/test_analytics
/test_analytics.exe

# Generated benchmark, analytics, and report artifacts
/benchmark/*.csv
/benchmark/*.json
/benchmark/*.log
/benchmark/*.out
/benchmark/*.txt
/report/*.csv
/report/*.json
/report/*.log
/report/*.out
/report/*.txt
/report/generated/
/reports/
11 changes: 11 additions & 0 deletions .gitleaks.toml
@@ -0,0 +1,11 @@
# managed by workflow os: gitleaks-config
title = "workflow managed secret scan configuration"

[extend]
useDefault = true

[[rules]]
id = "openclaw-auth-token"
description = "OpenClaw auth or gateway token"
regex = '''(?i)(?:OPENCLAW_(?:AUTH|GATEWAY|API)_TOKEN|openclaw(?:[_-]?(?:auth|gateway|api))?[_-]?token)[^\n]{0,32}[=:][^\S\r\n]*["']?[A-Za-z0-9._\-]{12,}["']?'''
keywords = ["openclaw", "OPENCLAW_"]
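
To sanity-check the `openclaw-auth-token` rule locally, one option is to point gitleaks at a throwaway directory containing an obviously fake token. A sketch, assuming a gitleaks v8 CLI on `PATH`; the file name and token value are illustrative only, and a nonzero exit from gitleaks here is the expected "leak found" result:

```bash
# Scratch directory holding a deliberately fake token that should trip the custom rule.
tmp="$(mktemp -d)"
printf 'OPENCLAW_AUTH_TOKEN=placeholder_not_real_0123456789\n' > "$tmp/fake.env"

# Scan the directory as plain files (no git history) using the repo's config.
gitleaks detect --no-git --source "$tmp" --config .gitleaks.toml --verbose
rm -rf "$tmp"
```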
12 changes: 12 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,12 @@
# managed by workflow os: pre-commit
default_install_hook_types:
- pre-commit
- pre-push

repos:
- repo: https://github.com/gitleaks/gitleaks
rev: v8.30.1
hooks:
- id: gitleaks
stages: [pre-commit, pre-push, manual]
args: ["--config=.gitleaks.toml"]
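
For running the same hook configuration locally, the standard pre-commit workflow applies; a sketch, assuming `pre-commit` is available:

```bash
pip install pre-commit       # or: pipx install pre-commit
pre-commit install           # installs the hook types listed above (pre-commit, pre-push)
pre-commit run --all-files   # one-off run of every configured hook, including gitleaks
```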
54 changes: 33 additions & 21 deletions README.md
@@ -1,14 +1,14 @@
# Real-Time Limit Order Book Engine in C++

This repository implements a small, deterministic C++ limit-order-book engine for LOBSTER-style message data. It includes:
This repository implements a small, deterministic C++ limit-order-book engine for LOBSTER-style message data. The parser and replay code operate on the LOBSTER six-column message schema, but the checked-in CSVs are tiny synthetic/reduced fixtures for reproducibility, not full proprietary LOBSTER distributions. The repo includes:

- typed CSV ingestion for LOBSTER message rows
- order lifecycle processing for add, cancel, and execute events
- aggregated bid/ask levels plus order-ID lookup
- two price-level backends: `std::map` and flat sorted `std::vector`
- rolling analytics and CSV export after every processed message
- deterministic C++ and Python integration tests
- replay benchmark tooling and a checked-in benchmark report
- replay benchmark tooling and a hand-maintained benchmark reproducibility note

## Repository layout

@@ -19,29 +19,42 @@ This repository implements a small, deterministic C++ limit-order-book engine fo
- `data/`: checked-in small sample datasets used for deterministic tests and reproducible benchmark captures
- `report/`: benchmark and methodology notes

## Build
## Reproducible build

From a fresh clone, run the build, verifier, and benchmark commands below in order. Start with a clean temporary build directory instead of an in-repo build tree:

```bash
build_dir="$(mktemp -d "${TMPDIR:-/tmp}/lob-engine-build.XXXXXX")"
cmake -S . -B "$build_dir" -DCMAKE_BUILD_TYPE=Release
cmake --build "$build_dir" --config Release
```

## Correctness verification

Run the CMake/CTest verifier from that build directory, then run the existing Python test suite from the repo root:

```bash
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build
ctest --test-dir build --output-on-failure
ctest --test-dir "$build_dir" --output-on-failure -C Release
python -m pytest tests -q --tb=short
```

`ctest` runs the three C++ test executables plus the `lob_benchmark_smoke` path. `python -m pytest tests -q --tb=short` configures and reuses a separate `.cmake-test-build/` directory under the repo root; that directory and the analytics CSVs produced there are ignored local test artifacts.
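
When iterating on a single failing target, `ctest`'s name filter narrows the run; a sketch, assuming the registered CTest names match the test executable names (for example `test_order_book`):

```bash
ctest --test-dir "$build_dir" -R test_order_book --output-on-failure -C Release
```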

## CLI usage

Replay a dataset and print final top-of-book state:

```bash
./build/lob_engine data/AAPL_sample_messages.csv --backend both --depth 10 --repeat 5
"$build_dir/lob_engine" data/AAPL_sample_messages.csv --backend both --depth 10 --repeat 5
```

Export analytics rows after every processed message:

```bash
./build/lob_engine \
"$build_dir/lob_engine" \
data/AAPL_sample_messages.csv \
--backend both \
--analytics-out build/analytics.csv \
--analytics-out "$build_dir/analytics.csv" \
--trade-window-messages 1000 \
--realized-vol-window-seconds 300
```
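
A quick way to confirm the export wrote analytics rows is to inspect the CSV directly; a sketch (the column layout is whatever the engine emits and is not specified here):

```bash
head -n 3 "$build_dir/analytics.csv"   # first few exported rows
wc -l "$build_dir/analytics.csv"       # total row count
```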
@@ -81,15 +94,13 @@ Deterministic parity tests assert that both backends produce identical book snap

## Benchmarking

The benchmark harness focuses on replay throughput and simple preallocation effects:
The benchmark harness focuses on replay throughput and simple preallocation effects on the checked-in reduced fixtures. These four commands are the final step in the fresh-clone verification sequence documented above:

```bash
./build/lob_benchmark \
--dataset data/AAPL_sample_messages.csv \
--backend both \
--reserve both \
--depth 5 \
--repeat 100000
"$build_dir/lob_benchmark" --dataset data/AAPL_sample_messages.csv --backend both --reserve both --depth 5 --repeat 100000
"$build_dir/lob_benchmark" --dataset data/MSFT_sample_messages.csv --backend both --reserve both --depth 5 --repeat 100000
"$build_dir/lob_benchmark" --dataset data/NVDA_sample_messages.csv --backend both --reserve both --depth 5 --repeat 100000
"$build_dir/lob_benchmark" --dataset data/TSLA_sample_messages.csv --backend both --reserve both --depth 5 --repeat 100000
```
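
To keep a local record of a rerun next to `report/benchmark_report.md`, piping each command through `tee` is sufficient; a sketch with an illustrative output path:

```bash
"$build_dir/lob_benchmark" --dataset data/AAPL_sample_messages.csv \
  --backend both --reserve both --depth 5 --repeat 100000 \
  | tee "$build_dir/benchmark_AAPL.txt"
```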

What the benchmark compares:
@@ -102,20 +113,21 @@ What the benchmark compares:
- `unordered_map::reserve()` for order lookup
- vector capacity reservation for the flat backend

This is the bounded hot-path allocation reduction implemented in the repo. The benchmark report records the measured effect on the checked-in sample datasets.

On a fresh build of this repository on a 4-core AMD EPYC-Rome VM, the fastest AAPL replay configuration processed `60.1 million messages/second` with the flat-vector backend and reserve disabled.
This is the bounded hot-path allocation reduction implemented in the repo. Throughput numbers are host-dependent and should be treated as local measurements on the checked-in reduced fixtures, not as publishable claims about full vendor datasets. See `report/benchmark_report.md` for the exact datasets and commands used for reproducible reruns.

## Dataset note

The repo ships small checked-in reproducibility datasets:
The repo ships five checked-in reproducibility fixtures:

- `AAPL_sample_messages.csv`
- `MSFT_sample_messages.csv`
- `NVDA_sample_messages.csv`
- `TSLA_sample_messages.csv`
- `sample_messages.csv`

The four ticker-named files are 25-line reduced fixtures with 20 valid messages plus 5 intentionally malformed rows each. `sample_messages.csv` is a legacy generic fixture with the same contents as `AAPL_sample_messages.csv`, kept because the parser and Python integration tests reference it directly.

They are intentionally tiny and deterministic so the build, tests, and benchmark report can run in CI or on a fresh clone without external data dependencies. They are suitable for correctness checks and relative replay comparisons, not production-grade market simulation.
These files are intentionally tiny and deterministic so the build, tests, and benchmark workflow can run on a fresh clone without external data dependencies. They are suitable for correctness checks and relative replay comparisons, not production-grade market simulation or claims about full vendor data.
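
A quick look at one fixture confirms the size and the LOBSTER-style six-column message schema described above; a sketch (the exact field values are whatever is checked in):

```bash
wc -l data/AAPL_sample_messages.csv      # 25 rows per the dataset note above
head -n 2 data/AAPL_sample_messages.csv  # first rows in the six-column message format
```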

## Why this is useful for quant / HFT workflows
