118 changes: 118 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,118 @@
# StackPerf CI Pipeline
# Runs quality gates on all PRs and main branch pushes

name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

jobs:
  quality:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: "pip"

      - name: Sync dependencies
        run: uv sync --all-extras

      - name: Run linter
        run: uv run ruff check src/ tests/

      - name: Check formatting
        run: uv run ruff format --check src/ tests/

      - name: Run type checker
        run: uv run mypy src/

      - name: Run tests
        run: uv run pytest tests/ -v

      - name: Upload coverage
        uses: codecov/codecov-action@v4
        if: success()
        with:
          directory: ./coverage
          fail_ci_if_error: false
          files: ./coverage.xml

  config-validation:
    runs-on: ubuntu-latest
    needs: quality

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Sync dependencies
        run: uv sync --all-extras

      - name: Validate configs
        run: uv run stackperf validate --all-configs


**P1: Point `config-validation` at a command that actually exists**

I checked `src/cli/__init__.py` and the rest of `src/cli`: this PR does not add a `validate` subcommand or an `--all-configs` flag anywhere, and the documented CLI contract is `bench config validate`. That means this new step can never validate the config tree it is supposed to gate; once the entry-point issue is fixed it will still fail every run, and in the current workflow it only produces a green job without performing any config validation.


        continue-on-error: true
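If the review's reading of the CLI contract holds, the step would need to invoke the documented subcommand instead. A minimal sketch, assuming `bench config validate` is the interface that actually ships (the command name is taken from the review comment, not verified against the PR):

```yaml
      # Hypothetical replacement step wired to the documented CLI contract.
      - name: Validate configs
        run: uv run bench config validate
```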
Comment on lines +75 to +77


**P2: Remove `continue-on-error` from required CI gates**

GitHub Actions' `steps[*].continue-on-error` allows the job to pass even when that step exits non-zero. Here the only validation step in `config-validation` is marked that way, and the `migration-smoke` job does the same for its test step, so broken config validation or broken migrations still produce a green workflow and won't block merges.

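Acting on this comment is a one-line deletion; the gating form of the step is simply the command with no `continue-on-error` key, so a non-zero exit fails the job:

```yaml
      # Gating variant: without `continue-on-error`, a failed validation
      # fails the job and blocks the merge.
      - name: Validate configs
        run: uv run stackperf validate --all-configs
```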


  migration-smoke:
    runs-on: ubuntu-latest
    needs: quality
    services:
      postgres:
        image: postgres:16
        env:
          POSTGRES_USER: test
          POSTGRES_PASSWORD: test
          POSTGRES_DB: stackperf_test
        ports:
          - 5432:5432
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          version: "latest"

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Sync dependencies
        run: uv sync --all-extras

      - name: Run migration smoke test
        run: uv run pytest tests/integration/test_migrations.py -v


**P2: Replace the skipped migration smoke test with a real assertion**

This job runs `tests/integration/test_migrations.py`, but every test in that file is decorated with `@pytest.mark.skip`. Even if `continue-on-error` is removed, CI will still report a successful smoke test when Alembic wiring or schema migrations are broken, because pytest only records the tests as skipped and never exercises the migration path.


        env:
          DATABASE_URL: postgresql+asyncpg://test:test@localhost:5432/stackperf_test
        continue-on-error: true
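The skipped-test concern above calls for a smoke test that asserts on the resulting schema. A minimal sketch of the shape such a test could take, using stdlib SQLite in place of the CI Postgres service and a hypothetical `benchmark_runs` table; a real version would run `alembic.command.upgrade(config, "head")` against `DATABASE_URL` instead of the stand-in below:

```python
import sqlite3


def run_migrations(conn: sqlite3.Connection) -> None:
    # Stand-in for `alembic upgrade head`; the table name is illustrative.
    conn.execute(
        "CREATE TABLE benchmark_runs (id INTEGER PRIMARY KEY, name TEXT)"
    )


def test_migrations_create_expected_tables() -> None:
    conn = sqlite3.connect(":memory:")
    run_migrations(conn)
    tables = {
        row[0]
        for row in conn.execute(
            "SELECT name FROM sqlite_master WHERE type='table'"
        )
    }
    # The job should fail unless the schema actually materialized.
    assert "benchmark_runs" in tables
```

The key point is the trailing `assert` on real schema state: a skipped test exercises nothing, while this shape turns a broken migration into a red job.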
96 changes: 95 additions & 1 deletion .gitignore
@@ -1 +1,95 @@
old
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Virtual environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# IDE
.idea/
.vscode/
*.swp
*.swo
*~

# Testing
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/
coverage.xml
*.cover
*.py,cover
.hypothesis/

# Type checking
.mypy_cache/
.dmypy.json
dmypy.json

# Linting
.ruff_cache/

# Build artifacts
*.manifest
*.spec

# Secrets - NEVER commit these
.env
.env.local
.env.*.local
*.pem
*.key
secrets/
configs/secrets/

# Generated session artifacts (security)
.session-artifacts/
exports/
*.env.generated

# Database
*.db
*.sqlite
*.sqlite3

# Logs
logs/
*.log

# OS
.DS_Store
Thumbs.db

# Project-specific ignores
# Generated harness environment snippets should be ignored
harness-env-*.sh
harness-env-*.env

# LiteLLM local data (if running locally)
litellm-data/
72 changes: 72 additions & 0 deletions Makefile
@@ -0,0 +1,72 @@
# StackPerf Makefile
# CI-aligned commands for local development

.PHONY: help sync lint lint-fix format format-check type test test-cov check ci clean build

# Default target
help:
	@echo "StackPerf Development Commands"
	@echo "==============================="
	@echo ""
	@echo "Setup & Sync:"
	@echo "  sync          Sync dependencies with uv"
	@echo "  clean         Remove build artifacts and caches"
	@echo ""
	@echo "Quality Gates:"
	@echo "  lint          Run ruff linter"
	@echo "  lint-fix      Run ruff linter with autofix"
	@echo "  format        Format code with ruff"
	@echo "  format-check  Check formatting without changing files"
	@echo "  type          Run mypy type checker"
	@echo "  test          Run pytest test suite"
	@echo "  test-cov      Run tests with coverage report"
	@echo "  check         Run all quality gates (lint + format-check + type + test)"
	@echo "  ci            Run full CI pipeline (same as check)"
	@echo ""
	@echo "Build:"
	@echo "  build         Build distribution packages"
	@echo ""

# Setup & Sync
sync:
	uv sync --all-extras

# Quality Gates
lint:
	uv run ruff check src/ tests/

lint-fix:
	uv run ruff check --fix src/ tests/

format:
	uv run ruff format src/ tests/

format-check:
	uv run ruff format --check src/ tests/

type:
	uv run mypy src/

test:
	uv run pytest tests/ -v

test-cov:
	uv run pytest tests/ --cov=src --cov-report=term-missing

# Full CI pipeline (runs the same gates as the workflow)
check: lint format-check type test
	@echo "All quality gates passed ✓"

ci: check
	@echo "CI pipeline completed ✓"

# Build
build:
	uv build

# Clean
clean:
	rm -rf .pytest_cache/
	rm -rf .mypy_cache/
	rm -rf .ruff_cache/
	rm -rf htmlcov/
	rm -rf dist/
	rm -rf *.egg-info/
	find . -type d -name "__pycache__" -exec rm -rf {} +
	find . -type f -name "*.pyc" -delete
106 changes: 106 additions & 0 deletions pyproject.toml
@@ -0,0 +1,106 @@
[project]
name = "stackperf"
version = "0.1.0"
description = "Harness-agnostic benchmarking system for comparing providers, models, and harnesses"
readme = "README.md"
requires-python = ">=3.11"
license = { text = "Proprietary" }
authors = [{ name = "Trilogy AI COE" }]
dependencies = [
    "pydantic>=2.5.0",
    "pyyaml>=6.0.1",
    "sqlalchemy>=2.0.25",
    "alembic>=1.13.0",
    "asyncpg>=0.29.0",
    "httpx>=0.26.0",
    "click>=8.1.7",
    "rich>=13.7.0",
    "prometheus-client>=0.19.0",
]

[project.optional-dependencies]
dev = [
    "pytest>=7.4.3",
    "pytest-asyncio>=0.23.2",
    "pytest-cov>=4.1.0",
    "ruff>=0.1.9",
    "mypy>=1.8.0",
    "types-pyyaml>=6.0.12",
]

[project.scripts]
stackperf = "cli:main"
Comment on lines +31 to +32


**P1: Point `stackperf` at the packaged CLI module**

In a clean install this console script will try to import `cli.main`, but the wheel only packages the `src` package (`src.cli` exists; `cli` does not). That means `uv run stackperf …` dies with `ModuleNotFoundError` before any command runs, including the new validation step in CI and the operator diagnostics this change adds.


bench = "cli:main"
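Given that the wheel packages `src` (see `[tool.hatch.build.targets.wheel]` below), one minimal fix for the entry points flagged above is to target the module path as it is actually importable. A sketch, assuming `main` is defined in `src/cli/__init__.py`:

```toml
[project.scripts]
stackperf = "src.cli:main"
bench = "src.cli:main"
```

An alternative, arguably cleaner fix is to rename the package directory so it imports as `stackperf` rather than `src`; either way the console script and the packaged module must agree.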

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["src"]

[tool.ruff]
target-version = "py311"
line-length = 100

[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "F",   # Pyflakes
    "I",   # isort
    "UP",  # pyupgrade
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "SIM", # flake8-simplify
    "TCH", # flake8-type-checking
    "RUF", # Ruff-specific rules
    "D",   # pydocstyle
]
ignore = [
    "D100",  # Missing docstring in public module
    "D104",  # Missing docstring in public package
    "D107",  # Missing docstring in __init__
    "UP042", # Use StrEnum (keep str, Enum for broader compatibility)
]

[tool.ruff.lint.pydocstyle]
convention = "google"

[tool.ruff.lint.isort]
known-first-party = ["benchmark_core", "cli", "collectors", "reporting", "api"]

[tool.ruff.lint.per-file-ignores]
"tests/*" = ["E501"] # Allow long lines in test fixtures (synthetic secrets)
"src/benchmark_core/security/secrets.py" = ["E501"] # Synthetic secrets are long

[tool.mypy]
python_version = "3.11"
strict = true
warn_return_any = true
warn_unused_ignores = true
disallow_untyped_defs = true
plugins = ["pydantic.mypy"]

[[tool.mypy.overrides]]
module = ["prometheus_client.*", "asyncpg.*"]
ignore_missing_imports = true

[tool.pytest.ini_options]
testpaths = ["tests"]
asyncio_mode = "auto"
addopts = "-v --tb=short"
filterwarnings = [
    "ignore::DeprecationWarning",
]

[tool.coverage.run]
source = ["src"]
branch = true
omit = ["tests/*", "*/__main__.py"]

[tool.coverage.report]
exclude_lines = [
    "pragma: no cover",
    "if TYPE_CHECKING:",
    "raise NotImplementedError",
]
3 changes: 3 additions & 0 deletions src/__init__.py
@@ -0,0 +1,3 @@
"""StackPerf benchmarking system."""

__version__ = "0.1.0"