diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
new file mode 100644
index 0000000..0bba817
--- /dev/null
+++ b/.github/workflows/test.yaml
@@ -0,0 +1,126 @@
+name: tests
+on:
+  push:
+    branches: [master]
+  pull_request:
+permissions: {contents: read}  # least-privilege GITHUB_TOKEN: both jobs only check out and read the repository
+jobs:
+  python-tests:  # runs the token tracker, parser adapter and Python/JS parser pytest files
+    name: Python tests (${{ matrix.os }})
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false  # an OS leg failing does not cancel the other legs
+      matrix:  # Linux, macOS and Windows when the repository is knostic/OpenAnt; ubuntu-latest only anywhere else
+        os: ${{ github.repository == 'knostic/OpenAnt' && fromJSON('["ubuntu-latest", "macos-latest", "windows-latest"]') || fromJSON('["ubuntu-latest"]') }}
+
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.11"
+          cache: "pip"  # reuse pip's download cache between runs
+          cache-dependency-path: libs/openant-core/requirements.txt  # file whose hash keys the pip cache
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v6
+        with:
+          node-version: "22"
+          cache: "npm"  # reuse npm's download cache between runs (not node_modules itself)
+          cache-dependency-path: libs/openant-core/parsers/javascript/package-lock.json  # lockfile whose hash keys the npm cache
+
+      - name: Install Python dependencies
+        working-directory: libs/openant-core
+        run: pip install -r requirements.txt && pip install pytest
+
+      - name: Cache JS parser node_modules  # caches the installed tree itself, separately from the npm cache above
+        id: cache-node-modules  # referenced by the `if:` of the install step below
+        uses: actions/cache@v4
+        with:
+          path: libs/openant-core/parsers/javascript/node_modules
+          key: ${{ runner.os }}-jsparser-nodemodules-${{ hashFiles('libs/openant-core/parsers/javascript/package-lock.json') }}
+
+      - name: Install JS parser dependencies
+        if: steps.cache-node-modules.outputs.cache-hit != 'true'  # npm ci only when node_modules had no exact-key cache hit
+        working-directory: libs/openant-core/parsers/javascript
+        run: npm ci
+
+      - name: Run Python and parser tests  # explicit file list; tests/test_go_cli.py is left to the go-tests job
+        working-directory: libs/openant-core
+        run: python -m pytest tests/test_token_tracker.py tests/test_parser_adapter.py tests/test_python_parser.py tests/test_js_parser.py -v
+
+  go-tests:  # go vet + build of the CLI, then pytest integration tests against the built binary
+    name: Go build + integration (${{ matrix.os }})
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false  # an OS leg failing does not cancel the other legs
+      matrix:  # Linux, macOS and Windows when the repository is knostic/OpenAnt; ubuntu-latest only anywhere else
+        os: ${{ github.repository == 'knostic/OpenAnt' && fromJSON('["ubuntu-latest", "macos-latest", "windows-latest"]') || fromJSON('["ubuntu-latest"]') }}
+
+    steps:
+      - uses: actions/checkout@v6
+
+      - name: Set up Go
+        uses: actions/setup-go@v6
+        with:
+          go-version-file: apps/openant-cli/go.mod  # Go version is taken from the module's go.mod
+          cache-dependency-path: apps/openant-cli/go.sum
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.11"
+          cache: "pip"  # reuse pip's download cache between runs
+          cache-dependency-path: libs/openant-core/requirements.txt
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v6
+        with:
+          node-version: "22"
+          cache: "npm"  # reuse npm's download cache between runs (not node_modules itself)
+          cache-dependency-path: libs/openant-core/parsers/javascript/package-lock.json
+
+      - name: Vet
+        working-directory: apps/openant-cli
+        run: go vet ./...
+
+      - name: Build (Linux/macOS)
+        if: runner.os != 'Windows'
+        working-directory: apps/openant-cli
+        run: go build -o openant .
+
+      - name: Build (Windows)  # same build; only the output file name differs (.exe)
+        if: runner.os == 'Windows'
+        working-directory: apps/openant-cli
+        run: go build -o openant.exe .
+
+      - name: Verify binary exists
+        working-directory: apps/openant-cli
+        shell: bash  # explicit so the POSIX test syntax below also runs on the Windows leg
+        run: |
+          if [ -f openant ] || [ -f openant.exe ]; then
+            echo "Binary built successfully"
+          else
+            echo "ERROR: Binary not found" && exit 1
+          fi
+
+      - name: Install Python dependencies
+        working-directory: libs/openant-core
+        run: pip install -r requirements.txt && pip install pytest
+
+      - name: Cache JS parser node_modules
+        id: cache-node-modules  # referenced by the `if:` of the install step below
+        uses: actions/cache@v4
+        with:
+          path: libs/openant-core/parsers/javascript/node_modules
+          key: ${{ runner.os }}-jsparser-nodemodules-${{ hashFiles('libs/openant-core/parsers/javascript/package-lock.json') }}
+
+      - name: Install JS parser dependencies
+        if: steps.cache-node-modules.outputs.cache-hit != 'true'  # npm ci only when node_modules had no exact-key cache hit
+        working-directory: libs/openant-core/parsers/javascript
+        run: npm ci
+
+      - name: Run Go CLI integration tests  # tests/test_go_cli.py looks for the binary built above under apps/openant-cli
+        working-directory: libs/openant-core
+        run: python -m pytest tests/test_go_cli.py -v --tb=short
diff --git a/libs/openant-core/pytest.ini b/libs/openant-core/pytest.ini
new file mode 100644
index 0000000..4584de7
--- /dev/null
+++ b/libs/openant-core/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+testpaths = tests
+pythonpath = .
diff --git a/libs/openant-core/tests/__init__.py b/libs/openant-core/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/libs/openant-core/tests/conftest.py b/libs/openant-core/tests/conftest.py
new file mode 100644
index 0000000..affe238
--- /dev/null
+++ b/libs/openant-core/tests/conftest.py
@@ -0,0 +1,32 @@
+"""Shared fixtures for OpenAnt tests."""
+import sys
+from pathlib import Path
+
+import pytest
+
+# Ensure the project root is on sys.path so imports like `from utilities...` work
+PROJECT_ROOT = Path(__file__).parent.parent
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+FIXTURES_DIR = Path(__file__).parent / "fixtures"
+SAMPLE_PYTHON_REPO = FIXTURES_DIR / "sample_python_repo"
+SAMPLE_JS_REPO = FIXTURES_DIR / "sample_js_repo"
+
+
+@pytest.fixture
+def sample_python_repo():
+    """Path to the sample Python repository fixture."""
+    return str(SAMPLE_PYTHON_REPO)
+
+
+@pytest.fixture
+def sample_js_repo():
+    """Path to the sample JavaScript repository fixture."""
+    return str(SAMPLE_JS_REPO)
+
+
+@pytest.fixture
+def tmp_output_dir(tmp_path):
+    """Temporary output directory for parser results."""
+    return str(tmp_path / "output")
diff --git a/libs/openant-core/tests/fixtures/sample_js_repo/src/app.js b/libs/openant-core/tests/fixtures/sample_js_repo/src/app.js
new file mode 100644
index 0000000..2a47f9f
--- /dev/null
+++ b/libs/openant-core/tests/fixtures/sample_js_repo/src/app.js
@@ -0,0 +1,24 @@
+const express = require("express");
+const { getUser, createUser } = require("./db");
+
+const app = express();
+app.use(express.json());  // parse JSON request bodies so handlers can read req.body
+
+app.get("/users/:id", async (req, res) => {  // look up one user by the :id path parameter
+  const user = await getUser(req.params.id);
+  if (!user) {  // getUser resolves null when no row matches
+    return res.status(404).json({ error: "Not found" });
+  }
+  res.json(user);
+});
+
+app.post("/users", async (req, res) => {  // create a user from the `name` field of the JSON body
+  const { name } = req.body;
+  if (!name) {  // a missing or empty name is rejected with 400
+    return res.status(400).json({ error: "Name required" });
+  }
+  const user = await createUser(name);
+  res.status(201).json(user);
+});
+
+module.exports = app;  // the app is exported as-is; this file never calls listen()
diff --git a/libs/openant-core/tests/fixtures/sample_js_repo/src/db.js b/libs/openant-core/tests/fixtures/sample_js_repo/src/db.js
new file mode 100644
index 0000000..530810c
--- /dev/null
+++ b/libs/openant-core/tests/fixtures/sample_js_repo/src/db.js
@@ -0,0 +1,29 @@
+const sqlite3 = require("sqlite3");
+
+function getConnection() {  // opens a new handle to app.db on every call
+  return new sqlite3.Database("app.db");
+}
+
+async function getUser(id) {  // resolves the matching users row, or null when there is none
+  const db = getConnection();
+  return new Promise((resolve, reject) => {
+    db.get("SELECT * FROM users WHERE id = ?", [id], (err, row) => {  // id is bound as a parameter, not concatenated
+      db.close();  // close the per-call handle before settling the promise
+      if (err) reject(err);
+      else resolve(row || null);
+    });
+  });
+}
+
+async function createUser(name) {  // inserts a users row and resolves { id, name }
+  const db = getConnection();
+  return new Promise((resolve, reject) => {
+    db.run("INSERT INTO users (name) VALUES (?)", [name], function (err) {  // plain function (not arrow) because the body reads this.lastID
+      db.close();  // close the per-call handle before settling the promise
+      if (err) reject(err);
+      else resolve({ id: this.lastID, name });
+    });
+  });
+}
+
+module.exports = { getUser, createUser, getConnection };
diff --git a/libs/openant-core/tests/fixtures/sample_js_repo/src/utils.js b/libs/openant-core/tests/fixtures/sample_js_repo/src/utils.js
new file mode 100644
index 0000000..9b3b986
--- /dev/null
+++ b/libs/openant-core/tests/fixtures/sample_js_repo/src/utils.js
@@ -0,0 +1,12 @@
+function sanitizeInput(value) {  // always returns a string: non-strings are converted, strings are trimmed
+  if (typeof value !== "string") {
+    return String(value);  // converted values are returned without trimming
+  }
+  return value.trim();
+}
+
+function validateEmail(email) {  // loose check: true when the value contains both "@" and "."
+  return email.includes("@") && email.includes(".");
+}
+
+module.exports = { sanitizeInput, validateEmail };
diff --git a/libs/openant-core/tests/fixtures/sample_python_repo/app.py b/libs/openant-core/tests/fixtures/sample_python_repo/app.py
new file mode 100644
index 0000000..fac8da3
--- /dev/null
+++ b/libs/openant-core/tests/fixtures/sample_python_repo/app.py
@@ -0,0 +1,23 @@
+"""Sample Flask app for testing."""
+from flask import Flask, request, jsonify
+from .db import get_user, create_user
+
+app = Flask(__name__)
+
+
+@app.route("/users/<int:user_id>")
+def get_user_endpoint(user_id):  # returns the user as JSON, or a JSON error with status 404
+    user = get_user(user_id)
+    if not user:  # get_user returns None when no row matches
+        return jsonify({"error": "Not found"}), 404
+    return jsonify(user)
+
+
+@app.route("/users", methods=["POST"])
+def create_user_endpoint():  # creates a user from the body's "name"; 400 when it is missing or empty
+    data = request.get_json()
+    name = data.get("name")  # NOTE(review): assumes the body parsed to a dict — confirm
+    if not name:
+        return jsonify({"error": "Name required"}), 400
+    user = create_user(name)
+    return jsonify(user), 201
diff --git a/libs/openant-core/tests/fixtures/sample_python_repo/db.py b/libs/openant-core/tests/fixtures/sample_python_repo/db.py
new file mode 100644
index 0000000..25ad871
--- /dev/null
+++ b/libs/openant-core/tests/fixtures/sample_python_repo/db.py
@@ -0,0 +1,25 @@
+"""Sample database module for testing."""
+import sqlite3
+
+
+def get_connection():  # opens a new connection to app.db on every call
+    return sqlite3.connect("app.db")
+
+
+def get_user(user_id):  # returns {"id", "name"} for the matching row, or None when there is none
+    conn = get_connection()
+    cursor = conn.execute("SELECT * FROM users WHERE id = ?", (user_id,))  # user_id is bound, not interpolated
+    row = cursor.fetchone()
+    conn.close()
+    if row:
+        return {"id": row[0], "name": row[1]}  # presumably the first two columns are id and name — confirm against the schema
+    return None
+
+
+def create_user(name):  # inserts a row, commits, and returns {"id", "name"} with the new row id
+    conn = get_connection()
+    cursor = conn.execute("INSERT INTO users (name) VALUES (?)", (name,))  # name is bound, not interpolated
+    conn.commit()
+    user_id = cursor.lastrowid  # row id produced by the INSERT above
+    conn.close()
+    return {"id": user_id, "name": name}
diff --git a/libs/openant-core/tests/fixtures/sample_python_repo/utils.py b/libs/openant-core/tests/fixtures/sample_python_repo/utils.py
new file mode 100644
index 0000000..4fded7f
--- /dev/null
+++ b/libs/openant-core/tests/fixtures/sample_python_repo/utils.py
@@ -0,0 +1,11 @@
+"""Sample utility module for testing."""
+
+
+def sanitize_input(value):  # always returns a str: non-strings are converted, strings are stripped
+    if not isinstance(value, str):
+        return str(value)  # converted values are returned without stripping
+    return value.strip()
+
+
+def validate_email(email):  # loose check: True when the value contains both "@" and "."
+    return "@" in email and "." in email
diff --git a/libs/openant-core/tests/test_go_cli.py b/libs/openant-core/tests/test_go_cli.py
new file mode 100644
index 0000000..9484d4b
--- /dev/null
+++ b/libs/openant-core/tests/test_go_cli.py
@@ -0,0 +1,158 @@
+"""Integration tests for the Go CLI wrapper (openant.exe).
+
+These tests invoke the real compiled binary and verify it correctly
+delegates to the Python core. They test the wrapper, not the LLM pipeline —
+so they use parse-only commands that don't require an API key.
+"""
+import json
+import os
+import subprocess
+import shutil
+import sys
+from pathlib import Path
+
+import pytest
+
+CLI_DIR = Path(__file__).parent.parent.parent.parent / "apps" / "openant-cli"
+BINARY_NAME = "openant.exe" if sys.platform == "win32" else "openant"
+BINARY = CLI_DIR / BINARY_NAME
+
+pytestmark = pytest.mark.skipif(
+    not BINARY.exists(),
+    reason=f"Go binary not built at {BINARY}. Run: cd apps/openant-cli && go build -o {BINARY_NAME} .",
+)
+
+
+def run_cli(*args, env_override=None):
+    """Run the openant CLI binary and return the CompletedProcess."""
+    env = os.environ.copy()
+    # Don't let the test hit any real API
+    env.pop("ANTHROPIC_API_KEY", None)
+    env.pop("OPENANT_LOCAL_CLAUDE", None)
+    if env_override:
+        env.update(env_override)
+    return subprocess.run(
+        [str(BINARY)] + list(args),
+        capture_output=True,
+        text=True,
+        timeout=30,
+        env=env,
+    )
+
+
+class TestVersion:
+    def test_version_runs(self):
+        result = run_cli("version")
+        assert result.returncode == 0
+        assert "openant" in result.stderr.lower() or "openant" in result.stdout.lower()
+
+    def test_version_subcommand(self):
+        result = run_cli("version")
+        assert result.returncode == 0
+
+
+class TestHelp:
+    def test_help(self):
+        result = run_cli("--help")
+        assert result.returncode == 0
+        output = result.stdout + result.stderr
+        assert "scan" in output
+        assert "parse" in output
+
+    def test_parse_help(self):
+        result = run_cli("parse", "--help")
+        assert result.returncode == 0
+        output = result.stdout + result.stderr
+        assert "repository" in output.lower()
+
+    def test_scan_help(self):
+        result = run_cli("scan", "--help")
+        assert result.returncode == 0
+        output = result.stdout + result.stderr
+        assert "pipeline" in output.lower()
+
+
+class TestParse:
+    def test_parse_python_repo(self, sample_python_repo, tmp_path):
+        output_dir = str(tmp_path / "output")
+        result = run_cli(
+            "parse", sample_python_repo,
+            "--output", output_dir,
+            "--language", "python",
+            "--json",
+        )
+        assert result.returncode == 0
+
+        envelope = json.loads(result.stdout)
+        assert envelope["status"] == "success"
+
+    def test_parse_produces_dataset(self, sample_python_repo, tmp_path):
+        output_dir = str(tmp_path / "output")
+        run_cli(
+            "parse", sample_python_repo,
+            "--output", output_dir,
+            "--language", "python",
+        )
+        dataset = Path(output_dir) / "dataset.json"
+        assert dataset.exists()
+        data = json.loads(dataset.read_text())
+        assert "units" in data
+        assert len(data["units"]) > 0
+
+    def test_parse_auto_detect(self, sample_python_repo, tmp_path):
+        output_dir = str(tmp_path / "output")
+        result = run_cli(
+            "parse", sample_python_repo,
+            "--output", output_dir,
+            "--json",
+        )
+        assert result.returncode == 0
+        envelope = json.loads(result.stdout)
+        assert envelope["status"] == "success"
+
+    def test_parse_js_repo(self, sample_js_repo, tmp_path):
+        """JS parsing via Go CLI. May fail if the Go CLI finds system Python
+        instead of the venv (missing anthropic package).
+        """
+        output_dir = str(tmp_path / "output")
+        result = run_cli(
+            "parse", sample_js_repo,
+            "--output", output_dir,
+            "--language", "javascript",
+            "--json",
+        )
+        if result.returncode != 0:
+            if "No module named" in result.stderr:
+                pytest.skip("Go CLI using system Python without required packages")
+            if "UnicodeEncodeError" in result.stderr:
+                pytest.skip("Pre-existing Unicode bug in JS test_pipeline.py on Windows")
+        assert result.returncode == 0
+        envelope = json.loads(result.stdout)
+        assert envelope["status"] == "success"
+
+    def test_parse_missing_repo(self, tmp_path):
+        result = run_cli(
+            "parse", str(tmp_path / "nonexistent"),
+            "--output", str(tmp_path / "out"),
+        )
+        assert result.returncode != 0
+
+    def test_parse_json_output_is_valid(self, sample_python_repo, tmp_path):
+        output_dir = str(tmp_path / "output")
+        result = run_cli(
+            "parse", sample_python_repo,
+            "--output", output_dir,
+            "--json",
+        )
+        # Should always produce valid JSON on stdout when --json is used
+        envelope = json.loads(result.stdout)
+        assert "status" in envelope
+
+
+class TestApiKeyHandling:
+    def test_scan_requires_api_key(self, sample_python_repo):
+        """Scan should fail without an API key."""
+        result = run_cli("scan", sample_python_repo)
+        output = result.stderr + result.stdout
+        assert result.returncode != 0
+        assert "api key" in output.lower()
diff --git a/libs/openant-core/tests/test_js_parser.py b/libs/openant-core/tests/test_js_parser.py
new file mode 100644
index 0000000..25bf951
--- /dev/null
+++ b/libs/openant-core/tests/test_js_parser.py
@@ -0,0 +1,226 @@
+"""Tests for the JavaScript parser pipeline.
+
+Requires Node.js and npm dependencies installed:
+  cd parsers/javascript && npm install
+"""
+import json
+import subprocess
+import shutil
+import sys
+from pathlib import Path
+
+import pytest
+
+PARSERS_JS_DIR = Path(__file__).parent.parent / "parsers" / "javascript"
+NODE_MODULES = PARSERS_JS_DIR / "node_modules"
+
+# Skip all tests if node or npm deps aren't available
+pytestmark = pytest.mark.skipif(
+    not shutil.which("node") or not NODE_MODULES.exists(),
+    reason="Node.js or JS parser npm dependencies not available",
+)
+
+
+def run_node(script_name, *args):
+    """Run a Node.js script from the JS parsers directory."""
+    cmd = ["node", str(PARSERS_JS_DIR / script_name)] + list(args)
+    result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
+    return result
+
+
+class TestRepositoryScanner:
+    def test_scans_js_repo(self, sample_js_repo, tmp_path):
+        output = tmp_path / "scan_results.json"
+        result = run_node("repository_scanner.js", sample_js_repo, "--output", str(output))
+        assert result.returncode == 0
+        assert output.exists()
+
+        data = json.loads(output.read_text())
+        assert data["statistics"]["totalFiles"] == 3
+
+    def test_finds_js_files(self, sample_js_repo, tmp_path):
+        output = tmp_path / "scan_results.json"
+        run_node("repository_scanner.js", sample_js_repo, "--output", str(output))
+        data = json.loads(output.read_text())
+
+        paths = [f["path"] for f in data["files"]]
+        assert any("app.js" in p for p in paths)
+        assert any("db.js" in p for p in paths)
+        assert any("utils.js" in p for p in paths)
+
+    def test_skip_tests_flag(self, tmp_path):
+        repo = tmp_path / "repo"
+        repo.mkdir()
+        (repo / "index.js").write_text("module.exports = {};")
+        test_dir = repo / "__tests__"
+        test_dir.mkdir()
+        (test_dir / "index.test.js").write_text("test('foo', () => {});")
+
+        output = tmp_path / "scan.json"
+        run_node("repository_scanner.js", str(repo), "--output", str(output), "--skip-tests")
+        data = json.loads(output.read_text())
+
+        paths = [f["path"] for f in data["files"]]
+        assert any("index.js" in p for p in paths)
+        assert not any("test" in p.lower() for p in paths)
+
+
+class TestTypeScriptAnalyzer:
+    # Known issue: ts-morph fails to resolve files with backslash paths on Windows
+    _windows_path_xfail = pytest.mark.xfail(
+        sys.platform == "win32",
+        reason="ts-morph path resolution issue with Windows backslash paths",
+        strict=False,
+    )
+
+    def test_analyzes_files(self, sample_js_repo, tmp_path):
+        # First scan to get file list
+        scan_output = tmp_path / "scan.json"
+        run_node("repository_scanner.js", sample_js_repo, "--output", str(scan_output))
+        scan_data = json.loads(scan_output.read_text())
+
+        # Write file list
+        file_list = tmp_path / "files.txt"
+        file_list.write_text("\n".join(f["path"] for f in scan_data["files"]))
+
+        # Run analyzer
+        analyzer_output = tmp_path / "analyzer_output.json"
+        result = run_node(
+            "typescript_analyzer.js",
+            sample_js_repo,
+            "--files-from", str(file_list),
+            "--output", str(analyzer_output),
+        )
+        assert result.returncode == 0
+        assert analyzer_output.exists()
+
+    @_windows_path_xfail
+    def test_extracts_functions(self, sample_js_repo, tmp_path):
+        scan_output = tmp_path / "scan.json"
+        run_node("repository_scanner.js", sample_js_repo, "--output", str(scan_output))
+        scan_data = json.loads(scan_output.read_text())
+
+        file_list = tmp_path / "files.txt"
+        file_list.write_text("\n".join(f["path"] for f in scan_data["files"]))
+
+        analyzer_output = tmp_path / "analyzer_output.json"
+        run_node(
+            "typescript_analyzer.js",
+            sample_js_repo,
+            "--files-from", str(file_list),
+            "--output", str(analyzer_output),
+        )
+        data = json.loads(analyzer_output.read_text())
+
+        assert "functions" in data
+        func_names = [f.get("name", "") for f in data["functions"].values()]
+        assert "getUser" in func_names
+        assert "createUser" in func_names
+        assert "getConnection" in func_names
+
+    def test_builds_call_graph(self, sample_js_repo, tmp_path):
+        scan_output = tmp_path / "scan.json"
+        run_node("repository_scanner.js", sample_js_repo, "--output", str(scan_output))
+        scan_data = json.loads(scan_output.read_text())
+
+        file_list = tmp_path / "files.txt"
+        file_list.write_text("\n".join(f["path"] for f in scan_data["files"]))
+
+        analyzer_output = tmp_path / "analyzer_output.json"
+        run_node(
+            "typescript_analyzer.js",
+            sample_js_repo,
+            "--files-from", str(file_list),
+            "--output", str(analyzer_output),
+        )
+        data = json.loads(analyzer_output.read_text())
+
+        assert "callGraph" in data
+        # Call graph keys should match extracted functions
+        assert len(data["callGraph"]) == len(data["functions"])
+
+
+class TestUnitGenerator:
+    @pytest.fixture
+    def analyzer_output(self, sample_js_repo, tmp_path):
+        scan_output = tmp_path / "scan.json"
+        run_node("repository_scanner.js", sample_js_repo, "--output", str(scan_output))
+        scan_data = json.loads(scan_output.read_text())
+
+        file_list = tmp_path / "files.txt"
+        file_list.write_text("\n".join(f["path"] for f in scan_data["files"]))
+
+        output = tmp_path / "analyzer_output.json"
+        run_node(
+            "typescript_analyzer.js",
+            sample_js_repo,
+            "--files-from", str(file_list),
+            "--output", str(output),
+        )
+        return str(output)
+
+    def test_generates_dataset(self, analyzer_output, tmp_path):
+        dataset_output = tmp_path / "dataset.json"
+        result = run_node(
+            "unit_generator.js",
+            analyzer_output,
+            "--output", str(dataset_output),
+        )
+        assert result.returncode == 0
+        assert Path(dataset_output).exists()
+
+        data = json.loads(Path(dataset_output).read_text())
+        assert "units" in data
+        assert len(data["units"]) > 0
+
+    def test_units_have_required_fields(self, analyzer_output, tmp_path):
+        dataset_output = tmp_path / "dataset.json"
+        run_node(
+            "unit_generator.js",
+            analyzer_output,
+            "--output", str(dataset_output),
+        )
+        data = json.loads(Path(dataset_output).read_text())
+
+        for unit in data["units"]:
+            assert "id" in unit
+            assert "code" in unit
+
+
+class TestFullPipeline:
+    """End-to-end test through parser_adapter."""
+
+    # Known issue: test_pipeline.py uses Unicode checkmarks that fail on Windows cp1252
+    _windows_unicode_xfail = pytest.mark.xfail(
+        sys.platform == "win32",
+        reason="JS test_pipeline.py Unicode chars fail on Windows cp1252 encoding",
+        strict=False,
+    )
+
+    @_windows_unicode_xfail
+    def test_parse_js_repo(self, sample_js_repo, tmp_output_dir):
+        from core.parser_adapter import parse_repository
+
+        result = parse_repository(
+            repo_path=sample_js_repo,
+            output_dir=tmp_output_dir,
+            language="javascript",
+            processing_level="all",
+        )
+        assert result.language == "javascript"
+        assert result.units_count > 0
+        assert Path(result.dataset_path).exists()
+        assert result.analyzer_output_path is not None
+        assert Path(result.analyzer_output_path).exists()
+
+    @_windows_unicode_xfail
+    def test_auto_detects_javascript(self, sample_js_repo, tmp_output_dir):
+        from core.parser_adapter import parse_repository
+
+        result = parse_repository(
+            repo_path=sample_js_repo,
+            output_dir=tmp_output_dir,
+            language="auto",
+            processing_level="all",
+        )
+        assert result.language == "javascript"
diff --git a/libs/openant-core/tests/test_parser_adapter.py b/libs/openant-core/tests/test_parser_adapter.py
new file mode 100644
index 0000000..af209cb
--- /dev/null
+++ b/libs/openant-core/tests/test_parser_adapter.py
@@ -0,0 +1,106 @@
+"""Tests for core/parser_adapter.py — language detection and Python parsing."""
+import json
+import os
+from pathlib import Path
+
+import pytest
+
+from core.parser_adapter import detect_language, parse_repository
+
+
+class TestDetectLanguage:
+    def test_python_repo(self, sample_python_repo):
+        assert detect_language(sample_python_repo) == "python"
+
+    def test_empty_dir_raises(self, tmp_path):
+        with pytest.raises(ValueError, match="No supported source files"):
+            detect_language(str(tmp_path))
+
+    def test_javascript_repo(self, tmp_path):
+        (tmp_path / "index.js").write_text("console.log('hi');")
+        (tmp_path / "utils.js").write_text("export function foo() {}")
+        assert detect_language(str(tmp_path)) == "javascript"
+
+    def test_mixed_repo_picks_majority(self, tmp_path):
+        # 3 Python files, 1 JS file — should pick Python
+        for name in ["a.py", "b.py", "c.py"]:
+            (tmp_path / name).write_text("pass")
+        (tmp_path / "d.js").write_text("//js")
+        assert detect_language(str(tmp_path)) == "python"
+
+    def test_ignores_node_modules(self, tmp_path):
+        (tmp_path / "app.py").write_text("pass")
+        nm = tmp_path / "node_modules" / "pkg"
+        nm.mkdir(parents=True)
+        (nm / "index.js").write_text("//js")
+        (nm / "util.js").write_text("//js")
+        (nm / "helper.js").write_text("//js")
+        assert detect_language(str(tmp_path)) == "python"
+
+    def test_ignores_venv(self, tmp_path):
+        (tmp_path / "app.go").write_text("package main")
+        venv = tmp_path / ".venv" / "lib"
+        venv.mkdir(parents=True)
+        for i in range(10):
+            (venv / f"mod{i}.py").write_text("pass")
+        assert detect_language(str(tmp_path)) == "go"
+
+
+class TestParseRepositoryPython:
+    def test_parses_sample_repo(self, sample_python_repo, tmp_output_dir):
+        result = parse_repository(
+            repo_path=sample_python_repo,
+            output_dir=tmp_output_dir,
+            language="python",
+            processing_level="all",
+        )
+        assert result.language == "python"
+        assert result.units_count > 0
+        assert Path(result.dataset_path).exists()
+
+    def test_dataset_json_valid(self, sample_python_repo, tmp_output_dir):
+        result = parse_repository(
+            repo_path=sample_python_repo,
+            output_dir=tmp_output_dir,
+            language="python",
+            processing_level="all",
+        )
+        with open(result.dataset_path) as f:
+            dataset = json.load(f)
+        assert "units" in dataset
+        assert len(dataset["units"]) > 0
+
+    def test_units_have_required_fields(self, sample_python_repo, tmp_output_dir):
+        result = parse_repository(
+            repo_path=sample_python_repo,
+            output_dir=tmp_output_dir,
+            language="python",
+            processing_level="all",
+        )
+        with open(result.dataset_path) as f:
+            dataset = json.load(f)
+        for unit in dataset["units"]:
+            assert "id" in unit
+            assert "code" in unit
+
+    def test_auto_detect_language(self, sample_python_repo, tmp_output_dir):
+        result = parse_repository(
+            repo_path=sample_python_repo,
+            output_dir=tmp_output_dir,
+            language="auto",
+            processing_level="all",
+        )
+        assert result.language == "python"
+
+    def test_analyzer_output_generated(self, sample_python_repo, tmp_output_dir):
+        result = parse_repository(
+            repo_path=sample_python_repo,
+            output_dir=tmp_output_dir,
+            language="python",
+            processing_level="all",
+        )
+        assert result.analyzer_output_path is not None
+        assert Path(result.analyzer_output_path).exists()
+        with open(result.analyzer_output_path) as f:
+            data = json.load(f)
+        assert "functions" in data
diff --git a/libs/openant-core/tests/test_python_parser.py b/libs/openant-core/tests/test_python_parser.py
new file mode 100644
index 0000000..aaf5b55
--- /dev/null
+++ b/libs/openant-core/tests/test_python_parser.py
@@ -0,0 +1,220 @@
+"""Tests for the Python parser phases (scanner, extractor, call graph, unit generator)."""
+import sys
+from pathlib import Path
+
+import pytest
+
+# The parser modules are imported by bare top-level name (see below), so parsers/python must be on sys.path
+PARSERS_DIR = Path(__file__).parent.parent / "parsers" / "python"
+if str(PARSERS_DIR) not in sys.path:  # skip if already present, so no duplicate entry is added
+    sys.path.insert(0, str(PARSERS_DIR))  # index 0: searched before the other sys.path entries
+
+from repository_scanner import RepositoryScanner
+from function_extractor import FunctionExtractor
+from call_graph_builder import CallGraphBuilder
+from unit_generator import UnitGenerator
+
+
+class TestRepositoryScanner:
+    def test_finds_python_files(self, sample_python_repo):
+        scanner = RepositoryScanner(sample_python_repo)
+        result = scanner.scan()
+        assert result["statistics"]["total_files"] == 3
+        paths = [f["path"] for f in result["files"]]
+        assert any("app.py" in p for p in paths)
+        assert any("db.py" in p for p in paths)
+        assert any("utils.py" in p for p in paths)
+
+    def test_skip_tests_option(self, tmp_path):
+        (tmp_path / "main.py").write_text("pass")
+        (tmp_path / "test_main.py").write_text("pass")
+        (tmp_path / "tests").mkdir()
+        (tmp_path / "tests" / "test_foo.py").write_text("pass")
+
+        scanner = RepositoryScanner(str(tmp_path), {"skip_tests": True})
+        result = scanner.scan()
+        paths = [f["path"] for f in result["files"]]
+        assert any("main.py" in p for p in paths)
+        assert not any("test_main.py" in p for p in paths)
+        assert not any("test_foo.py" in p for p in paths)
+
+    def test_records_file_sizes(self, sample_python_repo):
+        scanner = RepositoryScanner(sample_python_repo)
+        result = scanner.scan()
+        for f in result["files"]:
+            assert "size" in f
+            assert f["size"] > 0
+
+    def test_empty_repo(self, tmp_path):
+        scanner = RepositoryScanner(str(tmp_path))
+        result = scanner.scan()
+        assert result["statistics"]["total_files"] == 0
+        assert result["files"] == []
+
+
+class TestFunctionExtractor:
+    def test_extracts_functions(self, sample_python_repo):
+        scanner = RepositoryScanner(sample_python_repo)
+        scan_result = scanner.scan()
+        extractor = FunctionExtractor(sample_python_repo)
+        result = extractor.extract_from_scan(scan_result)
+
+        assert "functions" in result
+        assert len(result["functions"]) > 0
+
+    def test_finds_known_functions(self, sample_python_repo):
+        scanner = RepositoryScanner(sample_python_repo)
+        scan_result = scanner.scan()
+        extractor = FunctionExtractor(sample_python_repo)
+        result = extractor.extract_from_scan(scan_result)
+
+        func_names = [f["name"] for f in result["functions"].values()]
+        assert "get_user" in func_names
+        assert "create_user" in func_names
+        assert "get_connection" in func_names
+        assert "sanitize_input" in func_names
+        assert "validate_email" in func_names
+
+    def test_extracts_route_handlers(self, sample_python_repo):
+        scanner = RepositoryScanner(sample_python_repo)
+        scan_result = scanner.scan()
+        extractor = FunctionExtractor(sample_python_repo)
+        result = extractor.extract_from_scan(scan_result)
+
+        func_names = [f["name"] for f in result["functions"].values()]
+        assert "get_user_endpoint" in func_names
+        assert "create_user_endpoint" in func_names
+
+    def test_captures_decorators(self, sample_python_repo):
+        scanner = RepositoryScanner(sample_python_repo)
+        scan_result = scanner.scan()
+        extractor = FunctionExtractor(sample_python_repo)
+        result = extractor.extract_from_scan(scan_result)
+
+        # Find the get_user_endpoint function
+        endpoint_funcs = [
+            f for f in result["functions"].values()
+            if f["name"] == "get_user_endpoint"
+        ]
+        assert len(endpoint_funcs) == 1
+        assert len(endpoint_funcs[0]["decorators"]) > 0
+
+    def test_statistics(self, sample_python_repo):
+        scanner = RepositoryScanner(sample_python_repo)
+        scan_result = scanner.scan()
+        extractor = FunctionExtractor(sample_python_repo)
+        result = extractor.extract_from_scan(scan_result)
+
+        stats = result["statistics"]
+        assert stats["total_functions"] > 0
+        assert stats["files_processed"] == 3
+
+    def test_function_has_code(self, sample_python_repo):
+        scanner = RepositoryScanner(sample_python_repo)
+        scan_result = scanner.scan()
+        extractor = FunctionExtractor(sample_python_repo)
+        result = extractor.extract_from_scan(scan_result)
+
+        for func in result["functions"].values():
+            assert "code" in func
+            assert len(func["code"]) > 0
+
+
+class TestCallGraphBuilder:
+    @pytest.fixture
+    def extractor_result(self, sample_python_repo):
+        scanner = RepositoryScanner(sample_python_repo)
+        scan_result = scanner.scan()
+        extractor = FunctionExtractor(sample_python_repo)
+        return extractor.extract_from_scan(scan_result)
+
+    def test_builds_call_graph(self, extractor_result):
+        builder = CallGraphBuilder(extractor_result)
+        builder.build_call_graph()
+        result = builder.export()
+
+        assert "call_graph" in result
+        assert "reverse_call_graph" in result
+
+    def test_detects_calls(self, extractor_result):
+        builder = CallGraphBuilder(extractor_result)
+        builder.build_call_graph()
+        result = builder.export()
+
+        # get_user_endpoint calls get_user
+        endpoint_key = [k for k in result["call_graph"] if "get_user_endpoint" in k]
+        assert len(endpoint_key) > 0
+        callees = result["call_graph"][endpoint_key[0]]
+        callee_names = [c.split(":")[-1] for c in callees]
+        assert "get_user" in callee_names
+
+    def test_reverse_graph(self, extractor_result):
+        builder = CallGraphBuilder(extractor_result)
+        builder.build_call_graph()
+        result = builder.export()
+
+        # get_user should be called by get_user_endpoint
+        get_user_key = [k for k in result["reverse_call_graph"] if k.endswith(":get_user")]
+        assert len(get_user_key) > 0
+
+    def test_statistics(self, extractor_result):
+        builder = CallGraphBuilder(extractor_result)
+        builder.build_call_graph()
+        result = builder.export()
+
+        stats = result["statistics"]
+        assert stats["total_edges"] > 0
+        assert "avg_out_degree" in stats
+
+
+class TestUnitGenerator:
+    @pytest.fixture
+    def call_graph_result(self, sample_python_repo):
+        scanner = RepositoryScanner(sample_python_repo)
+        scan_result = scanner.scan()
+        extractor = FunctionExtractor(sample_python_repo)
+        extractor_result = extractor.extract_from_scan(scan_result)
+        builder = CallGraphBuilder(extractor_result)
+        builder.build_call_graph()
+        return builder.export()
+
+    def test_generates_units(self, call_graph_result):
+        generator = UnitGenerator(call_graph_result)
+        dataset = generator.generate_units()
+
+        assert "units" in dataset
+        assert len(dataset["units"]) > 0
+
+    def test_units_have_id_and_code(self, call_graph_result):
+        generator = UnitGenerator(call_graph_result)
+        dataset = generator.generate_units()
+
+        for unit in dataset["units"]:
+            assert "id" in unit
+            assert "code" in unit
+            assert "primary_code" in unit["code"]
+
+    def test_units_have_metadata(self, call_graph_result):
+        generator = UnitGenerator(call_graph_result)
+        dataset = generator.generate_units()
+
+        for unit in dataset["units"]:
+            assert "metadata" in unit
+            assert "unit_type" in unit
+
+    def test_enhanced_code_includes_dependencies(self, call_graph_result):
+        generator = UnitGenerator(call_graph_result)
+        dataset = generator.generate_units()
+
+        # get_user_endpoint should have get_user's code included
+        endpoint_units = [u for u in dataset["units"] if "get_user_endpoint" in u["id"]]
+        assert len(endpoint_units) == 1
+        code = endpoint_units[0]["code"]["primary_code"]
+        assert "get_user" in code
+
+    def test_statistics(self, call_graph_result):
+        generator = UnitGenerator(call_graph_result)
+        dataset = generator.generate_units()
+
+        assert "statistics" in dataset
+        assert dataset["statistics"]["total_units"] == len(dataset["units"])
diff --git a/libs/openant-core/tests/test_token_tracker.py b/libs/openant-core/tests/test_token_tracker.py
new file mode 100644
index 0000000..08fdc9c
--- /dev/null
+++ b/libs/openant-core/tests/test_token_tracker.py
@@ -0,0 +1,73 @@
+"""Tests for TokenTracker."""
+from utilities.llm_client import TokenTracker, MODEL_PRICING
+
+
+class TestTokenTracker:
+    def test_initial_state(self):
+        tracker = TokenTracker()
+        assert tracker.total_input_tokens == 0
+        assert tracker.total_output_tokens == 0
+        assert tracker.total_tokens == 0
+        assert tracker.total_cost_usd == 0.0
+        assert tracker.calls == []
+
+    def test_record_call_known_model(self):
+        tracker = TokenTracker()
+        result = tracker.record_call("claude-sonnet-4-20250514", 1000, 500)
+
+        assert result["model"] == "claude-sonnet-4-20250514"
+        assert result["input_tokens"] == 1000
+        assert result["output_tokens"] == 500
+        # Sonnet: $3/M input, $15/M output
+        expected_cost = (1000 / 1_000_000) * 3.0 + (500 / 1_000_000) * 15.0
+        assert result["cost_usd"] == round(expected_cost, 6)
+
+    def test_record_call_unknown_model_uses_default(self):
+        tracker = TokenTracker()
+        result = tracker.record_call("some-future-model", 100, 50)
+        default_pricing = MODEL_PRICING["default"]
+        expected_cost = (100 / 1_000_000) * default_pricing["input"] + (50 / 1_000_000) * default_pricing["output"]
+        assert result["cost_usd"] == round(expected_cost, 6)
+
+    def test_cumulative_tracking(self):
+        tracker = TokenTracker()
+        tracker.record_call("claude-sonnet-4-20250514", 1000, 500)
+        tracker.record_call("claude-sonnet-4-20250514", 2000, 1000)
+
+        assert tracker.total_input_tokens == 3000
+        assert tracker.total_output_tokens == 1500
+        assert tracker.total_tokens == 4500
+        assert len(tracker.calls) == 2
+
+    def test_reset(self):
+        tracker = TokenTracker()
+        tracker.record_call("claude-sonnet-4-20250514", 1000, 500)
+        tracker.reset()
+
+        assert tracker.total_input_tokens == 0
+        assert tracker.total_output_tokens == 0
+        assert tracker.total_cost_usd == 0.0
+        assert tracker.calls == []
+
+    def test_get_summary_includes_calls(self):
+        tracker = TokenTracker()
+        tracker.record_call("claude-sonnet-4-20250514", 100, 50)
+        summary = tracker.get_summary()
+
+        assert summary["total_calls"] == 1
+        assert "calls" in summary
+        assert len(summary["calls"]) == 1
+
+    def test_get_totals_excludes_calls(self):
+        tracker = TokenTracker()
+        tracker.record_call("claude-sonnet-4-20250514", 100, 50)
+        totals = tracker.get_totals()
+
+        assert totals["total_calls"] == 1
+        assert "calls" not in totals
+
+    def test_opus_pricing(self):
+        tracker = TokenTracker()
+        result = tracker.record_call("claude-opus-4-20250514", 1_000_000, 1_000_000)
+        # Opus: $15/M input, $75/M output
+        assert result["cost_usd"] == 90.0