diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml new file mode 100644 index 0000000..0bba817 --- /dev/null +++ b/.github/workflows/test.yaml @@ -0,0 +1,126 @@ +name: tests +on: + push: + branches: [master] + pull_request: + +jobs: + python-tests: + name: Python tests (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ${{ github.repository == 'knostic/OpenAnt' && fromJSON('["ubuntu-latest", "macos-latest", "windows-latest"]') || fromJSON('["ubuntu-latest"]') }} + + steps: + - uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.11" + cache: "pip" + cache-dependency-path: libs/openant-core/requirements.txt + + - name: Set up Node.js + uses: actions/setup-node@v6 + with: + node-version: "22" + cache: "npm" + cache-dependency-path: libs/openant-core/parsers/javascript/package-lock.json + + - name: Install Python dependencies + working-directory: libs/openant-core + run: pip install -r requirements.txt && pip install pytest + + - name: Cache JS parser node_modules + id: cache-node-modules + uses: actions/cache@v4 + with: + path: libs/openant-core/parsers/javascript/node_modules + key: ${{ runner.os }}-jsparser-nodemodules-${{ hashFiles('libs/openant-core/parsers/javascript/package-lock.json') }} + + - name: Install JS parser dependencies + if: steps.cache-node-modules.outputs.cache-hit != 'true' + working-directory: libs/openant-core/parsers/javascript + run: npm ci + + - name: Run Python and parser tests + working-directory: libs/openant-core + run: python -m pytest tests/test_token_tracker.py tests/test_parser_adapter.py tests/test_python_parser.py tests/test_js_parser.py -v + + go-tests: + name: Go build + integration (${{ matrix.os }}) + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ${{ github.repository == 'knostic/OpenAnt' && fromJSON('["ubuntu-latest", "macos-latest", "windows-latest"]') || fromJSON('["ubuntu-latest"]') }} + + steps: + - uses: actions/checkout@v6 + + - name: Set up Go + uses: actions/setup-go@v6 + with: + go-version-file: apps/openant-cli/go.mod + cache-dependency-path: apps/openant-cli/go.sum + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.11" + cache: "pip" + cache-dependency-path: libs/openant-core/requirements.txt + + - name: Set up Node.js + uses: actions/setup-node@v6 + with: + node-version: "22" + cache: "npm" + cache-dependency-path: libs/openant-core/parsers/javascript/package-lock.json + + - name: Vet + working-directory: apps/openant-cli + run: go vet ./... + + - name: Build (Linux/macOS) + if: runner.os != 'Windows' + working-directory: apps/openant-cli + run: go build -o openant . + + - name: Build (Windows) + if: runner.os == 'Windows' + working-directory: apps/openant-cli + run: go build -o openant.exe . + + - name: Verify binary exists + working-directory: apps/openant-cli + shell: bash + run: | + if [ -f openant ] || [ -f openant.exe ]; then + echo "Binary built successfully" + else + echo "ERROR: Binary not found" && exit 1 + fi + + - name: Install Python dependencies + working-directory: libs/openant-core + run: pip install -r requirements.txt && pip install pytest + + - name: Cache JS parser node_modules + id: cache-node-modules + uses: actions/cache@v4 + with: + path: libs/openant-core/parsers/javascript/node_modules + key: ${{ runner.os }}-jsparser-nodemodules-${{ hashFiles('libs/openant-core/parsers/javascript/package-lock.json') }} + + - name: Install JS parser dependencies + if: steps.cache-node-modules.outputs.cache-hit != 'true' + working-directory: libs/openant-core/parsers/javascript + run: npm ci + + - name: Run Go CLI integration tests + working-directory: libs/openant-core + run: python -m pytest tests/test_go_cli.py -v --tb=short diff --git a/libs/openant-core/pytest.ini b/libs/openant-core/pytest.ini new file mode 100644 index 0000000..4584de7 --- /dev/null +++ b/libs/openant-core/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +pythonpath = . diff --git a/libs/openant-core/tests/__init__.py b/libs/openant-core/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/libs/openant-core/tests/conftest.py b/libs/openant-core/tests/conftest.py new file mode 100644 index 0000000..affe238 --- /dev/null +++ b/libs/openant-core/tests/conftest.py @@ -0,0 +1,32 @@ +"""Shared fixtures for OpenAnt tests.""" +import sys +from pathlib import Path + +import pytest + +# Ensure the project root is on sys.path so imports like `from utilities...` work +PROJECT_ROOT = Path(__file__).parent.parent +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + +FIXTURES_DIR = Path(__file__).parent / "fixtures" +SAMPLE_PYTHON_REPO = FIXTURES_DIR / "sample_python_repo" +SAMPLE_JS_REPO = FIXTURES_DIR / "sample_js_repo" + + +@pytest.fixture +def sample_python_repo(): + """Path to the sample Python repository fixture.""" + return str(SAMPLE_PYTHON_REPO) + + +@pytest.fixture +def sample_js_repo(): + """Path to the sample JavaScript repository fixture.""" + return str(SAMPLE_JS_REPO) + + +@pytest.fixture +def tmp_output_dir(tmp_path): + """Temporary output directory for parser results.""" + return str(tmp_path / "output") diff --git a/libs/openant-core/tests/fixtures/sample_js_repo/src/app.js b/libs/openant-core/tests/fixtures/sample_js_repo/src/app.js new file mode 100644 index 0000000..2a47f9f --- /dev/null +++ b/libs/openant-core/tests/fixtures/sample_js_repo/src/app.js @@ -0,0 +1,24 @@ +const express = require("express"); +const { getUser, createUser } = require("./db"); + +const app = express(); +app.use(express.json()); + +app.get("/users/:id", async (req, res) => { + const user = await getUser(req.params.id); + if (!user) { + return res.status(404).json({ error: "Not found" }); + } + res.json(user); +}); + +app.post("/users", async (req, res) => { + const { name } = req.body; + if (!name) { + return res.status(400).json({ error: "Name required" }); + } + const user = await createUser(name); + res.status(201).json(user); +}); + +module.exports = app; diff --git a/libs/openant-core/tests/fixtures/sample_js_repo/src/db.js b/libs/openant-core/tests/fixtures/sample_js_repo/src/db.js new file mode 100644 index 0000000..530810c --- /dev/null +++ b/libs/openant-core/tests/fixtures/sample_js_repo/src/db.js @@ -0,0 +1,29 @@ +const sqlite3 = require("sqlite3"); + +function getConnection() { + return new sqlite3.Database("app.db"); +} + +async function getUser(id) { + const db = getConnection(); + return new Promise((resolve, reject) => { + db.get("SELECT * FROM users WHERE id = ?", [id], (err, row) => { + db.close(); + if (err) reject(err); + else resolve(row || null); + }); + }); +} + +async function createUser(name) { + const db = getConnection(); + return new Promise((resolve, reject) => { + db.run("INSERT INTO users (name) VALUES (?)", [name], function (err) { + db.close(); + if (err) reject(err); + else resolve({ id: this.lastID, name }); + }); + }); +} + +module.exports = { getUser, createUser, getConnection }; diff --git a/libs/openant-core/tests/fixtures/sample_js_repo/src/utils.js b/libs/openant-core/tests/fixtures/sample_js_repo/src/utils.js new file mode 100644 index 0000000..9b3b986 --- /dev/null +++ b/libs/openant-core/tests/fixtures/sample_js_repo/src/utils.js @@ -0,0 +1,12 @@ +function sanitizeInput(value) { + if (typeof value !== "string") { + return String(value); + } + return value.trim(); +} + +function validateEmail(email) { + return email.includes("@") && email.includes("."); +} + +module.exports = { sanitizeInput, validateEmail }; diff --git a/libs/openant-core/tests/fixtures/sample_python_repo/app.py b/libs/openant-core/tests/fixtures/sample_python_repo/app.py new file mode 100644 index 0000000..fac8da3 --- /dev/null +++ b/libs/openant-core/tests/fixtures/sample_python_repo/app.py @@ -0,0 +1,23 @@ +"""Sample Flask app for testing.""" +from flask import Flask, request, jsonify +from .db import get_user, create_user + +app = Flask(__name__) + + +@app.route("/users/") +def get_user_endpoint(user_id): + user = get_user(user_id) + if not user: + return jsonify({"error": "Not found"}), 404 + return jsonify(user) + + +@app.route("/users", methods=["POST"]) +def create_user_endpoint(): + data = request.get_json() + name = data.get("name") + if not name: + return jsonify({"error": "Name required"}), 400 + user = create_user(name) + return jsonify(user), 201 diff --git a/libs/openant-core/tests/fixtures/sample_python_repo/db.py b/libs/openant-core/tests/fixtures/sample_python_repo/db.py new file mode 100644 index 0000000..25ad871 --- /dev/null +++ b/libs/openant-core/tests/fixtures/sample_python_repo/db.py @@ -0,0 +1,25 @@ +"""Sample database module for testing.""" +import sqlite3 + + +def get_connection(): + return sqlite3.connect("app.db") + + +def get_user(user_id): + conn = get_connection() + cursor = conn.execute("SELECT * FROM users WHERE id = ?", (user_id,)) + row = cursor.fetchone() + conn.close() + if row: + return {"id": row[0], "name": row[1]} + return None + + +def create_user(name): + conn = get_connection() + cursor = conn.execute("INSERT INTO users (name) VALUES (?)", (name,)) + conn.commit() + user_id = cursor.lastrowid + conn.close() + return {"id": user_id, "name": name} diff --git a/libs/openant-core/tests/fixtures/sample_python_repo/utils.py b/libs/openant-core/tests/fixtures/sample_python_repo/utils.py new file mode 100644 index 0000000..4fded7f --- /dev/null +++ b/libs/openant-core/tests/fixtures/sample_python_repo/utils.py @@ -0,0 +1,11 @@ +"""Sample utility module for testing.""" + + +def sanitize_input(value): + if not isinstance(value, str): + return str(value) + return value.strip() + + +def validate_email(email): + return "@" in email and "." in email diff --git a/libs/openant-core/tests/test_go_cli.py b/libs/openant-core/tests/test_go_cli.py new file mode 100644 index 0000000..9484d4b --- /dev/null +++ b/libs/openant-core/tests/test_go_cli.py @@ -0,0 +1,158 @@ +"""Integration tests for the Go CLI wrapper (openant.exe). + +These tests invoke the real compiled binary and verify it correctly +delegates to the Python core. They test the wrapper, not the LLM pipeline — +so they use parse-only commands that don't require an API key. +""" +import json +import os +import subprocess +import shutil +import sys +from pathlib import Path + +import pytest + +CLI_DIR = Path(__file__).parent.parent.parent.parent / "apps" / "openant-cli" +BINARY_NAME = "openant.exe" if sys.platform == "win32" else "openant" +BINARY = CLI_DIR / BINARY_NAME + +pytestmark = pytest.mark.skipif( + not BINARY.exists(), + reason=f"Go binary not built at {BINARY}. Run: cd apps/openant-cli && go build -o {BINARY_NAME} .", +) + + +def run_cli(*args, env_override=None): + """Run the openant CLI binary and return the CompletedProcess.""" + env = os.environ.copy() + # Don't let the test hit any real API + env.pop("ANTHROPIC_API_KEY", None) + env.pop("OPENANT_LOCAL_CLAUDE", None) + if env_override: + env.update(env_override) + return subprocess.run( + [str(BINARY)] + list(args), + capture_output=True, + text=True, + timeout=30, + env=env, + ) + + +class TestVersion: + def test_version_runs(self): + result = run_cli("version") + assert result.returncode == 0 + assert "openant" in result.stderr.lower() or "openant" in result.stdout.lower() + + def test_version_subcommand(self): + result = run_cli("version") + assert result.returncode == 0 + + +class TestHelp: + def test_help(self): + result = run_cli("--help") + assert result.returncode == 0 + output = result.stdout + result.stderr + assert "scan" in output + assert "parse" in output + + def test_parse_help(self): + result = run_cli("parse", "--help") + assert result.returncode == 0 + output = result.stdout + result.stderr + assert "repository" in output.lower() + + def test_scan_help(self): + result = run_cli("scan", "--help") + assert result.returncode == 0 + output = result.stdout + result.stderr + assert "pipeline" in output.lower() + + +class TestParse: + def test_parse_python_repo(self, sample_python_repo, tmp_path): + output_dir = str(tmp_path / "output") + result = run_cli( + "parse", sample_python_repo, + "--output", output_dir, + "--language", "python", + "--json", + ) + assert result.returncode == 0 + + envelope = json.loads(result.stdout) + assert envelope["status"] == "success" + + def test_parse_produces_dataset(self, sample_python_repo, tmp_path): + output_dir = str(tmp_path / "output") + run_cli( + "parse", sample_python_repo, + "--output", output_dir, + "--language", "python", + ) + dataset = Path(output_dir) / "dataset.json" + assert dataset.exists() + data = json.loads(dataset.read_text()) + assert "units" in data + assert len(data["units"]) > 0 + + def test_parse_auto_detect(self, sample_python_repo, tmp_path): + output_dir = str(tmp_path / "output") + result = run_cli( + "parse", sample_python_repo, + "--output", output_dir, + "--json", + ) + assert result.returncode == 0 + envelope = json.loads(result.stdout) + assert envelope["status"] == "success" + + def test_parse_js_repo(self, sample_js_repo, tmp_path): + """JS parsing via Go CLI. May fail if the Go CLI finds system Python + instead of the venv (missing anthropic package). + """ + output_dir = str(tmp_path / "output") + result = run_cli( + "parse", sample_js_repo, + "--output", output_dir, + "--language", "javascript", + "--json", + ) + if result.returncode != 0: + if "No module named" in result.stderr: + pytest.skip("Go CLI using system Python without required packages") + if "UnicodeEncodeError" in result.stderr: + pytest.skip("Pre-existing Unicode bug in JS test_pipeline.py on Windows") + assert result.returncode == 0 + envelope = json.loads(result.stdout) + assert envelope["status"] == "success" + + def test_parse_missing_repo(self, tmp_path): + result = run_cli( + "parse", str(tmp_path / "nonexistent"), + "--output", str(tmp_path / "out"), + ) + assert result.returncode != 0 + + def test_parse_json_output_is_valid(self, sample_python_repo, tmp_path): + output_dir = str(tmp_path / "output") + result = run_cli( + "parse", sample_python_repo, + "--output", output_dir, + "--json", + ) + # Should always produce valid JSON on stdout when --json is used + envelope = json.loads(result.stdout) + assert "status" in envelope + + +class TestApiKeyHandling: + def test_scan_requires_api_key(self, sample_python_repo): + """Scan should fail without an API key.""" + result = run_cli("scan", sample_python_repo) + output = result.stderr + result.stdout + assert result.returncode != 0 + assert "api key" in output.lower() diff --git a/libs/openant-core/tests/test_js_parser.py b/libs/openant-core/tests/test_js_parser.py new file mode 100644 index 0000000..25bf951 --- /dev/null +++ b/libs/openant-core/tests/test_js_parser.py @@ -0,0 +1,226 @@ +"""Tests for the JavaScript parser pipeline. + +Requires Node.js and npm dependencies installed: + cd parsers/javascript && npm install +""" +import json +import subprocess +import shutil +import sys +from pathlib import Path + +import pytest + +PARSERS_JS_DIR = Path(__file__).parent.parent / "parsers" / "javascript" +NODE_MODULES = PARSERS_JS_DIR / "node_modules" + +# Skip all tests if node or npm deps aren't available +pytestmark = pytest.mark.skipif( + not shutil.which("node") or not NODE_MODULES.exists(), + reason="Node.js or JS parser npm dependencies not available", +) + + +def run_node(script_name, *args): + """Run a Node.js script from the JS parsers directory.""" + cmd = ["node", str(PARSERS_JS_DIR / script_name)] + list(args) + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) + return result + + +class TestRepositoryScanner: + def test_scans_js_repo(self, sample_js_repo, tmp_path): + output = tmp_path / "scan_results.json" + result = run_node("repository_scanner.js", sample_js_repo, "--output", str(output)) + assert result.returncode == 0 + assert output.exists() + + data = json.loads(output.read_text()) + assert data["statistics"]["totalFiles"] == 3 + + def test_finds_js_files(self, sample_js_repo, tmp_path): + output = tmp_path / "scan_results.json" + run_node("repository_scanner.js", sample_js_repo, "--output", str(output)) + data = json.loads(output.read_text()) + + paths = [f["path"] for f in data["files"]] + assert any("app.js" in p for p in paths) + assert any("db.js" in p for p in paths) + assert any("utils.js" in p for p in paths) + + def test_skip_tests_flag(self, tmp_path): + repo = tmp_path / "repo" + repo.mkdir() + (repo / "index.js").write_text("module.exports = {};") + test_dir = repo / "__tests__" + test_dir.mkdir() + (test_dir / "index.test.js").write_text("test('foo', () => {});") + + output = tmp_path / "scan.json" + run_node("repository_scanner.js", str(repo), "--output", str(output), "--skip-tests") + data = json.loads(output.read_text()) + + paths = [f["path"] for f in data["files"]] + assert any("index.js" in p for p in paths) + assert not any("test" in p.lower() for p in paths) + + +class TestTypeScriptAnalyzer: + # Known issue: ts-morph fails to resolve files with backslash paths on Windows + _windows_path_xfail = pytest.mark.xfail( + sys.platform == "win32", + reason="ts-morph path resolution issue with Windows backslash paths", + strict=False, + ) + + def test_analyzes_files(self, sample_js_repo, tmp_path): + # First scan to get file list + scan_output = tmp_path / "scan.json" + run_node("repository_scanner.js", sample_js_repo, "--output", str(scan_output)) + scan_data = json.loads(scan_output.read_text()) + + # Write file list + file_list = tmp_path / "files.txt" + file_list.write_text("\n".join(f["path"] for f in scan_data["files"])) + + # Run analyzer + analyzer_output = tmp_path / "analyzer_output.json" + result = run_node( + "typescript_analyzer.js", + sample_js_repo, + "--files-from", str(file_list), + "--output", str(analyzer_output), + ) + assert result.returncode == 0 + assert analyzer_output.exists() + + @_windows_path_xfail + def test_extracts_functions(self, sample_js_repo, tmp_path): + scan_output = tmp_path / "scan.json" + run_node("repository_scanner.js", sample_js_repo, "--output", str(scan_output)) + scan_data = json.loads(scan_output.read_text()) + + file_list = tmp_path / "files.txt" + file_list.write_text("\n".join(f["path"] for f in scan_data["files"])) + + analyzer_output = tmp_path / "analyzer_output.json" + run_node( + "typescript_analyzer.js", + sample_js_repo, + "--files-from", str(file_list), + "--output", str(analyzer_output), + ) + data = json.loads(analyzer_output.read_text()) + + assert "functions" in data + func_names = [f.get("name", "") for f in data["functions"].values()] + assert "getUser" in func_names + assert "createUser" in func_names + assert "getConnection" in func_names + + def test_builds_call_graph(self, sample_js_repo, tmp_path): + scan_output = tmp_path / "scan.json" + run_node("repository_scanner.js", sample_js_repo, "--output", str(scan_output)) + scan_data = json.loads(scan_output.read_text()) + + file_list = tmp_path / "files.txt" + file_list.write_text("\n".join(f["path"] for f in scan_data["files"])) + + analyzer_output = tmp_path / "analyzer_output.json" + run_node( + "typescript_analyzer.js", + sample_js_repo, + "--files-from", str(file_list), + "--output", str(analyzer_output), + ) + data = json.loads(analyzer_output.read_text()) + + assert "callGraph" in data + # Call graph keys should match extracted functions + assert len(data["callGraph"]) == len(data["functions"]) + + +class TestUnitGenerator: + @pytest.fixture + def analyzer_output(self, sample_js_repo, tmp_path): + scan_output = tmp_path / "scan.json" + run_node("repository_scanner.js", sample_js_repo, "--output", str(scan_output)) + scan_data = json.loads(scan_output.read_text()) + + file_list = tmp_path / "files.txt" + file_list.write_text("\n".join(f["path"] for f in scan_data["files"])) + + output = tmp_path / "analyzer_output.json" + run_node( + "typescript_analyzer.js", + sample_js_repo, + "--files-from", str(file_list), + "--output", str(output), + ) + return str(output) + + def test_generates_dataset(self, analyzer_output, tmp_path): + dataset_output = tmp_path / "dataset.json" + result = run_node( + "unit_generator.js", + analyzer_output, + "--output", str(dataset_output), + ) + assert result.returncode == 0 + assert Path(dataset_output).exists() + + data = json.loads(Path(dataset_output).read_text()) + assert "units" in data + assert len(data["units"]) > 0 + + def test_units_have_required_fields(self, analyzer_output, tmp_path): + dataset_output = tmp_path / "dataset.json" + run_node( + "unit_generator.js", + analyzer_output, + "--output", str(dataset_output), + ) + data = json.loads(Path(dataset_output).read_text()) + + for unit in data["units"]: + assert "id" in unit + assert "code" in unit + + +class TestFullPipeline: + """End-to-end test through parser_adapter.""" + + # Known issue: test_pipeline.py uses Unicode checkmarks that fail on Windows cp1252 + _windows_unicode_xfail = pytest.mark.xfail( + sys.platform == "win32", + reason="JS test_pipeline.py Unicode chars fail on Windows cp1252 encoding", + strict=False, + ) + + @_windows_unicode_xfail + def test_parse_js_repo(self, sample_js_repo, tmp_output_dir): + from core.parser_adapter import parse_repository + + result = parse_repository( + repo_path=sample_js_repo, + output_dir=tmp_output_dir, + language="javascript", + processing_level="all", + ) + assert result.language == "javascript" + assert result.units_count > 0 + assert Path(result.dataset_path).exists() + assert result.analyzer_output_path is not None + assert Path(result.analyzer_output_path).exists() + + @_windows_unicode_xfail + def test_auto_detects_javascript(self, sample_js_repo, tmp_output_dir): + from core.parser_adapter import parse_repository + + result = parse_repository( + repo_path=sample_js_repo, + output_dir=tmp_output_dir, + language="auto", + processing_level="all", + ) + assert result.language == "javascript" diff --git a/libs/openant-core/tests/test_parser_adapter.py b/libs/openant-core/tests/test_parser_adapter.py new file mode 100644 index 0000000..af209cb --- /dev/null +++ b/libs/openant-core/tests/test_parser_adapter.py @@ -0,0 +1,106 @@ +"""Tests for core/parser_adapter.py — language detection and Python parsing.""" +import json +import os +from pathlib import Path + +import pytest + +from core.parser_adapter import detect_language, parse_repository + + +class TestDetectLanguage: + def test_python_repo(self, sample_python_repo): + assert detect_language(sample_python_repo) == "python" + + def test_empty_dir_raises(self, tmp_path): + with pytest.raises(ValueError, match="No supported source files"): + detect_language(str(tmp_path)) + + def test_javascript_repo(self, tmp_path): + (tmp_path / "index.js").write_text("console.log('hi');") + (tmp_path / "utils.js").write_text("export function foo() {}") + assert detect_language(str(tmp_path)) == "javascript" + + def test_mixed_repo_picks_majority(self, tmp_path): + # 3 Python files, 1 JS file — should pick Python + for name in ["a.py", "b.py", "c.py"]: + (tmp_path / name).write_text("pass") + (tmp_path / "d.js").write_text("//js") + assert detect_language(str(tmp_path)) == "python" + + def test_ignores_node_modules(self, tmp_path): + (tmp_path / "app.py").write_text("pass") + nm = tmp_path / "node_modules" / "pkg" + nm.mkdir(parents=True) + (nm / "index.js").write_text("//js") + (nm / "util.js").write_text("//js") + (nm / "helper.js").write_text("//js") + assert detect_language(str(tmp_path)) == "python" + + def test_ignores_venv(self, tmp_path): + (tmp_path / "app.go").write_text("package main") + venv = tmp_path / ".venv" / "lib" + venv.mkdir(parents=True) + for i in range(10): + (venv / f"mod{i}.py").write_text("pass") + assert detect_language(str(tmp_path)) == "go" + + +class TestParseRepositoryPython: + def test_parses_sample_repo(self, sample_python_repo, tmp_output_dir): + result = parse_repository( + repo_path=sample_python_repo, + output_dir=tmp_output_dir, + language="python", + processing_level="all", + ) + assert result.language == "python" + assert result.units_count > 0 + assert Path(result.dataset_path).exists() + + def test_dataset_json_valid(self, sample_python_repo, tmp_output_dir): + result = parse_repository( + repo_path=sample_python_repo, + output_dir=tmp_output_dir, + language="python", + processing_level="all", + ) + with open(result.dataset_path) as f: + dataset = json.load(f) + assert "units" in dataset + assert len(dataset["units"]) > 0 + + def test_units_have_required_fields(self, sample_python_repo, tmp_output_dir): + result = parse_repository( + repo_path=sample_python_repo, + output_dir=tmp_output_dir, + language="python", + processing_level="all", + ) + with open(result.dataset_path) as f: + dataset = json.load(f) + for unit in dataset["units"]: + assert "id" in unit + assert "code" in unit + + def test_auto_detect_language(self, sample_python_repo, tmp_output_dir): + result = parse_repository( + repo_path=sample_python_repo, + output_dir=tmp_output_dir, + language="auto", + processing_level="all", + ) + assert result.language == "python" + + def test_analyzer_output_generated(self, sample_python_repo, tmp_output_dir): + result = parse_repository( + repo_path=sample_python_repo, + output_dir=tmp_output_dir, + language="python", + processing_level="all", + ) + assert result.analyzer_output_path is not None + assert Path(result.analyzer_output_path).exists() + with open(result.analyzer_output_path) as f: + data = json.load(f) + assert "functions" in data diff --git a/libs/openant-core/tests/test_python_parser.py b/libs/openant-core/tests/test_python_parser.py new file mode 100644 index 0000000..aaf5b55 --- /dev/null +++ b/libs/openant-core/tests/test_python_parser.py @@ -0,0 +1,220 @@ +"""Tests for the Python parser phases (scanner, extractor, call graph, unit generator).""" +import sys +from pathlib import Path + +import pytest + +# The parser modules use relative imports, so we need to add the parsers/python dir +PARSERS_DIR = Path(__file__).parent.parent / "parsers" / "python" +if str(PARSERS_DIR) not in sys.path: + sys.path.insert(0, str(PARSERS_DIR)) + +from repository_scanner import RepositoryScanner +from function_extractor import FunctionExtractor +from call_graph_builder import CallGraphBuilder +from unit_generator import UnitGenerator + + +class TestRepositoryScanner: + def test_finds_python_files(self, sample_python_repo): + scanner = RepositoryScanner(sample_python_repo) + result = scanner.scan() + assert result["statistics"]["total_files"] == 3 + paths = [f["path"] for f in result["files"]] + assert any("app.py" in p for p in paths) + assert any("db.py" in p for p in paths) + assert any("utils.py" in p for p in paths) + + def test_skip_tests_option(self, tmp_path): + (tmp_path / "main.py").write_text("pass") + (tmp_path / "test_main.py").write_text("pass") + (tmp_path / "tests").mkdir() + (tmp_path / "tests" / "test_foo.py").write_text("pass") + + scanner = RepositoryScanner(str(tmp_path), {"skip_tests": True}) + result = scanner.scan() + paths = [f["path"] for f in result["files"]] + assert any("main.py" in p for p in paths) + assert not any("test_main.py" in p for p in paths) + assert not any("test_foo.py" in p for p in paths) + + def test_records_file_sizes(self, sample_python_repo): + scanner = RepositoryScanner(sample_python_repo) + result = scanner.scan() + for f in result["files"]: + assert "size" in f + assert f["size"] > 0 + + def test_empty_repo(self, tmp_path): + scanner = RepositoryScanner(str(tmp_path)) + result = scanner.scan() + assert result["statistics"]["total_files"] == 0 + assert result["files"] == [] + + +class TestFunctionExtractor: + def test_extracts_functions(self, sample_python_repo): + scanner = RepositoryScanner(sample_python_repo) + scan_result = scanner.scan() + extractor = FunctionExtractor(sample_python_repo) + result = extractor.extract_from_scan(scan_result) + + assert "functions" in result + assert len(result["functions"]) > 0 + + def test_finds_known_functions(self, sample_python_repo): + scanner = RepositoryScanner(sample_python_repo) + scan_result = scanner.scan() + extractor = FunctionExtractor(sample_python_repo) + result = extractor.extract_from_scan(scan_result) + + func_names = [f["name"] for f in result["functions"].values()] + assert "get_user" in func_names + assert "create_user" in func_names + assert "get_connection" in func_names + assert "sanitize_input" in func_names + assert "validate_email" in func_names + + def test_extracts_route_handlers(self, sample_python_repo): + scanner = RepositoryScanner(sample_python_repo) + scan_result = scanner.scan() + extractor = FunctionExtractor(sample_python_repo) + result = extractor.extract_from_scan(scan_result) + + func_names = [f["name"] for f in result["functions"].values()] + assert "get_user_endpoint" in func_names + assert "create_user_endpoint" in func_names + + def test_captures_decorators(self, sample_python_repo): + scanner = RepositoryScanner(sample_python_repo) + scan_result = scanner.scan() + extractor = FunctionExtractor(sample_python_repo) + result = extractor.extract_from_scan(scan_result) + + # Find the get_user_endpoint function + endpoint_funcs = [ + f for f in result["functions"].values() + if f["name"] == "get_user_endpoint" + ] + assert len(endpoint_funcs) == 1 + assert len(endpoint_funcs[0]["decorators"]) > 0 + + def test_statistics(self, sample_python_repo): + scanner = RepositoryScanner(sample_python_repo) + scan_result = scanner.scan() + extractor = FunctionExtractor(sample_python_repo) + result = extractor.extract_from_scan(scan_result) + + stats = result["statistics"] + assert stats["total_functions"] > 0 + assert stats["files_processed"] == 3 + + def test_function_has_code(self, sample_python_repo): + scanner = RepositoryScanner(sample_python_repo) + scan_result = scanner.scan() + extractor = FunctionExtractor(sample_python_repo) + result = extractor.extract_from_scan(scan_result) + + for func in result["functions"].values(): + assert "code" in func + assert len(func["code"]) > 0 + + +class TestCallGraphBuilder: + @pytest.fixture + def extractor_result(self, sample_python_repo): + scanner = RepositoryScanner(sample_python_repo) + scan_result = scanner.scan() + extractor = FunctionExtractor(sample_python_repo) + return extractor.extract_from_scan(scan_result) + + def test_builds_call_graph(self, extractor_result): + builder = CallGraphBuilder(extractor_result) + builder.build_call_graph() + result = builder.export() + + assert "call_graph" in result + assert "reverse_call_graph" in result + + def test_detects_calls(self, extractor_result): + builder = CallGraphBuilder(extractor_result) + builder.build_call_graph() + result = builder.export() + + # get_user_endpoint calls get_user + endpoint_key = [k for k in result["call_graph"] if "get_user_endpoint" in k] + assert len(endpoint_key) > 0 + callees = result["call_graph"][endpoint_key[0]] + callee_names = [c.split(":")[-1] for c in callees] + assert "get_user" in callee_names + + def test_reverse_graph(self, extractor_result): + builder = CallGraphBuilder(extractor_result) + builder.build_call_graph() + result = builder.export() + + # get_user should be called by get_user_endpoint + get_user_key = [k for k in result["reverse_call_graph"] if k.endswith(":get_user")] + assert len(get_user_key) > 0 + + def test_statistics(self, extractor_result): + builder = CallGraphBuilder(extractor_result) + builder.build_call_graph() + result = builder.export() + + stats = result["statistics"] + assert stats["total_edges"] > 0 + assert "avg_out_degree" in stats + + +class TestUnitGenerator: + @pytest.fixture + def call_graph_result(self, sample_python_repo): + scanner = RepositoryScanner(sample_python_repo) + scan_result = scanner.scan() + extractor = FunctionExtractor(sample_python_repo) + extractor_result = extractor.extract_from_scan(scan_result) + builder = CallGraphBuilder(extractor_result) + builder.build_call_graph() + return builder.export() + + def test_generates_units(self, call_graph_result): + generator = UnitGenerator(call_graph_result) + dataset = generator.generate_units() + + assert "units" in dataset + assert len(dataset["units"]) > 0 + + def test_units_have_id_and_code(self, call_graph_result): + generator = UnitGenerator(call_graph_result) + dataset = generator.generate_units() + + for unit in dataset["units"]: + assert "id" in unit + assert "code" in unit + assert "primary_code" in unit["code"] + + def test_units_have_metadata(self, call_graph_result): + generator = UnitGenerator(call_graph_result) + dataset = generator.generate_units() + + for unit in dataset["units"]: + assert "metadata" in unit + assert "unit_type" in unit + + def test_enhanced_code_includes_dependencies(self, call_graph_result): + generator = UnitGenerator(call_graph_result) + dataset = generator.generate_units() + + # get_user_endpoint should have get_user's code included + endpoint_units = [u for u in dataset["units"] if "get_user_endpoint" in u["id"]] + assert len(endpoint_units) == 1 + code = endpoint_units[0]["code"]["primary_code"] + assert "get_user" in code + + def test_statistics(self, call_graph_result): + generator = UnitGenerator(call_graph_result) + dataset = generator.generate_units() + + assert "statistics" in dataset + assert dataset["statistics"]["total_units"] == len(dataset["units"]) diff --git a/libs/openant-core/tests/test_token_tracker.py b/libs/openant-core/tests/test_token_tracker.py new file mode 100644 index 0000000..08fdc9c --- /dev/null +++ b/libs/openant-core/tests/test_token_tracker.py @@ -0,0 +1,73 @@ +"""Tests for TokenTracker.""" +from utilities.llm_client import TokenTracker, MODEL_PRICING + + +class TestTokenTracker: + def test_initial_state(self): + tracker = TokenTracker() + assert tracker.total_input_tokens == 0 + assert tracker.total_output_tokens == 0 + assert tracker.total_tokens == 0 + assert tracker.total_cost_usd == 0.0 + assert tracker.calls == [] + + def test_record_call_known_model(self): + tracker = TokenTracker() + result = tracker.record_call("claude-sonnet-4-20250514", 1000, 500) + + assert result["model"] == "claude-sonnet-4-20250514" + assert result["input_tokens"] == 1000 + assert result["output_tokens"] == 500 + # Sonnet: $3/M input, $15/M output + expected_cost = (1000 / 1_000_000) * 3.0 + (500 / 1_000_000) * 15.0 + assert result["cost_usd"] == round(expected_cost, 6) + + def test_record_call_unknown_model_uses_default(self): + tracker = TokenTracker() + result = tracker.record_call("some-future-model", 100, 50) + default_pricing = MODEL_PRICING["default"] + expected_cost = (100 / 1_000_000) * default_pricing["input"] + (50 / 1_000_000) * default_pricing["output"] + assert result["cost_usd"] == round(expected_cost, 6) + + def test_cumulative_tracking(self): + tracker = TokenTracker() + tracker.record_call("claude-sonnet-4-20250514", 1000, 500) + tracker.record_call("claude-sonnet-4-20250514", 2000, 1000) + + assert tracker.total_input_tokens == 3000 + assert tracker.total_output_tokens == 1500 + assert tracker.total_tokens == 4500 + assert len(tracker.calls) == 2 + + def test_reset(self): + tracker = TokenTracker() + tracker.record_call("claude-sonnet-4-20250514", 1000, 500) + tracker.reset() + + assert tracker.total_input_tokens == 0 + assert tracker.total_output_tokens == 0 + assert tracker.total_cost_usd == 0.0 + assert tracker.calls == [] + + def test_get_summary_includes_calls(self): + tracker = TokenTracker() + tracker.record_call("claude-sonnet-4-20250514", 100, 50) + summary = tracker.get_summary() + + assert summary["total_calls"] == 1 + assert "calls" in summary + assert len(summary["calls"]) == 1 + + def test_get_totals_excludes_calls(self): + tracker = TokenTracker() + tracker.record_call("claude-sonnet-4-20250514", 100, 50) + totals = tracker.get_totals() + + assert totals["total_calls"] == 1 + assert "calls" not in totals + + def test_opus_pricing(self): + tracker = TokenTracker() + result = tracker.record_call("claude-opus-4-20250514", 1_000_000, 1_000_000) + # Opus: $15/M input, $75/M output + assert result["cost_usd"] == 90.0