From a46da0fdde390fd234decbe3a45f0e9e30958599 Mon Sep 17 00:00:00 2001 From: Rudra Date: Tue, 26 May 2026 14:02:40 +0530 Subject: [PATCH 1/2] Expand ANCP language adapter coverage --- .gitignore | 1 + CHANGELOG.md | 2 +- MANIFEST.in | 2 + README.md | 7 +- RELEASE_NOTES.md | 3 +- docs/final-readiness-report.md | 12 +- docs/github-release-plan.md | 2 +- docs/invisible-compiler-layer.md | 18 +- docs/repository-structure.md | 2 +- docs/toolchains.md | 115 ++++ docs/verification.md | 3 + docs/vision-doctrine.md | 3 +- examples/buggy/clojure/broken.clj | 4 + examples/buggy/dockerfile/Dockerfile | 4 + examples/buggy/elixir/broken.ex | 5 + examples/buggy/erlang/broken.erl | 5 + examples/buggy/haskell/Main.hs | 7 + examples/buggy/json/broken.json | 5 + examples/buggy/lua/broken.lua | 4 + examples/buggy/nix/broken.nix | 7 + examples/buggy/ocaml/main.ml | 4 + examples/buggy/perl/broken.pl | 7 + examples/buggy/powershell/broken.ps1 | 5 + examples/buggy/r/broken.R | 7 + examples/buggy/shell/broken.sh | 5 + examples/buggy/sql/broken.sql | 4 + examples/buggy/terraform/main.tf | 8 + examples/buggy/toml/broken.toml | 6 + examples/buggy/yaml/broken.yaml | 5 + pyproject.toml | 20 +- requirements-dev.txt | 1 + research/languages/expanded-ecosystems.md | 65 +++ research/source-docs/sources.json | 162 ++++++ research/tooling-matrix.md | 14 + spec/ancp-1.0.md | 4 +- src/ancp/adapters/base.py | 647 +++++++++++++++++++++- src/ancp/adapters/registry.py | 49 +- src/ancp/cli.py | 28 +- src/ancp/native.py | 3 +- src/ancp/proxy.py | 41 +- src/ancp/shim.py | 17 + tests/test_cli_documents.py | 19 + tests/test_schema_examples.py | 3 +- tools/audit_contracts.py | 19 + tools/check_toolchains.py | 111 ++++ tools/run_bug_corpus.py | 17 + tools/verify_repo.py | 7 +- 47 files changed, 1463 insertions(+), 26 deletions(-) create mode 100644 docs/toolchains.md create mode 100644 examples/buggy/clojure/broken.clj create mode 100644 examples/buggy/dockerfile/Dockerfile create mode 100644 
examples/buggy/elixir/broken.ex create mode 100644 examples/buggy/erlang/broken.erl create mode 100644 examples/buggy/haskell/Main.hs create mode 100644 examples/buggy/json/broken.json create mode 100644 examples/buggy/lua/broken.lua create mode 100644 examples/buggy/nix/broken.nix create mode 100644 examples/buggy/ocaml/main.ml create mode 100644 examples/buggy/perl/broken.pl create mode 100644 examples/buggy/powershell/broken.ps1 create mode 100644 examples/buggy/r/broken.R create mode 100644 examples/buggy/shell/broken.sh create mode 100644 examples/buggy/sql/broken.sql create mode 100644 examples/buggy/terraform/main.tf create mode 100644 examples/buggy/toml/broken.toml create mode 100644 examples/buggy/yaml/broken.yaml create mode 100644 research/languages/expanded-ecosystems.md create mode 100644 tools/check_toolchains.py diff --git a/.gitignore b/.gitignore index 2bf39a2..76ed6be 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # Generated verification and downloaded source snapshots verification-report.json /.ancp/ +**/.ancp/ /research/source-docs/snapshots/ /research/source-docs/fetch-report.json /research/source-docs/index.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 4addc91..36f4daa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ Initial public release. - Python reference package with `ancp` CLI. - Compiler-facing proxy entry points such as `ancp-cargo`, `ancp-tsc`, `ancp-python`, `ancp-kotlinc`, and `ancp-julia`. - Native-name shim installer via `ancp install-shims`. -- Native-tool adapters for Python, TypeScript, JavaScript, Rust, Go, C/C++, Java, Kotlin, C#/.NET, Swift, Zig, Ruby, PHP, Dart, Scala, and Julia. +- Native-tool adapters for Python, TypeScript, JavaScript, Rust, Go, C/C++, Java, Kotlin, C#/.NET, Swift, Zig, Ruby, PHP, Dart, Scala, Julia, Shell, PowerShell, Lua, Perl, R, Haskell, OCaml, Erlang, Elixir, Clojure, Nix, Terraform, Dockerfile, SQL, JSON, TOML, and YAML. 
- Markdown renderer for compact agent-facing diagnostic summaries. - Multilingual intentionally broken bug corpus. - Repository validation, contract audit, source fetch, and bug corpus scripts. diff --git a/MANIFEST.in b/MANIFEST.in index 713b0c3..b456009 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,8 +10,10 @@ recursive-include schemas *.json recursive-include taxonomies *.json recursive-include examples *.json recursive-include examples/buggy * +recursive-exclude examples/buggy .ancp/* native-output*.txt recursive-include docs *.md recursive-include spec *.md +recursive-include tools *.py include research/README.md include research/tooling-matrix.md include research/source-docs/sources.json diff --git a/README.md b/README.md index 3482050..d361c98 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The core idea is simple: > Existing tools may keep their native internals. ANCP defines the stable machine contract that adapters expose to agents. -ANCP turns tool output into structured diagnostics, repair hints, repair plans, verification steps, code graph facts, effect/capability metadata, and version-matched agent guidance. It is designed to sit above current languages such as TypeScript, Python, Rust, Go, Java, C, C++, C#, Swift, Kotlin, Zig, Ruby, PHP, and others. +ANCP turns tool output into structured diagnostics, repair hints, repair plans, verification steps, code graph facts, effect/capability metadata, and version-matched agent guidance. It is designed to sit above current languages such as TypeScript, Python, Rust, Go, Java, C, C++, C#, Swift, Kotlin, Julia, Zig, Ruby, PHP, Lua, Perl, R, Haskell, OCaml, Erlang, Elixir, Clojure, Nix, Terraform, Dockerfile, SQL, JSON, TOML, YAML, and others. 
## What This Repository Contains @@ -30,6 +30,7 @@ This repository is the public ANCP 1.0 contract and Python reference implementat | Native-tool adapters | Implemented | [src/ancp/adapters](src/ancp/adapters) | | Bug corpus | Implemented | [examples/buggy](examples/buggy) | | Repository verifier | Implemented | [tools/verify_repo.py](tools/verify_repo.py) | +| Toolchain availability checker | Implemented | [tools/check_toolchains.py](tools/check_toolchains.py) | ## Why ANCP Exists @@ -165,6 +166,9 @@ julia app.jl kotlinc Main.kt gcc -fsyntax-only main.c clang++ -fsyntax-only main.cpp +bash -n script.sh +pwsh -NoProfile -File script.ps1 +terraform validate ``` The native compiler output and exit code are preserved. ANCP writes a structured sidecar: @@ -247,6 +251,7 @@ python tools/audit_contracts.py python tools/verify_repo.py pytest python tools/run_bug_corpus.py +python tools/check_toolchains.py python -m build python -m twine check dist/* ``` diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index aa2bb31..4464e7f 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -9,7 +9,8 @@ ANCP 1.0.0 ships the Agent Native Compiler Protocol contract and Python referenc - Diagnostic, repair, and effect taxonomies. - Reference CLI: `ancp`. - Compiler-name shims for invisible compiler-layer usage. -- Native-tool adapters for Python, TypeScript, JavaScript, Rust, Go, C/C++, Java, Kotlin, C#/.NET, Swift, Zig, Ruby, PHP, Dart, Scala, and Julia. +- Native-tool adapters for Python, TypeScript, JavaScript, Rust, Go, C/C++, Java, Kotlin, C#/.NET, Swift, Zig, Ruby, PHP, Dart, Scala, Julia, Shell, PowerShell, Lua, Perl, R, Haskell, OCaml, Erlang, Elixir, Clojure, Nix, Terraform, Dockerfile, and SQL. +- Built-in parser adapters for JSON, TOML, and YAML. - Compact Markdown rendering for agent context. - Multilingual broken-code corpus for adapter smoke testing. - CI, packaging, verification scripts, and OSS release checklist. 
diff --git a/docs/final-readiness-report.md b/docs/final-readiness-report.md index 7388e81..d8a7b34 100644 --- a/docs/final-readiness-report.md +++ b/docs/final-readiness-report.md @@ -13,9 +13,9 @@ ANCP now contains: - diagnostic, repair, and effect taxonomies, - a Python package named `ancp`, - the `ancp` CLI, -- prefixed compiler proxies such as `ancp-cargo`, `ancp-tsc`, `ancp-python`, `ancp-kotlinc`, and `ancp-julia`, +- prefixed compiler proxies such as `ancp-cargo`, `ancp-tsc`, `ancp-python`, `ancp-kotlinc`, `ancp-julia`, `ancp-pwsh`, `ancp-terraform`, and `ancp-sqlfluff`, - native-name compiler shims installed with `ancp install-shims`, -- adapters for Python, TypeScript, JavaScript, Rust, Go, C/C++, Java, Kotlin, C#/.NET, Swift, Zig, Ruby, PHP, Dart, Scala, and Julia, +- adapters for Python, TypeScript, JavaScript, Rust, Go, C/C++, Java, Kotlin, C#/.NET, Swift, Zig, Ruby, PHP, Dart, Scala, Julia, Shell, PowerShell, Lua, Perl, R, Haskell, OCaml, Erlang, Elixir, Clojure, Nix, Terraform, Dockerfile, SQL, JSON, TOML, and YAML, - compact Markdown rendering for agents, - a multilingual intentionally broken bug corpus, - research notes and source document fetch tooling, @@ -29,7 +29,7 @@ The local release was checked with these passes. | --- | --- | --- | | Syntax | `python -m compileall -q src tests tools` | passed | | Unit tests | `pytest` | 14 passed | -| Source corpus | `python tools/fetch_sources.py` | fetched 59/59 source documents | +| Source corpus | `python tools/fetch_sources.py` | fetched 77/77 source documents | | Contract audit | `python tools/audit_contracts.py` | passed | | Repo validator | `python tools/verify_repo.py` | passed, wrote `verification-report.json` | | CLI JSON | `ancp manifest \| python -m json.tool` | passed | @@ -37,15 +37,15 @@ The local release was checked with these passes. 
| CLI JSON | `ancp schema \| python -m json.tool` | passed | | Shim install | `ancp install-shims --dir .ancp/bin --force` | passed | | Shim smoke | normal `python -m py_compile bad.py` through shim | exit 1, ANCP sidecar status `failed`, 1 diagnostic | -| Bug corpus | `python tools/run_bug_corpus.py` | emitted ANCP JSON/Markdown for all 16 cases | +| Bug corpus | `python tools/run_bug_corpus.py` | emitted ANCP JSON/Markdown for all 33 cases | | Bug corpus validation | `ancp validate .ancp/bug-corpus` | passed | | Package build | `python -m build` | built wheel and sdist | | Package metadata | `python -m twine check dist/*` | both artifacts passed | Local tool availability during verification: -- Installed and producing diagnostics: Python, Go, Julia. -- Not installed or not available in this environment: TypeScript compiler, Rust/Cargo, GCC/Clang, Java, Kotlin, .NET, Swift, Zig, Ruby, PHP, Dart, Scala. +- Installed and producing diagnostics: Python, Go, Julia, and PowerShell, plus the built-in JSON, TOML, and YAML parsers. +- Not installed or not available in this environment: TypeScript compiler, Rust/Cargo, GCC/Clang, Java, Kotlin, .NET, Swift, Zig, Ruby, PHP, Dart, Scala, ShellCheck/Bash, Lua, Perl, R, GHC, OCaml, Erlang, Elixir, clj-kondo, Nix, Terraform, hadolint, SQLFluff. - Missing native tools correctly report `tool_failed`; they do not pretend to pass.
## Scope Boundary diff --git a/docs/github-release-plan.md b/docs/github-release-plan.md index 31b86d5..1486285 100644 --- a/docs/github-release-plan.md +++ b/docs/github-release-plan.md @@ -127,7 +127,7 @@ Highlights: - diagnostic, repair, and effect taxonomies - reference CLI - compiler-name shim installation -- native-tool adapters for Python, TypeScript, JavaScript, Rust, Go, C/C++, Java, Kotlin, .NET, Swift, Zig, Ruby, PHP, Dart, Scala, and Julia +- native-tool adapters for Python, TypeScript, JavaScript, Rust, Go, C/C++, Java, Kotlin, .NET, Swift, Zig, Ruby, PHP, Dart, Scala, Julia, Shell, PowerShell, Lua, Perl, R, Haskell, OCaml, Erlang, Elixir, Clojure, Nix, Terraform, Dockerfile, SQL, JSON, TOML, and YAML - validation and conformance tooling - GitHub Actions CI - research-backed language/tooling matrix diff --git a/docs/invisible-compiler-layer.md b/docs/invisible-compiler-layer.md index d632633..ab67806 100644 --- a/docs/invisible-compiler-layer.md +++ b/docs/invisible-compiler-layer.md @@ -95,6 +95,23 @@ The reference implementation can create shims for: - `scala-cli` - `scalac` - `julia` +- `shellcheck` +- `bash` +- `pwsh` +- `powershell` +- `luac` +- `lua` +- `perl` +- `Rscript` +- `ghc` +- `ocamlc` +- `erlc` +- `elixirc` +- `clj-kondo` +- `nix-instantiate` +- `terraform` +- `hadolint` +- `sqlfluff` These shims do not replace compilers. They find the real compiler later in PATH, execute it, preserve its output and exit code, then emit ANCP JSON. @@ -125,4 +142,3 @@ A shim is production-acceptable when it obeys these rules: - it validates emitted ANCP documents. That gives agents a stable protocol without forcing every language compiler project to accept upstream patches first. 
- diff --git a/docs/repository-structure.md b/docs/repository-structure.md index 7f0ba25..b117406 100644 --- a/docs/repository-structure.md +++ b/docs/repository-structure.md @@ -89,7 +89,7 @@ - `ancp`: the reference CLI for manifest, capability, check, explain, repair-plan, verify, graph, skills, validate, render, schema, and shim installation. - `ancp.proxy`: prefixed compiler proxies such as `ancp-cargo`, `ancp-tsc`, `ancp-python`, `ancp-kotlinc`, and `ancp-julia`. - `ancp.shim`: native-name wrappers installed by `ancp install-shims`, so normal commands such as `cargo check` and `python -m py_compile app.py` can emit ANCP sidecars. -- `ancp.adapters`: native-tool adapters for Python, TypeScript, JavaScript, Rust, Go, C/C++, Java, Kotlin, C#/.NET, Swift, Zig, Ruby, PHP, Dart, Scala, and Julia. +- `ancp.adapters`: native-tool adapters for Python, TypeScript, JavaScript, Rust, Go, C/C++, Java, Kotlin, C#/.NET, Swift, Zig, Ruby, PHP, Dart, Scala, Julia, Shell, PowerShell, Lua, Perl, R, Haskell, OCaml, Erlang, Elixir, Clojure, Nix, Terraform, Dockerfile, SQL, JSON, TOML, and YAML. - `ancp.resources`: packaged copies of the ANCP schema and taxonomies for installed environments. ## Contract Artifacts diff --git a/docs/toolchains.md b/docs/toolchains.md new file mode 100644 index 0000000..451c8e8 --- /dev/null +++ b/docs/toolchains.md @@ -0,0 +1,115 @@ +# Toolchain Coverage + +ANCP adapters are split into two layers: + +1. built-in protocol and parser support that ships with the Python package, +2. native compiler/checker integrations that require the relevant ecosystem tool to be installed. + +The reference package is useful even when some compilers are missing. Missing tools are reported as ANCP `tool_failed` results instead of invalid JSON or fake success. 
+ +## Check Local Availability + +Run: + +```bash +python tools/check_toolchains.py +``` + +For machine-readable output: + +```bash +python tools/check_toolchains.py --json +``` + +By default the checker returns exit code `0` even when tools are missing, so it can be used as a report in local verification. Add `--strict` when a CI job expects every tool in that job to be installed: + +```bash +python tools/check_toolchains.py --language rust --language go --strict +``` + +## Built In + +These adapters do not require an external compiler for syntax validation: + +- JSON +- TOML +- YAML + +Python (required to run ANCP itself) and PowerShell (shipped with Windows) are usually present already, but they are still external runtime/toolchain integrations, not built-in parsers. + +## Recommended Native Tools + +| Language family | Tool | +| --- | --- | +| Python | `python` | +| TypeScript | `tsc` | +| JavaScript | `eslint` | +| Rust | `cargo`, `rustc` | +| Go | `go` | +| C | `gcc` or `clang` | +| C++ | `g++` or `clang++` | +| Java | `javac` | +| Kotlin | `kotlinc` | +| C#/.NET | `dotnet` | +| Swift | `swift` | +| Zig | `zig` | +| Ruby | `ruby` | +| PHP | `php` | +| Dart | `dart` | +| Scala | `scala-cli` or `scalac` | +| Julia | `julia` | +| Shell | `shellcheck` preferred, `bash` fallback | +| PowerShell | `pwsh` or Windows PowerShell | +| Lua | `luac` preferred, `lua` fallback | +| Perl | `perl` | +| R | `Rscript` | +| Haskell | `ghc` | +| OCaml | `ocamlc` | +| Erlang | `erlc` | +| Elixir | `elixirc` | +| Clojure | `clj-kondo` | +| Nix | `nix-instantiate` | +| Terraform | `terraform` | +| Dockerfile | `hadolint` | +| SQL | `sqlfluff` | + +## Windows User-Level Install Examples + +Use these only for tools you want to exercise locally. CI should install toolchains in dedicated jobs rather than relying on one giant image.
+ +```powershell +scoop install shellcheck lua perl terraform hadolint +npm install -g typescript eslint +python -m pip install sqlfluff +``` + +Heavier ecosystems are better installed with their official installers or CI setup: + +- Rust: `rustup` +- Java/Kotlin/Scala: JDK plus Kotlin/Scala tooling +- Haskell: GHCup +- OCaml: opam +- Erlang/Elixir: official installers or package manager images +- R: CRAN R installer +- Swift, Zig, Dart, Ruby, PHP, Nix: ecosystem-specific installers + +## CI Strategy + +Do not require every compiler in one job. Use a matrix: + +- core job: schema validation, unit tests, package build, JSON/TOML/YAML/Python checks, +- systems job: Rust, Go, C/C++, Zig, +- JVM/.NET job: Java, Kotlin, Scala, C#, +- dynamic job: JavaScript, TypeScript, Ruby, PHP, Dart, Julia, Lua, Perl, R, +- functional job: Haskell, OCaml, Erlang, Elixir, Clojure, +- infrastructure job: Shell, PowerShell, Nix, Terraform, Dockerfile, SQL. + +Each job should run the checker with `--language` flags for its own languages (part of the systems job is shown here), followed by the bug corpus and validation: + +```bash +python tools/check_toolchains.py --json --strict --language rust --language go +python tools/run_bug_corpus.py +ancp validate .ancp/bug-corpus +``` + +The matrix should fail when a compiler that the job expects to be installed returns `tool_failed`, but the local all-language corpus may still pass schema validation when a developer has only partial tooling. diff --git a/docs/verification.md b/docs/verification.md index dcfc8f6..ad84102 100644 --- a/docs/verification.md +++ b/docs/verification.md @@ -78,6 +78,7 @@ ancp capabilities | python -m json.tool ancp schema | python -m json.tool python -m build python -m twine check dist/* +python tools/check_toolchains.py ``` Purpose: @@ -87,6 +88,7 @@ Purpose: - confirms top-level CLI commands emit parseable JSON, - confirms wheel and source distributions can be built, - confirms package metadata is acceptable to Python packaging tools. +- reports which native language toolchains are available for real compiler-backed smoke coverage.
Expected result: @@ -95,6 +97,7 @@ Expected result: - CLI JSON commands parse, - build succeeds, - `twine check` reports `PASSED` for every built artifact. +- installed native toolchains appear in the availability report; missing optional toolchains are expected on partial developer machines. ## Pass 5: Invisible Compiler Layer And Bug Corpus diff --git a/docs/vision-doctrine.md b/docs/vision-doctrine.md index 0151941..c2e7910 100644 --- a/docs/vision-doctrine.md +++ b/docs/vision-doctrine.md @@ -57,7 +57,7 @@ The developer should not need to learn a new workflow just to make code agent-re If a compiler/tool is not installed or a language lacks stable structured output, ANCP must report that honestly as valid ANCP JSON. It must not fabricate successful checks. 6. **Multi-language from the start.** - The important targets include Rust, Kotlin, Python, Julia, TypeScript, C, C++, Go, Java, C#/.NET, Swift, Zig, Ruby, PHP, Dart, Scala, and JavaScript. + The important targets include Rust, Kotlin, Python, Julia, TypeScript, C, C++, Go, Java, C#/.NET, Swift, Zig, Ruby, PHP, Dart, Scala, JavaScript, Shell, PowerShell, Lua, Perl, R, Haskell, OCaml, Erlang, Elixir, Clojure, Nix, Terraform, Dockerfile, SQL, JSON, TOML, and YAML. 7. **Security is part of the protocol.** Build/test/repair commands can execute arbitrary code. ANCP must label effects, safety levels, and risky operations instead of hiding them. @@ -271,4 +271,3 @@ The first public release succeeds if someone can: 9. get a compact, useful diagnosis instead of a noisy compiler dump. That is the revolution: compilers become agent-readable without developers changing how they code. 
- diff --git a/examples/buggy/clojure/broken.clj b/examples/buggy/clojure/broken.clj new file mode 100644 index 0000000..2f5c645 --- /dev/null +++ b/examples/buggy/clojure/broken.clj @@ -0,0 +1,4 @@ +(ns broken.core) + +(defn run [] + (println "hello clojure") diff --git a/examples/buggy/dockerfile/Dockerfile b/examples/buggy/dockerfile/Dockerfile new file mode 100644 index 0000000..be01f27 --- /dev/null +++ b/examples/buggy/dockerfile/Dockerfile @@ -0,0 +1,4 @@ +FROM ubuntu:latest +RUN apt-get update && apt-get install -y curl +COPY missing-file /app/ +CMD ["bash"] diff --git a/examples/buggy/elixir/broken.ex b/examples/buggy/elixir/broken.ex new file mode 100644 index 0000000..323e05d --- /dev/null +++ b/examples/buggy/elixir/broken.ex @@ -0,0 +1,5 @@ +defmodule Broken do + def run(name) do + IO.puts("hello #{name}" + end +end diff --git a/examples/buggy/erlang/broken.erl b/examples/buggy/erlang/broken.erl new file mode 100644 index 0000000..9dcb36a --- /dev/null +++ b/examples/buggy/erlang/broken.erl @@ -0,0 +1,5 @@ +-module(broken). +-export([main/0]). + +main() -> + io:format("broken erlang~n" diff --git a/examples/buggy/haskell/Main.hs b/examples/buggy/haskell/Main.hs new file mode 100644 index 0000000..80e576d --- /dev/null +++ b/examples/buggy/haskell/Main.hs @@ -0,0 +1,7 @@ +module Main where + +main :: IO () +main = do + putStrLn "broken haskell" + let x = + print x diff --git a/examples/buggy/json/broken.json b/examples/buggy/json/broken.json new file mode 100644 index 0000000..bc987bd --- /dev/null +++ b/examples/buggy/json/broken.json @@ -0,0 +1,5 @@ +{ + "name": "broken-json", + "enabled": true, + "items": [1, 2,] +} diff --git a/examples/buggy/lua/broken.lua b/examples/buggy/lua/broken.lua new file mode 100644 index 0000000..f94ee4f --- /dev/null +++ b/examples/buggy/lua/broken.lua @@ -0,0 +1,4 @@ +local function broken(name) + print("hello " .. 
name) +if true then + broken("lua") diff --git a/examples/buggy/nix/broken.nix b/examples/buggy/nix/broken.nix new file mode 100644 index 0000000..f67977d --- /dev/null +++ b/examples/buggy/nix/broken.nix @@ -0,0 +1,7 @@ +{ pkgs }: + +pkgs.mkShell { + packages = [ + pkgs.git + ; +} diff --git a/examples/buggy/ocaml/main.ml b/examples/buggy/ocaml/main.ml new file mode 100644 index 0000000..f6dccbc --- /dev/null +++ b/examples/buggy/ocaml/main.ml @@ -0,0 +1,4 @@ +let greet name = + print_endline ("hello " ^ name + +let () = greet "ocaml" diff --git a/examples/buggy/perl/broken.pl b/examples/buggy/perl/broken.pl new file mode 100644 index 0000000..104b5ad --- /dev/null +++ b/examples/buggy/perl/broken.pl @@ -0,0 +1,7 @@ +use strict; +use warnings; + +my $name = "perl"; +if ($name eq "perl" { + print "broken\n"; +} diff --git a/examples/buggy/powershell/broken.ps1 b/examples/buggy/powershell/broken.ps1 new file mode 100644 index 0000000..bb1a6d1 --- /dev/null +++ b/examples/buggy/powershell/broken.ps1 @@ -0,0 +1,5 @@ +function Invoke-Broken { + param( + [string]$Name + Write-Output "Hello $Name" +} diff --git a/examples/buggy/r/broken.R b/examples/buggy/r/broken.R new file mode 100644 index 0000000..688ad24 --- /dev/null +++ b/examples/buggy/r/broken.R @@ -0,0 +1,7 @@ +broken <- function(x) { + if (x > 0) { + print("positive") + else { + print("negative") + } +} diff --git a/examples/buggy/shell/broken.sh b/examples/buggy/shell/broken.sh new file mode 100644 index 0000000..7e7b3fa --- /dev/null +++ b/examples/buggy/shell/broken.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +if [ "$1" = "run" ]; then + echo "running" +else + echo "missing fi" diff --git a/examples/buggy/sql/broken.sql b/examples/buggy/sql/broken.sql new file mode 100644 index 0000000..a2593ef --- /dev/null +++ b/examples/buggy/sql/broken.sql @@ -0,0 +1,4 @@ +SELECT id, name +FROM users +WHERE created_at > +ORDER BY name; diff --git a/examples/buggy/terraform/main.tf b/examples/buggy/terraform/main.tf new file 
mode 100644 index 0000000..71f1d02 --- /dev/null +++ b/examples/buggy/terraform/main.tf @@ -0,0 +1,8 @@ +terraform { + required_version = ">= 1.6" +} + +resource "null_resource" "broken" { + triggers = { + value = "missing brace" +} diff --git a/examples/buggy/toml/broken.toml b/examples/buggy/toml/broken.toml new file mode 100644 index 0000000..cf9e18b --- /dev/null +++ b/examples/buggy/toml/broken.toml @@ -0,0 +1,6 @@ +[package] +name = "broken-toml" +version = "1.0.0" + +[dependencies +jsonschema = "4" diff --git a/examples/buggy/yaml/broken.yaml b/examples/buggy/yaml/broken.yaml new file mode 100644 index 0000000..5030bd4 --- /dev/null +++ b/examples/buggy/yaml/broken.yaml @@ -0,0 +1,5 @@ +name: broken-yaml +items: + - one + - two + nested: bad diff --git a/pyproject.toml b/pyproject.toml index 406ea41..0b82979 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,8 @@ classifiers = [ "Topic :: Software Development :: Quality Assurance" ] dependencies = [ - "jsonschema>=4.22.0" + "jsonschema>=4.22.0", + "PyYAML>=6.0.0" ] [project.optional-dependencies] @@ -73,6 +74,23 @@ ancp-dart = "ancp.proxy:dart_main" ancp-scala-cli = "ancp.proxy:scala_cli_main" ancp-scalac = "ancp.proxy:scalac_main" ancp-julia = "ancp.proxy:julia_main" +ancp-shellcheck = "ancp.proxy:shellcheck_main" +ancp-bash = "ancp.proxy:bash_main" +ancp-pwsh = "ancp.proxy:pwsh_main" +ancp-powershell = "ancp.proxy:powershell_main" +ancp-luac = "ancp.proxy:luac_main" +ancp-lua = "ancp.proxy:lua_main" +ancp-perl = "ancp.proxy:perl_main" +ancp-rscript = "ancp.proxy:rscript_main" +ancp-ghc = "ancp.proxy:ghc_main" +ancp-ocamlc = "ancp.proxy:ocamlc_main" +ancp-erlc = "ancp.proxy:erlc_main" +ancp-elixirc = "ancp.proxy:elixirc_main" +ancp-clj-kondo = "ancp.proxy:clj_kondo_main" +ancp-nix-instantiate = "ancp.proxy:nix_instantiate_main" +ancp-terraform = "ancp.proxy:terraform_main" +ancp-hadolint = "ancp.proxy:hadolint_main" +ancp-sqlfluff = "ancp.proxy:sqlfluff_main" [tool.setuptools] package-dir = {"" = 
"src"} diff --git a/requirements-dev.txt b/requirements-dev.txt index 18a6bf3..13dea66 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,4 +1,5 @@ jsonschema>=4.22.0 +PyYAML>=6.0.0 pytest>=8.0.0 build>=1.2.0 twine>=5.0.0 diff --git a/research/languages/expanded-ecosystems.md b/research/languages/expanded-ecosystems.md new file mode 100644 index 0000000..34cf4f9 --- /dev/null +++ b/research/languages/expanded-ecosystems.md @@ -0,0 +1,65 @@ +# Expanded Ecosystem Notes + +ANCP now includes adapters beyond the first mainstream compiler set. These adapters cover automation languages, functional languages, infrastructure-as-code, and config/data formats that coding agents edit constantly. + +## Shell And PowerShell + +Shell scripts and PowerShell scripts are high-impact because they sit in CI, release, deployment, and local automation paths. + +Adapter strategy: + +- Shell: prefer ShellCheck JSON when available; fall back to `bash -n` for syntax checks. +- PowerShell: use the built-in Parser API to emit syntax diagnostics without executing the script. + +ANCP impact: + +- script diagnostics must be treated as compiler-facing because broken automation can block builds even when application code is correct, +- analyzers may expose suggestions, but applying them should remain review-required. + +## Lua, Perl, And R + +These dynamic ecosystems have useful parser/compile-only entry points: + +- Lua: `luac -p` +- Perl: `perl -c` +- R: `parse(file=...)` through `Rscript` + +ANCP impact: + +- a parser-only adapter is still valuable when it emits valid diagnostics and labels its scope honestly, +- richer semantic diagnostics should be optional and toolchain-specific. + +## Haskell, OCaml, Erlang, Elixir, And Clojure + +These ecosystems are important for compiler/tooling breadth and for projects with functional-language services. 
+ +Adapter strategy: + +- Haskell: `ghc -fno-code` +- OCaml: `ocamlc -c` +- Erlang: `erlc` +- Elixir: `elixirc` +- Clojure: clj-kondo JSON where installed + +ANCP impact: + +- adapters must preserve module/build context where available, +- build-system integration should be optional because Stack, Cabal, Dune, Rebar, Mix, deps.edn, and Leiningen have different assumptions. + +## Config, Data, And Infrastructure Languages + +Agents edit JSON, TOML, YAML, Nix, Terraform, Dockerfiles, and SQL constantly. Broken config often blocks the whole project. + +Adapter strategy: + +- JSON/TOML/YAML: use embedded parsers for no-external-tool syntax diagnostics. +- Nix: `nix-instantiate --parse` +- Terraform: `terraform validate -json` +- Dockerfile: hadolint JSON +- SQL: sqlfluff JSON + +ANCP impact: + +- config diagnostics should be first-class `result.check` documents, +- missing external tools should report `tool_failed`, +- embedded parser adapters should be preferred when they are deterministic and safe. diff --git a/research/source-docs/sources.json b/research/source-docs/sources.json index bf489ae..ba05163 100644 --- a/research/source-docs/sources.json +++ b/research/source-docs/sources.json @@ -529,5 +529,167 @@ "url": "https://www.julia-vscode.org/StaticLint.jl/v4.4/", "local": "julia/staticlint.html", "relevance": "Julia static analysis used by LanguageServer.jl for project-wide diagnostics." + }, + { + "id": "shell-bash-invocation", + "category": "language", + "language": "shell", + "title": "Bash Invocation", + "url": "https://www.gnu.org/software/bash/manual/html_node/Invoking-Bash.html", + "local": "shell/bash-invocation.html", + "relevance": "Shell syntax-check mode through Bash invocation flags." 
+ }, + { + "id": "shell-shellcheck-json", + "category": "tool", + "language": "shell", + "title": "ShellCheck Output Formats", + "url": "https://github.com/koalaman/shellcheck/wiki/Integration", + "local": "shell/shellcheck-integration.html", + "relevance": "ShellCheck JSON output and diagnostic integration." + }, + { + "id": "powershell-parser-api", + "category": "language", + "language": "powershell", + "title": "PowerShell Parser API", + "url": "https://learn.microsoft.com/en-us/dotnet/api/system.management.automation.language.parser.parsefile", + "local": "powershell/parser-parsefile.html", + "relevance": "PowerShell parser diagnostics from ParseFile." + }, + { + "id": "lua-luac", + "category": "language", + "language": "lua", + "title": "Lua Standalone Compiler", + "url": "https://www.lua.org/manual/5.4/luac.html", + "local": "lua/luac.html", + "relevance": "Lua syntax checking with luac -p." + }, + { + "id": "perl-run", + "category": "language", + "language": "perl", + "title": "perlrun", + "url": "https://perldoc.perl.org/perlrun", + "local": "perl/perlrun.html", + "relevance": "Perl command-line compile check behavior." + }, + { + "id": "r-parse", + "category": "language", + "language": "r", + "title": "R parse", + "url": "https://stat.ethz.ch/R-manual/R-devel/library/base/html/parse.html", + "local": "r/parse.html", + "relevance": "R parser behavior and parse diagnostics." + }, + { + "id": "haskell-ghc-using", + "category": "language", + "language": "haskell", + "title": "Using GHC", + "url": "https://downloads.haskell.org/ghc/latest/docs/users_guide/using.html", + "local": "haskell/using-ghc.html", + "relevance": "GHC command-line compilation and no-code checking." + }, + { + "id": "ocaml-ocamlc", + "category": "language", + "language": "ocaml", + "title": "OCaml ocamlc", + "url": "https://ocaml.org/manual/5.3/comp.html", + "local": "ocaml/ocamlc.html", + "relevance": "OCaml bytecode compiler diagnostics and compile-only behavior." 
+ }, + { + "id": "erlang-erlc", + "category": "language", + "language": "erlang", + "title": "erlc", + "url": "https://www.erlang.org/doc/man/erlc.html", + "local": "erlang/erlc.html", + "relevance": "Erlang compiler command-line behavior." + }, + { + "id": "elixir-compiler-cli", + "category": "language", + "language": "elixir", + "title": "elixirc", + "url": "https://hexdocs.pm/elixir/main/Kernel.ParallelCompiler.html", + "local": "elixir/compiler.html", + "relevance": "Elixir compiler diagnostics and parallel compiler behavior." + }, + { + "id": "clojure-clj-kondo", + "category": "tool", + "language": "clojure", + "title": "clj-kondo", + "url": "https://cljdoc.org/d/clj-kondo/clj-kondo/2026.04.15/doc/configuration", + "local": "clojure/clj-kondo-configuration.html", + "relevance": "Clojure static lint diagnostics and JSON output." + }, + { + "id": "nix-instantiate", + "category": "language", + "language": "nix", + "title": "nix-instantiate", + "url": "https://nix.dev/manual/nix/2.24/command-ref/nix-instantiate", + "local": "nix/nix-instantiate.html", + "relevance": "Nix expression parsing and evaluation command surface." + }, + { + "id": "terraform-validate", + "category": "tool", + "language": "terraform", + "title": "terraform validate", + "url": "https://developer.hashicorp.com/terraform/cli/commands/validate", + "local": "terraform/validate.html", + "relevance": "Terraform JSON validation diagnostics." + }, + { + "id": "dockerfile-hadolint", + "category": "tool", + "language": "dockerfile", + "title": "Hadolint", + "url": "https://raw.githubusercontent.com/hadolint/hadolint/master/README.md", + "local": "dockerfile/hadolint-readme.md", + "relevance": "Dockerfile lint diagnostics and JSON output." + }, + { + "id": "sql-sqlfluff-cli", + "category": "tool", + "language": "sql", + "title": "SQLFluff CLI", + "url": "https://docs.sqlfluff.com/en/stable/reference/cli.html", + "local": "sql/sqlfluff-cli.html", + "relevance": "SQL lint diagnostics and JSON output." 
+ }, + { + "id": "json-rfc8259", + "category": "language", + "language": "json", + "title": "RFC 8259 JSON", + "url": "https://www.rfc-editor.org/rfc/rfc8259.html", + "local": "json/rfc8259.html", + "relevance": "JSON syntax and interchange requirements." + }, + { + "id": "toml-spec", + "category": "language", + "language": "toml", + "title": "TOML Specification", + "url": "https://toml.io/en/v1.0.0", + "local": "toml/spec.html", + "relevance": "TOML syntax requirements for config files." + }, + { + "id": "yaml-spec", + "category": "language", + "language": "yaml", + "title": "YAML Specification", + "url": "https://yaml.org/spec/1.2.2/", + "local": "yaml/spec.html", + "relevance": "YAML syntax requirements for config files." } ] diff --git a/research/tooling-matrix.md b/research/tooling-matrix.md index 48ed5e2..0dd9019 100644 --- a/research/tooling-matrix.md +++ b/research/tooling-matrix.md @@ -26,6 +26,10 @@ The key conclusion: | Dart | `dart analyze`, `dart fix` | Analyzer output is CLI-oriented; analysis server/LSP structured | `dart fix --dry-run` and `--apply` | ANCP repair plan split matches Dart's dry-run/apply model. | | Scala | `scalac`, Scala CLI, sbt, Metals/BSP | Compiler/build output often text; BSP/Metals structured | Scala CLI actionable diagnostics; Scalafix | ANCP should represent build server protocol/source generator/dependency context. | | Julia | `julia`, `Meta.parse`, LanguageServer.jl, StaticLint.jl, Pkg/test workflows | Parser errors are text/exception based; LanguageServer.jl/StaticLint.jl provide richer structured diagnostic paths | Mostly IDE/LSP actions and package-specific tooling; core syntax repair is manual | ANCP should treat Julia as a dynamic compiler/JIT ecosystem: parse first, then layer LSP/static lint and test execution when installed. 
| +| Shell/PowerShell | `bash -n`, ShellCheck, PowerShell Parser API | ShellCheck JSON; PowerShell parser errors can be converted from objects | ShellCheck suggestions, PSScriptAnalyzer fixes | ANCP must support automation languages because agents frequently edit scripts and CI glue. | +| Lua/Perl/R | `luac -p`, `perl -c`, R `parse()` | Mostly text diagnostics; R parser exposes conditions | Mostly manual or linter-driven | Dynamic language adapters can start with parser gates and layer linters later. | +| Haskell/OCaml/Erlang/Elixir/Clojure | GHC, ocamlc, erlc, elixirc, clj-kondo | Compiler text; clj-kondo JSON | Ecosystem-specific formatters/lint fixers | Functional ecosystems fit ANCP if adapters preserve native compiler output and module/build context. | +| Config/data/IaC | JSON, TOML, YAML, Nix, Terraform, Dockerfile, SQL | JSON/TOML/YAML parsers; Terraform JSON; hadolint/sqlfluff JSON | Formatters and linters, usually review-required | Agents edit config constantly; ANCP should normalize syntax/config failures as first-class diagnostics. | ## Common Fields Found Across Ecosystems @@ -109,6 +113,10 @@ Every adapter needs a native validation path: - Ruby/PHP: syntax check plus test runner and linter/static analyzer. - Dart: `dart analyze`, `dart test`. - Julia: `julia --startup-file=no --history-file=no`, parser checks through `Meta.parse`/`Meta.parseall`, LanguageServer.jl/StaticLint.jl when available, and `Pkg.test` for package verification. +- Shell/PowerShell: `bash -n`, ShellCheck, PowerShell Parser API, PSScriptAnalyzer. +- Lua/Perl/R: `luac -p`, `perl -c`, `Rscript -e parse(...)`. +- Haskell/OCaml/Erlang/Elixir/Clojure: compiler syntax/type checks and clj-kondo JSON. +- Config/data/IaC: parser validation for JSON/TOML/YAML, `nix-instantiate --parse`, `terraform validate -json`, hadolint, sqlfluff. ANCP must model verification as commands and diagnostic delta, not as a boolean. 
@@ -171,3 +179,9 @@ The research confirms these core abstractions are necessary: | Dart | Use `dart analyze`; map `dart fix --dry-run` to plans and `--apply` to review-required apply. | | Scala | Use Scala CLI/sbt/BSP/Metals; expose SemanticDB support under graph profile. | | Julia | Use a no-startup parser pass for fast syntax diagnostics; integrate LanguageServer.jl/StaticLint.jl for semantic diagnostics; treat package tests and Pkg operations as effectful verification steps. | +| Shell | Prefer ShellCheck JSON; fall back to `bash -n` for syntax-only checks. | +| PowerShell | Use the parser API for syntax diagnostics; PSScriptAnalyzer can be layered as lint/repair metadata. | +| Lua/Perl/R | Use native parser/compile-only commands first; add language-specific linters as optional richer adapters. | +| Haskell/OCaml/Erlang/Elixir/Clojure | Use native compilers or clj-kondo; keep build-system integration optional because project setup differs heavily. | +| JSON/TOML/YAML | Use embedded parsers for deterministic syntax diagnostics with no external tool dependency. | +| Nix/Terraform/Dockerfile/SQL | Prefer native JSON-output tooling where available; report missing tools honestly. 
| diff --git a/spec/ancp-1.0.md b/spec/ancp-1.0.md index 6e835de..7e63a56 100644 --- a/spec/ancp-1.0.md +++ b/spec/ancp-1.0.md @@ -44,7 +44,9 @@ ANCP 1.0 is designed for these language families: - statically typed compiled languages such as Rust, Go, C, C++, Zig, Swift, Kotlin, Java, C#, - gradually typed languages such as TypeScript and Python with type checkers, -- dynamic/compiler-JIT ecosystems such as JavaScript, Ruby, PHP, Julia, Lua, and Python without type checking, +- dynamic/compiler-JIT ecosystems such as JavaScript, Ruby, PHP, Julia, Lua, Perl, R, Elixir, Erlang, Clojure, and Python without type checking, +- functional and systems ecosystems such as Haskell and OCaml, +- shell and automation languages such as POSIX shell/Bash and PowerShell, - configuration and data languages such as JSON, YAML, TOML, Nix, Terraform, Dockerfile, SQL, and GraphQL, - mixed repositories containing multiple languages and toolchains. diff --git a/src/ancp/adapters/base.py b/src/ancp/adapters/base.py index eef4b16..a0890df 100644 --- a/src/ancp/adapters/base.py +++ b/src/ancp/adapters/base.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import pathlib import tempfile from dataclasses import dataclass @@ -22,7 +23,7 @@ parse_rust_json_lines, parse_text_lines, ) -from ancp.util import CommandResult, command_run_object, find_executable, list_files, run_command +from ancp.util import CommandResult, command_run_object, find_executable, list_files, run_command, utc_now @dataclass(frozen=True) @@ -184,6 +185,67 @@ def verification_steps(self, root: pathlib.Path) -> list[dict[str, Any]]: ] +class InternalSyntaxAdapter(Adapter): + """Adapter base for syntax checks implemented inside ANCP.""" + + tool_name = "ancp-internal" + + def available_tool(self) -> ToolSpec | None: + return ToolSpec(self.tool_name, "compiler", ["ancp", "internal-check", self.key], transport="api", version_args=["ancp", "--version"]) + + def toolchain_entries(self) -> list[dict[str, Any]]: + 
return [{"name": self.tool_name, "role": "compiler", "command": ["ancp", "internal-check", self.key], "transport": "api"}] + + def check(self, root: pathlib.Path, timeout: int = 60) -> dict[str, Any]: + started = utc_now() + diagnostics = self.parse_internal(root) + result = CommandResult( + argv=["ancp", "internal-check", self.key], + cwd=root, + started_at=started, + ended_at=utc_now(), + duration_ms=0, + exit_code=1 if diagnostics else 0, + stdout="", + stderr="", + ) + document = doc.envelope("result.check", f"ancp-{self.key}-adapter") + document.update( + { + "status": "failed" if diagnostics else "passed", + "workspace": doc.workspace_object(root), + "run": command_run_object(result), + "toolchain": self.toolchain_entries(), + "diagnostics": diagnostics, + } + ) + return document + + def parse_internal(self, root: pathlib.Path) -> list[dict[str, Any]]: + return [] + + def internal_diagnostic( + self, + index: int, + path: pathlib.Path, + native_code: str, + message: str, + line: int = 0, + col: int = 0, + ) -> dict[str, Any]: + return doc.diagnostic( + f"diag-{self.key}-{index:04d}", + "ancp.diag.syntax.invalid", + native_code, + "error", + "syntax", + message, + doc.location(path, self.language_id, line, col, line, col + 1), + self.tool_name, + [doc.repair_hint("ancp.repair.syntax.insert_token", f"Fix {self.display_name} syntax", 0.4)], + ) + + class TypeScriptAdapter(Adapter): key = "typescript" language_id = "typescript" @@ -621,3 +683,586 @@ def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec {"stderrSummary": text[-2000:]}, ) ] + + +class JsonAdapter(InternalSyntaxAdapter): + key = "json" + language_id = "json" + display_name = "JSON" + file_extensions = {".json", ".jsonc"} + markers = {"package.json", "tsconfig.json", "composer.json"} + tool_name = "python-json" + + def parse_internal(self, root: pathlib.Path) -> list[dict[str, Any]]: + diagnostics: list[dict[str, Any]] = [] + for path in list_files(root, 
self.file_extensions, limit=200): + try: + json.loads(path.read_text(encoding="utf-8-sig")) + except json.JSONDecodeError as exc: + diagnostics.append(self.internal_diagnostic(len(diagnostics) + 1, path, "JSONDecodeError", exc.msg, exc.lineno - 1, exc.colno - 1)) + except UnicodeDecodeError as exc: + diagnostics.append(self.internal_diagnostic(len(diagnostics) + 1, path, "UnicodeDecodeError", str(exc), 0, 0)) + return diagnostics + + +class TomlAdapter(InternalSyntaxAdapter): + key = "toml" + language_id = "toml" + display_name = "TOML" + file_extensions = {".toml"} + markers = {"pyproject.toml", "Cargo.toml", "Project.toml"} + tool_name = "python-tomllib" + + def parse_internal(self, root: pathlib.Path) -> list[dict[str, Any]]: + import tomllib + + diagnostics: list[dict[str, Any]] = [] + for path in list_files(root, self.file_extensions, limit=200): + try: + tomllib.loads(path.read_text(encoding="utf-8")) + except tomllib.TOMLDecodeError as exc: + line = max(int(getattr(exc, "lineno", 1) or 1) - 1, 0) + col = max(int(getattr(exc, "colno", 1) or 1) - 1, 0) + diagnostics.append(self.internal_diagnostic(len(diagnostics) + 1, path, "TOMLDecodeError", str(exc).splitlines()[0], line, col)) + except UnicodeDecodeError as exc: + diagnostics.append(self.internal_diagnostic(len(diagnostics) + 1, path, "UnicodeDecodeError", str(exc), 0, 0)) + return diagnostics + + +class YamlAdapter(InternalSyntaxAdapter): + key = "yaml" + language_id = "yaml" + display_name = "YAML" + file_extensions = {".yaml", ".yml"} + markers = {".github", "docker-compose.yml", "docker-compose.yaml"} + tool_name = "pyyaml" + + def parse_internal(self, root: pathlib.Path) -> list[dict[str, Any]]: + import yaml + + diagnostics: list[dict[str, Any]] = [] + for path in list_files(root, self.file_extensions, limit=200): + try: + list(yaml.safe_load_all(path.read_text(encoding="utf-8"))) + except yaml.YAMLError as exc: + mark = getattr(exc, "problem_mark", None) + line = int(getattr(mark, "line", 0) or 0) + 
col = int(getattr(mark, "column", 0) or 0) + message = getattr(exc, "problem", None) or str(exc).splitlines()[0] + diagnostics.append(self.internal_diagnostic(len(diagnostics) + 1, path, exc.__class__.__name__, message, line, col)) + except UnicodeDecodeError as exc: + diagnostics.append(self.internal_diagnostic(len(diagnostics) + 1, path, "UnicodeDecodeError", str(exc), 0, 0)) + return diagnostics + + +class ShellAdapter(Adapter): + key = "shell" + language_id = "shellscript" + display_name = "Shell" + file_extensions = {".sh", ".bash", ".zsh", ".ksh"} + markers = {".shellcheckrc"} + tools = [ + ToolSpec("shellcheck", "linter", ["shellcheck", "--format=json", "."]), + ToolSpec("bash", "compiler", ["bash", "-n"]), + ] + + def run_check(self, root: pathlib.Path, tool: ToolSpec, timeout: int) -> CommandResult: + files = list_files(root, self.file_extensions, limit=100) + if not files: + return run_command([tool.command[0], "--version"], root, timeout=timeout) + if tool.name == "shellcheck": + return run_command(["shellcheck", "--format=json", *[str(path) for path in files]], root, timeout=timeout) + stdout: list[str] = [] + stderr: list[str] = [] + last: CommandResult | None = None + for path in files: + last = run_command(["bash", "-n", str(path)], root, timeout=timeout) + stdout.append(last.stdout) + stderr.append(last.stderr) + if last.exit_code not in (0, None): + break + assert last is not None + return CommandResult(last.argv, root, last.started_at, last.ended_at, last.duration_ms, last.exit_code, "\n".join(stdout), "\n".join(stderr)) + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + if tool.name == "shellcheck": + try: + payload = json.loads(result.stdout) + except json.JSONDecodeError: + payload = {} + diagnostics = [] + for index, item in enumerate(payload if isinstance(payload, list) else payload.get("comments", []), start=1):  # --format=json emits a bare array; only json1 wraps it in {"comments": [...]} + path = pathlib.Path(item.get("file") or root) + if not path.is_absolute(): + path = root / path + code =
f"SC{item.get('code')}" if item.get("code") else None + message = item.get("message", "") + canonical, kind, hints = canonical_for_native(code, message, "ancp.diag.lint.rule_violation") + diagnostics.append( + doc.diagnostic( + f"diag-shellcheck-{index:04d}", + canonical, + code, + "error" if item.get("level") == "error" else "warning", + "lint" if canonical == "ancp.diag.lint.rule_violation" else kind, + message, + doc.location(path, "shellscript", int(item.get("line", 1)) - 1, int(item.get("column", 1)) - 1), + "shellcheck", + hints, + {"native": item}, + ) + ) + return diagnostics + regex = __import__("re").compile(r"^(?P.+?):\s+line\s+(?P\d+):\s+(?P.+)$") + return parse_text_lines(result.stderr + "\n" + result.stdout, regex, root, "shellscript", "bash", "diag-shell") + + +class PowerShellAdapter(Adapter): + key = "powershell" + language_id = "powershell" + display_name = "PowerShell" + file_extensions = {".ps1", ".psm1", ".psd1"} + markers = {"PSScriptAnalyzerSettings.psd1"} + tools = [ + ToolSpec("pwsh", "compiler", ["pwsh", "-NoProfile"], version_args=["pwsh", "-NoProfile", "-Command", "$PSVersionTable.PSVersion.ToString()"]), + ToolSpec("powershell", "compiler", ["powershell", "-NoProfile"], version_args=["powershell", "-NoProfile", "-Command", "$PSVersionTable.PSVersion.ToString()"]), + ] + + def run_check(self, root: pathlib.Path, tool: ToolSpec, timeout: int) -> CommandResult: + root_literal = "'" + str(root).replace("'", "''") + "'" + program = ( + f"$root={root_literal};" + "$items=Get-ChildItem -LiteralPath $root -Recurse -Include *.ps1,*.psm1,*.psd1 -File;" + "$out=@();" + "foreach($item in $items){" + "$tokens=$null;$errors=$null;" + "[System.Management.Automation.Language.Parser]::ParseFile($item.FullName,[ref]$tokens,[ref]$errors)|Out-Null;" + "foreach($err in $errors){$out += [pscustomobject]@{file=$item.FullName;line=$err.Extent.StartLineNumber;col=$err.Extent.StartColumnNumber;message=$err.Message;code=$err.ErrorId}}" + "};" +
"$out|ConvertTo-Json -Depth 4 -Compress" + ) + return run_command([tool.command[0], "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", program], root, timeout=timeout) + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + text = (result.stdout or "").strip() + combined_text = result.stderr + "\n" + result.stdout + if not text: + return self._parse_native_text(root, combined_text, tool.name) + try: + payload = json.loads(text) + except json.JSONDecodeError: + return self._parse_native_text(root, combined_text, tool.name) + items = payload if isinstance(payload, list) else [payload] + diagnostics = [] + for index, item in enumerate(items[:10], start=1): + path = pathlib.Path(item.get("file") or root) + message = item.get("message", "PowerShell parse error") + code = item.get("code") + canonical, kind, hints = canonical_for_native(code, message) + diagnostics.append( + doc.diagnostic( + f"diag-powershell-{index:04d}", + canonical, + code, + "error", + kind, + message, + doc.location(path, "powershell", int(item.get("line", 1)) - 1, int(item.get("col", 1)) - 1), + tool.name, + hints, + {"native": item}, + ) + ) + return diagnostics + + def _parse_native_text(self, root: pathlib.Path, text: str, source: str) -> list[dict[str, Any]]: + import re + + diagnostics: list[dict[str, Any]] = [] + pattern = re.compile(r"^At (?P<file>.+?):(?P<line>\d+) char:(?P<col>\d+)$") + lines = text.splitlines() + for index, line in enumerate(lines): + match = pattern.match(line.strip()) + if not match: + continue + message = "PowerShell parse error" + for candidate in lines[index + 1 : index + 6]: + stripped = candidate.strip() + if stripped and not stripped.startswith("+") and not set(stripped) <= {"~"}: + message = stripped + break + code = None + for candidate in lines[index + 1 : index + 12]: + if "FullyQualifiedErrorId" in candidate: + code = candidate.split(":", 1)[-1].strip() + break + path = pathlib.Path(match.group("file")) +
canonical, kind, hints = canonical_for_native(code, message) + diagnostics.append( + doc.diagnostic( + f"diag-powershell-{len(diagnostics) + 1:04d}", + canonical, + code, + "error", + kind, + message, + doc.location(path, "powershell", int(match.group("line")) - 1, int(match.group("col")) - 1), + source, + hints, + {"raw": "\n".join(lines[index : index + 8])}, + ) + ) + return diagnostics + + +class LuaAdapter(Adapter): + key = "lua" + language_id = "lua" + display_name = "Lua" + file_extensions = {".lua"} + markers = {".luacheckrc"} + tools = [ToolSpec("luac", "compiler", ["luac", "-p"]), ToolSpec("lua", "compiler", ["lua", "-e", ""])] + + def run_check(self, root: pathlib.Path, tool: ToolSpec, timeout: int) -> CommandResult: + files = list_files(root, self.file_extensions, limit=100) + if not files: + return run_command([tool.command[0], "-v"], root, timeout=timeout) + checker = "luac" if tool.name == "luac" else "lua" + stdout: list[str] = [] + stderr: list[str] = [] + last: CommandResult | None = None + for path in files: + argv = ["luac", "-p", str(path)] if checker == "luac" else ["lua", "-e", f"assert(loadfile([[{path}]]))"] + last = run_command(argv, root, timeout=timeout) + stdout.append(last.stdout) + stderr.append(last.stderr) + if last.exit_code not in (0, None): + break + assert last is not None + return CommandResult(last.argv, root, last.started_at, last.ended_at, last.duration_ms, last.exit_code, "\n".join(stdout), "\n".join(stderr)) + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + regex = __import__("re").compile(r"^(?:luac:\s+)?(?P.+?):(?P\d+):\s+(?P.+)$") + return parse_text_lines(result.stderr + "\n" + result.stdout, regex, root, "lua", tool.name, "diag-lua") + + +class PerlAdapter(Adapter): + key = "perl" + language_id = "perl" + display_name = "Perl" + file_extensions = {".pl", ".pm", ".t"} + markers = {"cpanfile", "Makefile.PL"} + tools = [ToolSpec("perl", "compiler", ["perl", 
"-c"])] + + def run_check(self, root: pathlib.Path, tool: ToolSpec, timeout: int) -> CommandResult: + files = list_files(root, self.file_extensions, limit=100) + if not files: + return run_command(["perl", "-v"], root, timeout=timeout) + outputs: list[str] = [] + errors: list[str] = [] + last: CommandResult | None = None + for path in files: + last = run_command(["perl", "-c", str(path)], root, timeout=timeout) + outputs.append(last.stdout) + errors.append(last.stderr) + if last.exit_code not in (0, None): + break + assert last is not None + return CommandResult(last.argv, root, last.started_at, last.ended_at, last.duration_ms, last.exit_code, "\n".join(outputs), "\n".join(errors)) + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + regex = __import__("re").compile(r"^(?P.+?)\s+at\s+(?P.+?)\s+line\s+(?P\d+)") + return parse_text_lines(result.stderr + "\n" + result.stdout, regex, root, "perl", "perl", "diag-perl") + + +class RAdapter(Adapter): + key = "r" + language_id = "r" + display_name = "R" + file_extensions = {".r", ".R"} + markers = {"DESCRIPTION", "renv.lock"} + tools = [ToolSpec("Rscript", "compiler", ["Rscript", "--vanilla"])] + + def run_check(self, root: pathlib.Path, tool: ToolSpec, timeout: int) -> CommandResult: + files = list_files(root, self.file_extensions, limit=100) + if not files: + return run_command(["Rscript", "--version"], root, timeout=timeout) + program = "for (f in commandArgs(TRUE)) parse(file=f)" + return run_command(["Rscript", "--vanilla", "-e", program, *[str(path) for path in files]], root, timeout=timeout) + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + regex = __import__("re").compile(r"^(?P.+?):(?P\d+):(?P\d+):\s*(?P.+)$") + return parse_text_lines(result.stderr + "\n" + result.stdout, regex, root, "r", "Rscript", "diag-r") + + +class HaskellAdapter(Adapter): + key = "haskell" + language_id = "haskell" + 
display_name = "Haskell" + file_extensions = {".hs", ".lhs"} + markers = {"stack.yaml", "cabal.project"} + tools = [ToolSpec("ghc", "compiler", ["ghc", "-fno-code"])] + + def run_check(self, root: pathlib.Path, tool: ToolSpec, timeout: int) -> CommandResult: + files = list_files(root, self.file_extensions, limit=100) + if not files: + return run_command(["ghc", "--version"], root, timeout=timeout) + return run_command(["ghc", "-fno-code", *[str(path) for path in files]], root, timeout=timeout) + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + regex = __import__("re").compile(r"^(?P.+?):(?P\d+):(?P\d+):\s+(?Perror|warning):\s+(?P.+)$", __import__("re").IGNORECASE) + return parse_text_lines(result.stderr + "\n" + result.stdout, regex, root, "haskell", "ghc", "diag-haskell") + + +class OcamlAdapter(Adapter): + key = "ocaml" + language_id = "ocaml" + display_name = "OCaml" + file_extensions = {".ml", ".mli"} + markers = {"dune-project", "dune"} + tools = [ToolSpec("ocamlc", "compiler", ["ocamlc", "-c"])] + + def run_check(self, root: pathlib.Path, tool: ToolSpec, timeout: int) -> CommandResult: + files = list_files(root, self.file_extensions, limit=100) + if not files: + return run_command(["ocamlc", "-version"], root, timeout=timeout) + return run_command(["ocamlc", "-c", *[str(path) for path in files]], root, timeout=timeout) + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + regex = __import__("re").compile(r'^File "(?P.+?)", line (?P\d+), characters (?P\d+)-\d+:\s*(?P.+)$') + return parse_text_lines(result.stderr + "\n" + result.stdout, regex, root, "ocaml", "ocamlc", "diag-ocaml") + + +class ErlangAdapter(Adapter): + key = "erlang" + language_id = "erlang" + display_name = "Erlang" + file_extensions = {".erl", ".hrl"} + markers = {"rebar.config"} + tools = [ToolSpec("erlc", "compiler", ["erlc"])] + + def run_check(self, root: pathlib.Path, 
tool: ToolSpec, timeout: int) -> CommandResult: + files = [path for path in list_files(root, self.file_extensions, limit=100) if path.suffix.lower() == ".erl"] + if not files: + return run_command(["erlc", "-v"], root, timeout=timeout) + with tempfile.TemporaryDirectory(prefix="ancp-erlc-") as out_dir:  # removed on exit so compiled .beam output does not pile up in the temp dir + return run_command(["erlc", "-o", out_dir, *[str(path) for path in files]], root, timeout=timeout) + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + regex = __import__("re").compile(r"^(?P.+?):(?P\d+):(?P\d+)?:?\s*(?P.+)$") + return parse_text_lines(result.stderr + "\n" + result.stdout, regex, root, "erlang", "erlc", "diag-erlang") + + +class ElixirAdapter(Adapter): + key = "elixir" + language_id = "elixir" + display_name = "Elixir" + file_extensions = {".ex", ".exs"} + markers = {"mix.exs"} + tools = [ToolSpec("elixirc", "compiler", ["elixirc"])] + + def run_check(self, root: pathlib.Path, tool: ToolSpec, timeout: int) -> CommandResult: + files = list_files(root, self.file_extensions, limit=100) + if not files: + return run_command(["elixirc", "--version"], root, timeout=timeout) + with tempfile.TemporaryDirectory(prefix="ancp-elixirc-") as out_dir:  # removed on exit so compiled .beam output does not pile up in the temp dir + return run_command(["elixirc", "-o", out_dir, *[str(path) for path in files]], root, timeout=timeout) + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + regex = __import__("re").compile(r"^\*\* \((?P.+?)\)\s+(?P.+?):(?P\d+):(?P\d+)?:?\s*(?P.+)$") + return parse_text_lines(result.stderr + "\n" + result.stdout, regex, root, "elixir", "elixirc", "diag-elixir") + + +class ClojureAdapter(Adapter): + key = "clojure" + language_id = "clojure" + display_name = "Clojure" + file_extensions = {".clj", ".cljs", ".cljc", ".edn"} + markers = {"deps.edn", "project.clj", ".clj-kondo"} + tools = [ToolSpec("clj-kondo", "linter", ["clj-kondo", "--lint", ".", "--config", "{:output {:format :json}}"])] + + def
parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + try: + payload = json.loads(result.stdout) + except json.JSONDecodeError: + return [] + diagnostics = [] + for index, item in enumerate(payload.get("findings", []), start=1): + path = pathlib.Path(item.get("filename") or root) + if not path.is_absolute(): + path = root / path + message = item.get("message", "") + native_code = item.get("type") + canonical, kind, hints = canonical_for_native(native_code, message, "ancp.diag.lint.rule_violation") + diagnostics.append( + doc.diagnostic( + f"diag-clj-kondo-{index:04d}", + canonical, + native_code, + "error" if item.get("level") == "error" else "warning", + "lint" if canonical == "ancp.diag.lint.rule_violation" else kind, + message, + doc.location(path, "clojure", int(item.get("row", 1)) - 1, int(item.get("col", 1)) - 1), + "clj-kondo", + hints, + {"native": item}, + ) + ) + return diagnostics + + +class NixAdapter(Adapter): + key = "nix" + language_id = "nix" + display_name = "Nix" + file_extensions = {".nix"} + markers = {"flake.nix"} + tools = [ToolSpec("nix-instantiate", "compiler", ["nix-instantiate", "--parse"])] + + def run_check(self, root: pathlib.Path, tool: ToolSpec, timeout: int) -> CommandResult: + files = list_files(root, self.file_extensions, limit=100) + if not files: + return run_command(["nix-instantiate", "--version"], root, timeout=timeout) + stdout: list[str] = [] + stderr: list[str] = [] + last: CommandResult | None = None + for path in files: + last = run_command(["nix-instantiate", "--parse", str(path)], root, timeout=timeout) + stdout.append(last.stdout) + stderr.append(last.stderr) + if last.exit_code not in (0, None): + break + assert last is not None + return CommandResult(last.argv, root, last.started_at, last.ended_at, last.duration_ms, last.exit_code, "\n".join(stdout), "\n".join(stderr)) + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> 
list[dict[str, Any]]: + regex = __import__("re").compile(r"^\s*at\s+(?P.+?):(?P\d+):(?P\d+):\s*(?P.+)$") + return parse_text_lines(result.stderr + "\n" + result.stdout, regex, root, "nix", "nix-instantiate", "diag-nix") + + +class TerraformAdapter(Adapter): + key = "terraform" + language_id = "terraform" + display_name = "Terraform" + file_extensions = {".tf", ".tfvars"} + markers = {".terraform.lock.hcl"} + tools = [ToolSpec("terraform", "build", ["terraform", "validate", "-json"])] + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + try: + payload = json.loads(result.stdout) + except json.JSONDecodeError: + return [] + diagnostics = [] + for index, item in enumerate(payload.get("diagnostics", []), start=1): + rng = item.get("range") or {} + filename = rng.get("filename") or root + path = pathlib.Path(filename) + if not path.is_absolute(): + path = root / path + start = rng.get("start") or {} + message = item.get("detail") or item.get("summary") or "Terraform diagnostic" + native_code = item.get("summary") + canonical, kind, hints = canonical_for_native(native_code, message, "ancp.diag.configuration.invalid") + diagnostics.append( + doc.diagnostic( + f"diag-terraform-{index:04d}", + canonical, + native_code, + "warning" if item.get("severity") == "warning" else "error", + "configuration" if kind == "unknown" else kind, + message, + doc.location(path, "terraform", int(start.get("line", 1)) - 1, int(start.get("column", 1)) - 1), + "terraform", + hints, + {"native": item}, + ) + ) + return diagnostics + + +class DockerfileAdapter(Adapter): + key = "dockerfile" + language_id = "dockerfile" + display_name = "Dockerfile" + file_extensions: set[str] = set() + markers = {"Dockerfile", ".hadolint.yaml"} + tools = [ToolSpec("hadolint", "linter", ["hadolint", "--format", "json", "Dockerfile"])] + + def matches(self, root: pathlib.Path) -> bool: + ignored = {".git", ".ancp", "node_modules", "dist", "build", 
"__pycache__", ".pytest_cache"} + for current, dirs, files in __import__("os").walk(root): + dirs[:] = [item for item in dirs if item not in ignored] + if any(name == "Dockerfile" or name.endswith(".Dockerfile") for name in files): + return True + return False + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + try: + payload = json.loads(result.stdout) + except json.JSONDecodeError: + return [] + diagnostics = [] + for index, item in enumerate(payload if isinstance(payload, list) else [], start=1): + path = pathlib.Path(item.get("file") or root / "Dockerfile") + if not path.is_absolute(): + path = root / path + code = item.get("code") + message = item.get("message", "") + canonical, kind, hints = canonical_for_native(code, message, "ancp.diag.lint.rule_violation") + diagnostics.append( + doc.diagnostic( + f"diag-hadolint-{index:04d}", + canonical, + code, + "error" if item.get("level") == "error" else "warning", + "lint" if canonical == "ancp.diag.lint.rule_violation" else kind, + message, + doc.location(path, "dockerfile", int(item.get("line", 1)) - 1, int(item.get("column", 1)) - 1), + "hadolint", + hints, + {"native": item}, + ) + ) + return diagnostics + + +class SqlAdapter(Adapter): + key = "sql" + language_id = "sql" + display_name = "SQL" + file_extensions = {".sql"} + markers = {".sqlfluff", ".sqlfluffignore"} + tools = [ToolSpec("sqlfluff", "linter", ["sqlfluff", "lint", "--format", "json", "."])] + + def parse_result(self, root: pathlib.Path, result: CommandResult, tool: ToolSpec) -> list[dict[str, Any]]: + try: + payload = json.loads(result.stdout) + except json.JSONDecodeError: + return [] + diagnostics = [] + counter = 0 + for file_item in payload if isinstance(payload, list) else []: + path = pathlib.Path(file_item.get("filepath") or root) + if not path.is_absolute(): + path = root / path + for violation in file_item.get("violations", []): + counter += 1 + code = violation.get("code") + 
message = violation.get("description", "") + canonical, kind, hints = canonical_for_native(code, message, "ancp.diag.lint.rule_violation") + diagnostics.append( + doc.diagnostic( + f"diag-sqlfluff-{counter:04d}", + canonical, + code, + "warning", + "lint" if canonical == "ancp.diag.lint.rule_violation" else kind, + message, + doc.location(path, "sql", int(violation.get("start_line_no", 1)) - 1, int(violation.get("start_line_pos", 1)) - 1), + "sqlfluff", + hints, + {"native": violation}, + ) + ) + return diagnostics diff --git a/src/ancp/adapters/registry.py b/src/ancp/adapters/registry.py index 74f877b..1792e1b 100644 --- a/src/ancp/adapters/registry.py +++ b/src/ancp/adapters/registry.py @@ -7,20 +7,37 @@ from .base import ( Adapter, CCppAdapter, + ClojureAdapter, DartAdapter, + DockerfileAdapter, DotnetAdapter, + ElixirAdapter, + ErlangAdapter, GoAdapter, + HaskellAdapter, JavaAdapter, JavaScriptAdapter, + JsonAdapter, JuliaAdapter, KotlinAdapter, + LuaAdapter, + NixAdapter, + OcamlAdapter, + PerlAdapter, PhpAdapter, + PowerShellAdapter, PythonAdapter, + RAdapter, RubyAdapter, RustAdapter, ScalaAdapter, + ShellAdapter, + SqlAdapter, SwiftAdapter, + TerraformAdapter, + TomlAdapter, TypeScriptAdapter, + YamlAdapter, ZigAdapter, ) @@ -42,6 +59,23 @@ DartAdapter(), ScalaAdapter(), JuliaAdapter(), + JsonAdapter(), + TomlAdapter(), + YamlAdapter(), + ShellAdapter(), + PowerShellAdapter(), + LuaAdapter(), + PerlAdapter(), + RAdapter(), + HaskellAdapter(), + OcamlAdapter(), + ErlangAdapter(), + ElixirAdapter(), + ClojureAdapter(), + NixAdapter(), + TerraformAdapter(), + DockerfileAdapter(), + SqlAdapter(), ] @@ -58,6 +92,20 @@ def get_adapter(key: str) -> Adapter | None: "cs": "csharp", "rb": "ruby", "jl": "julia", + "jsonc": "json", + "yml": "yaml", + "ps1": "powershell", + "ps": "powershell", + "sh": "shell", + "bash": "shell", + "rscript": "r", + "hs": "haskell", + "ml": "ocaml", + "erl": "erlang", + "ex": "elixir", + "clj": "clojure", + "hcl": "terraform", + "tf": 
"terraform", } normalized = aliases.get(normalized, normalized) for adapter in ADAPTERS: @@ -69,4 +117,3 @@ def get_adapter(key: str) -> Adapter | None: def matching_adapters(root: pathlib.Path) -> list[Adapter]: matches = [adapter for adapter in ADAPTERS if adapter.matches(root)] return matches or ADAPTERS - diff --git a/src/ancp/cli.py b/src/ancp/cli.py index 053ca92..4f9c623 100644 --- a/src/ancp/cli.py +++ b/src/ancp/cli.py @@ -52,13 +52,39 @@ def manifest_document() -> dict[str, Any]: ], "compilerProxyShims": [ "ancp-tsc", + "ancp-eslint", + "ancp-pyright", + "ancp-ruff", + "ancp-python", "ancp-cargo", "ancp-rustc", "ancp-go", - "ancp-kotlinc", + "ancp-gcc", + "ancp-clang", "ancp-javac", + "ancp-kotlinc", "ancp-dotnet", + "ancp-swift", + "ancp-zig", + "ancp-ruby", + "ancp-php", + "ancp-dart", + "ancp-scalac", "ancp-julia", + "ancp-bash", + "ancp-pwsh", + "ancp-lua", + "ancp-perl", + "ancp-rscript", + "ancp-ghc", + "ancp-ocamlc", + "ancp-erlc", + "ancp-elixirc", + "ancp-clj-kondo", + "ancp-nix-instantiate", + "ancp-terraform", + "ancp-hadolint", + "ancp-sqlfluff", ], }, } diff --git a/src/ancp/native.py b/src/ancp/native.py index d7e93d0..8e4d238 100644 --- a/src/ancp/native.py +++ b/src/ancp/native.py @@ -29,7 +29,7 @@ def canonical_for_native(native_code: str | None, message: str, default: str = " return "ancp.diag.type.mismatch", "type", [doc.repair_hint("ancp.repair.type.convert_value", "Convert value or adjust type annotation", 0.45)] if any(token in text for token in ["unused", "never used"]): return "ancp.diag.symbol.unused", "symbol", [doc.repair_hint("ancp.repair.lint.apply_fix", "Remove or use the unused symbol", 0.7)] - if any(token in text for token in ["syntax", "parse", "expected", "unexpected token"]): + if any(token in text for token in ["syntax", "parse", "expected", "unexpected token", "missing ')'", "missing ']'", "missing '}'", "missing end"]): return "ancp.diag.syntax.invalid", "syntax", [doc.repair_hint("ancp.repair.syntax.insert_token", 
"Fix invalid syntax", 0.45)] if any(token in text for token in ["test", "assert", "failed"]): return "ancp.diag.test.assertion_failed", "test", [doc.repair_hint("ancp.repair.test.fix_subject", "Fix the code under test or the expectation", 0.35)] @@ -262,4 +262,3 @@ def parse_go_test_json(text: str, root: pathlib.Path) -> list[dict[str, Any]]: ) ) return diagnostics - diff --git a/src/ancp/proxy.py b/src/ancp/proxy.py index d576f93..2751e2b 100644 --- a/src/ancp/proxy.py +++ b/src/ancp/proxy.py @@ -41,6 +41,24 @@ "ancp-scala-cli": ("scala", "scala-cli"), "ancp-scalac": ("scala", "scalac"), "ancp-julia": ("julia", "julia"), + "ancp-shellcheck": ("shell", "shellcheck"), + "ancp-bash": ("shell", "bash"), + "ancp-pwsh": ("powershell", "pwsh"), + "ancp-powershell": ("powershell", "powershell"), + "ancp-luac": ("lua", "luac"), + "ancp-lua": ("lua", "lua"), + "ancp-perl": ("perl", "perl"), + "ancp-Rscript": ("r", "Rscript"), + "ancp-rscript": ("r", "Rscript"), + "ancp-ghc": ("haskell", "ghc"), + "ancp-ocamlc": ("ocaml", "ocamlc"), + "ancp-erlc": ("erlang", "erlc"), + "ancp-elixirc": ("elixir", "elixirc"), + "ancp-clj-kondo": ("clojure", "clj-kondo"), + "ancp-nix-instantiate": ("nix", "nix-instantiate"), + "ancp-terraform": ("terraform", "terraform"), + "ancp-hadolint": ("dockerfile", "hadolint"), + "ancp-sqlfluff": ("sql", "sqlfluff"), } @@ -54,7 +72,10 @@ def proxy_document( if adapter is None: raise SystemExit(f"Unknown adapter: {adapter_key}") result = run_command(native_command, root, timeout=timeout) - tool = ToolSpec(native_command[0], "compiler", native_command) + executable_name = pathlib.Path(native_command[0]).name + if executable_name.lower().endswith((".exe", ".cmd", ".bat")): + executable_name = pathlib.Path(executable_name).stem + tool = ToolSpec(executable_name, "compiler", native_command) diagnostics = adapter.parse_result(root, result, tool) document = doc.envelope("result.check", f"ancp-{adapter.key}-compiler-proxy") document.update( @@ -181,4 +202,20 @@ 
def _main() -> int: scala_cli_main = make_shim("ancp-scala-cli") scalac_main = make_shim("ancp-scalac") julia_main = make_shim("ancp-julia") - +shellcheck_main = make_shim("ancp-shellcheck") +bash_main = make_shim("ancp-bash") +pwsh_main = make_shim("ancp-pwsh") +powershell_main = make_shim("ancp-powershell") +luac_main = make_shim("ancp-luac") +lua_main = make_shim("ancp-lua") +perl_main = make_shim("ancp-perl") +rscript_main = make_shim("ancp-rscript") +ghc_main = make_shim("ancp-ghc") +ocamlc_main = make_shim("ancp-ocamlc") +erlc_main = make_shim("ancp-erlc") +elixirc_main = make_shim("ancp-elixirc") +clj_kondo_main = make_shim("ancp-clj-kondo") +nix_instantiate_main = make_shim("ancp-nix-instantiate") +terraform_main = make_shim("ancp-terraform") +hadolint_main = make_shim("ancp-hadolint") +sqlfluff_main = make_shim("ancp-sqlfluff") diff --git a/src/ancp/shim.py b/src/ancp/shim.py index eafc399..356b92c 100644 --- a/src/ancp/shim.py +++ b/src/ancp/shim.py @@ -44,6 +44,23 @@ "scala-cli": "scala", "scalac": "scala", "julia": "julia", + "shellcheck": "shell", + "bash": "shell", + "pwsh": "powershell", + "powershell": "powershell", + "luac": "lua", + "lua": "lua", + "perl": "perl", + "rscript": "r", + "ghc": "haskell", + "ocamlc": "ocaml", + "erlc": "erlang", + "elixirc": "elixir", + "clj-kondo": "clojure", + "nix-instantiate": "nix", + "terraform": "terraform", + "hadolint": "dockerfile", + "sqlfluff": "sql", } diff --git a/tests/test_cli_documents.py b/tests/test_cli_documents.py index 6cdba41..47b6ce2 100644 --- a/tests/test_cli_documents.py +++ b/tests/test_cli_documents.py @@ -3,6 +3,7 @@ from pathlib import Path from ancp.cli import aggregate_status, capabilities_document, graph_document, resolve_workspace, skills_document, verify_document +from ancp.adapters import get_adapter from ancp.schema import validate_document @@ -45,3 +46,21 @@ def test_verify_document_does_not_pass_missing_tool(tmp_path: Path) -> None: document = verify_document(tmp_path, "scala", 
timeout=1) if document["data"]["checkDocuments"][0]["status"] == "tool_failed": assert document["status"] == "tool_failed" + + +def test_internal_json_toml_yaml_adapters_validate(tmp_path: Path) -> None: + cases = [ + ("json", "broken.json", '{"items": [1, 2,]}'), + ("toml", "broken.toml", "[project\nname = 'broken'\n"), + ("yaml", "broken.yaml", "items:\n - one\n nested: bad\n"), + ] + for language, filename, content in cases: + root = tmp_path / language + root.mkdir() + (root / filename).write_text(content, encoding="utf-8") + adapter = get_adapter(language) + assert adapter is not None + document = adapter.check(root) + assert document["status"] == "failed" + assert document["diagnostics"] + assert validate_document(document) == [] diff --git a/tests/test_schema_examples.py b/tests/test_schema_examples.py index f4d7c7f..1731059 100644 --- a/tests/test_schema_examples.py +++ b/tests/test_schema_examples.py @@ -10,7 +10,7 @@ def test_all_examples_validate() -> None: - paths = sorted((ROOT / "examples").rglob("*.json")) + paths = sorted(path for path in (ROOT / "examples").rglob("*.json") if "buggy" not in path.parts) assert paths for path in paths: document = json.loads(path.read_text(encoding="utf-8")) @@ -23,4 +23,3 @@ def test_manifest_document_validates() -> None: document = manifest_document() assert document["documentKind"] == "manifest.adapter" assert validate_document(document) == [] - diff --git a/tools/audit_contracts.py b/tools/audit_contracts.py index fec35ef..f9ee459 100644 --- a/tools/audit_contracts.py +++ b/tools/audit_contracts.py @@ -57,6 +57,23 @@ "dart", "scala", "julia", + "json", + "toml", + "yaml", + "shell", + "powershell", + "lua", + "perl", + "r", + "haskell", + "ocaml", + "erlang", + "elixir", + "clojure", + "nix", + "terraform", + "dockerfile", + "sql", ] REQUIRED_DOCS = [ @@ -83,6 +100,8 @@ def pass_check(message: str) -> None: def example_doc_kinds() -> set[str]: kinds: set[str] = set() for path in (ROOT / 
"examples").rglob("*.json"): + if "buggy" in path.parts: + continue doc = json.loads(path.read_text(encoding="utf-8")) kinds.add(doc["documentKind"]) return kinds diff --git a/tools/check_toolchains.py b/tools/check_toolchains.py new file mode 100644 index 0000000..155a148 --- /dev/null +++ b/tools/check_toolchains.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 +"""Report native compiler/checker availability for ANCP adapters.""" + +from __future__ import annotations + +import argparse +import json +import shutil +from dataclasses import dataclass + + +@dataclass(frozen=True) +class Toolchain: + language: str + required_any: tuple[str, ...] + notes: str + + +TOOLCHAINS = [ + Toolchain("python", ("python", "python3"), "Required for ANCP itself and Python syntax checks."), + Toolchain("typescript", ("tsc",), "TypeScript compiler checks."), + Toolchain("javascript", ("eslint", "node"), "ESLint preferred; Node is runtime-only fallback context."), + Toolchain("rust", ("cargo", "rustc"), "Cargo/rustc JSON diagnostics."), + Toolchain("go", ("go",), "Go build/test tooling."), + Toolchain("c", ("gcc", "clang"), "GCC or Clang C frontend."), + Toolchain("cpp", ("g++", "clang++"), "GCC or Clang C++ frontend."), + Toolchain("java", ("javac",), "JDK compiler."), + Toolchain("kotlin", ("kotlinc",), "Kotlin compiler."), + Toolchain("csharp", ("dotnet",), ".NET SDK."), + Toolchain("swift", ("swift",), "Swift compiler/test tooling."), + Toolchain("zig", ("zig",), "Zig compiler/build tooling."), + Toolchain("ruby", ("ruby",), "ruby -c syntax checks."), + Toolchain("php", ("php",), "php -l syntax checks."), + Toolchain("dart", ("dart",), "Dart analyzer/compiler."), + Toolchain("scala", ("scala-cli", "scalac"), "scala-cli preferred, scalac accepted."), + Toolchain("julia", ("julia",), "Julia parser/runtime checks."), + Toolchain("shell", ("shellcheck", "bash"), "ShellCheck JSON preferred, bash -n fallback."), + Toolchain("powershell", ("pwsh", "powershell"), "PowerShell Parser API."), + 
Toolchain("lua", ("luac", "lua"), "luac preferred, lua loadfile fallback."), + Toolchain("perl", ("perl",), "perl -c syntax checks."), + Toolchain("r", ("Rscript",), "R parser checks."), + Toolchain("haskell", ("ghc",), "GHC -fno-code checks."), + Toolchain("ocaml", ("ocamlc",), "ocamlc syntax/type checks."), + Toolchain("erlang", ("erlc",), "erlc module checks."), + Toolchain("elixir", ("elixirc",), "elixirc compiler checks."), + Toolchain("clojure", ("clj-kondo",), "clj-kondo JSON diagnostics."), + Toolchain("nix", ("nix-instantiate",), "nix-instantiate --parse checks."), + Toolchain("terraform", ("terraform",), "terraform validate -json checks."), + Toolchain("dockerfile", ("hadolint",), "Hadolint JSON diagnostics."), + Toolchain("sql", ("sqlfluff",), "SQLFluff JSON diagnostics."), +] + + +def availability(languages: set[str] | None = None) -> list[dict[str, object]]: + rows: list[dict[str, object]] = [] + for toolchain in TOOLCHAINS: + if languages and toolchain.language not in languages: + continue + found = {name: shutil.which(name) for name in toolchain.required_any} + rows.append( + { + "language": toolchain.language, + "available": any(found.values()), + "tools": found, + "notes": toolchain.notes, + } + ) + return rows + + +def print_table(rows: list[dict[str, object]]) -> None: + width = max(len(str(row["language"])) for row in rows) + for row in rows: + tools = row["tools"] + assert isinstance(tools, dict) + present = [name for name, path in tools.items() if path] + missing = [name for name, path in tools.items() if not path] + status = "ok" if row["available"] else "missing" + detail = ", ".join(present) if present else "missing: " + ", ".join(missing) + print(f"{str(row['language']).ljust(width)} {status.ljust(7)} {detail}") + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--json", action="store_true", help="Emit machine-readable JSON.") + parser.add_argument( + 
"--strict", + action="store_true", + help="Exit non-zero when any selected native toolchain is missing.", + ) + parser.add_argument( + "--language", + action="append", + choices=[toolchain.language for toolchain in TOOLCHAINS], + help="Restrict the report to one language. May be repeated.", + ) + args = parser.parse_args(argv) + + rows = availability(set(args.language) if args.language else None) + if args.json: + print(json.dumps({"toolchains": rows}, indent=2, sort_keys=True)) + else: + print_table(rows) + + if args.strict and not all(row["available"] for row in rows): + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tools/run_bug_corpus.py b/tools/run_bug_corpus.py index 744eef1..1a9c19e 100644 --- a/tools/run_bug_corpus.py +++ b/tools/run_bug_corpus.py @@ -32,6 +32,23 @@ ("php", ROOT / "examples/buggy/php", ["ancp", "check", "--workspace", ".", "--language", "php"]), ("dart", ROOT / "examples/buggy/dart", ["ancp", "check", "--workspace", ".", "--language", "dart"]), ("scala", ROOT / "examples/buggy/scala", ["ancp", "check", "--workspace", ".", "--language", "scala"]), + ("json", ROOT / "examples/buggy/json", ["ancp", "check", "--workspace", ".", "--language", "json"]), + ("toml", ROOT / "examples/buggy/toml", ["ancp", "check", "--workspace", ".", "--language", "toml"]), + ("yaml", ROOT / "examples/buggy/yaml", ["ancp", "check", "--workspace", ".", "--language", "yaml"]), + ("shell", ROOT / "examples/buggy/shell", ["ancp", "check", "--workspace", ".", "--language", "shell"]), + ("powershell", ROOT / "examples/buggy/powershell", ["ancp", "check", "--workspace", ".", "--language", "powershell"]), + ("lua", ROOT / "examples/buggy/lua", ["ancp", "check", "--workspace", ".", "--language", "lua"]), + ("perl", ROOT / "examples/buggy/perl", ["ancp", "check", "--workspace", ".", "--language", "perl"]), + ("r", ROOT / "examples/buggy/r", ["ancp", "check", "--workspace", ".", "--language", "r"]), + ("haskell", ROOT / 
"examples/buggy/haskell", ["ancp", "check", "--workspace", ".", "--language", "haskell"]), + ("ocaml", ROOT / "examples/buggy/ocaml", ["ancp", "check", "--workspace", ".", "--language", "ocaml"]), + ("erlang", ROOT / "examples/buggy/erlang", ["ancp", "check", "--workspace", ".", "--language", "erlang"]), + ("elixir", ROOT / "examples/buggy/elixir", ["ancp", "check", "--workspace", ".", "--language", "elixir"]), + ("clojure", ROOT / "examples/buggy/clojure", ["ancp", "check", "--workspace", ".", "--language", "clojure"]), + ("nix", ROOT / "examples/buggy/nix", ["ancp", "check", "--workspace", ".", "--language", "nix"]), + ("terraform", ROOT / "examples/buggy/terraform", ["ancp", "check", "--workspace", ".", "--language", "terraform"]), + ("dockerfile", ROOT / "examples/buggy/dockerfile", ["ancp", "check", "--workspace", ".", "--language", "dockerfile"]), + ("sql", ROOT / "examples/buggy/sql", ["ancp", "check", "--workspace", ".", "--language", "sql"]), ] diff --git a/tools/verify_repo.py b/tools/verify_repo.py index 2270ba8..60d1c82 100644 --- a/tools/verify_repo.py +++ b/tools/verify_repo.py @@ -29,7 +29,10 @@ def rel(path: pathlib.Path) -> str: def should_skip(path: pathlib.Path) -> bool: ignored = {".git", ".ancp", "__pycache__", ".pytest_cache", ".mypy_cache", "dist", "build"} - return any(part in ignored or part.endswith(".egg-info") for part in path.parts) + if any(part in ignored or part.endswith(".egg-info") for part in path.parts): + return True + parts = set(path.parts) + return "examples" in parts and "buggy" in parts def load_json(path: pathlib.Path) -> Any: @@ -63,6 +66,8 @@ def check_schema_examples() -> Check: failures: list[str] = [] count = 0 for path in (ROOT / "examples").rglob("*.json"): + if "buggy" in path.parts: + continue count += 1 doc = load_json(path) errors = sorted(validator.iter_errors(doc), key=lambda err: list(err.path)) From 02001d265c6c684ed5c11006a1c0f0a26fae8071 Mon Sep 17 00:00:00 2001 From: Rudra Date: Tue, 26 May 2026 14:08:17 
+0530 Subject: [PATCH 2/2] Fix TOML adapter on Python 3.10 --- pyproject.toml | 3 ++- requirements-dev.txt | 1 + src/ancp/adapters/base.py | 5 ++++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0b82979..9e128d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,8 @@ classifiers = [ ] dependencies = [ "jsonschema>=4.22.0", - "PyYAML>=6.0.0" + "PyYAML>=6.0.0", + "tomli>=2.0.0; python_version < '3.11'" ] [project.optional-dependencies] diff --git a/requirements-dev.txt b/requirements-dev.txt index 13dea66..d898574 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,6 @@ jsonschema>=4.22.0 PyYAML>=6.0.0 +tomli>=2.0.0; python_version < "3.11" pytest>=8.0.0 build>=1.2.0 twine>=5.0.0 diff --git a/src/ancp/adapters/base.py b/src/ancp/adapters/base.py index a0890df..39633f2 100644 --- a/src/ancp/adapters/base.py +++ b/src/ancp/adapters/base.py @@ -714,7 +714,10 @@ class TomlAdapter(InternalSyntaxAdapter): tool_name = "python-tomllib" def parse_internal(self, root: pathlib.Path) -> list[dict[str, Any]]: - import tomllib + try: + import tomllib + except ModuleNotFoundError: # Python 3.10 compatibility. + import tomli as tomllib diagnostics: list[dict[str, Any]] = [] for path in list_files(root, self.file_extensions, limit=200):