From 72e2bdbe2705c24c02187d3a96b5ef148a65909a Mon Sep 17 00:00:00 2001
From: baranylcn <bbrnylcn@gmail.com>
Date: Fri, 3 Apr 2026 02:46:40 +0300
Subject: [PATCH] feat: add CLI interface with argparse

Closes #2
---
 chav/cli.py       | 66 ++++++++++++++++++++++++++++++++++
 pyproject.toml    |  3 ++
 tests/test_cli.py | 90 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 159 insertions(+)
 create mode 100644 chav/cli.py
 create mode 100644 tests/test_cli.py

diff --git a/chav/cli.py b/chav/cli.py
new file mode 100644
index 0000000..07a15b0
--- /dev/null
+++ b/chav/cli.py
@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+import argparse
+import sys
+
+from chav.engine import analyze
+
+
+def main(args: list[str] | None = None) -> int:
+    """Run the chav CLI on *args* (sys.argv[1:] if None); return 0 on success, 1 on a missing command or error."""
+    parser = argparse.ArgumentParser(prog="chav", description="Chav — diagnostic engine for tabular datasets")
+    sub = parser.add_subparsers(dest="command")
+
+    analyze_cmd = sub.add_parser("analyze", help="Run diagnostics on a CSV file")
+    analyze_cmd.add_argument("data", help="Path to the input CSV file")
+    analyze_cmd.add_argument("--reference", metavar="FILE", help="Path to a reference CSV file")
+    analyze_cmd.add_argument("--target", metavar="COL", help="Target column name")
+    analyze_cmd.add_argument("--time-column", metavar="COL", help="Datetime column name")
+    analyze_cmd.add_argument(
+        "--format",
+        choices=["summary", "json", "csv"],
+        default="summary",
+        help="Output format (default: summary)",
+    )
+    analyze_cmd.add_argument("--all", action="store_true", help="Include passing diagnostics in output")
+    analyze_cmd.add_argument("--output-file", metavar="FILE", help="Write output to file instead of stdout")
+
+    parsed = parser.parse_args(args)
+
+    if parsed.command is None:  # no sub-command given: show usage and signal failure
+        parser.print_help()
+        return 1
+
+    try:
+        report = analyze(
+            data=parsed.data,
+            reference_data=parsed.reference,
+            target=parsed.target,
+            time_column=parsed.time_column,
+        )
+    except Exception as exc:  # CLI boundary: report any analyze() failure as a message, not a traceback
+        print(f"Error: {exc}", file=sys.stderr)
+        return 1
+
+    if parsed.format == "json":
+        output = report.to_json(all=parsed.all)
+    elif parsed.format == "csv":
+        output = report.to_csv(all=parsed.all) or ""  # falls back to "" if to_csv returns a falsy value
+    else:
+        output = report.summary()
+
+    if not parsed.output_file:
+        print(output)
+        return 0
+
+    try:
+        with open(parsed.output_file, "w", encoding="utf-8") as f:
+            f.write(output)
+    except OSError as exc:  # unwritable path (missing directory, permissions, ...): same handling as above
+        print(f"Error: {exc}", file=sys.stderr)
+        return 1
+    return 0
+
+
+def cli_entry() -> None:
+    raise SystemExit(main())  # console-script entry point: main()'s return value becomes the exit status
diff --git a/pyproject.toml b/pyproject.toml
index 7722694..6eb11b2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,6 +32,9 @@ dependencies = [
     "scikit-learn>=1.1",
 ]
 
+[project.scripts]
+chav = "chav.cli:cli_entry"
+
 [project.urls]
 Homepage = "https://github.com/baranylcn/chav"
 Repository = "https://github.com/baranylcn/chav"
diff --git a/tests/test_cli.py b/tests/test_cli.py
new file mode 100644
index 0000000..3bf1a6b
--- /dev/null
+++ b/tests/test_cli.py
@@ -0,0 +1,90 @@
+from __future__ import annotations
+
+import csv
+import io
+import json
+
+import pandas as pd
+import pytest
+
+from chav.cli import main
+
+
+@pytest.fixture
+def csv_file(tmp_path):
+    """Write a four-row sample table to ``data.csv`` under *tmp_path* and return its path as ``str``."""
+    columns = {
+        "age": [25, 30, 35, 40],
+        "income": [50000, 60000, 70000, 80000],
+        "city": ["Istanbul", "Ankara", "Izmir", "Bursa"],
+        "label": [0, 1, 0, 1],
+    }
+    destination = tmp_path / "data.csv"
+    # index=False: only the four columns above are written, without pandas' row index.
+    pd.DataFrame(columns).to_csv(destination, index=False)
+    return str(destination)
+
+
+@pytest.fixture
+def reference_csv_file(tmp_path):
+    """Write a three-row reference table to ``reference.csv`` under *tmp_path* and return its path as ``str``."""
+    columns = {
+        "age": [20, 25, 30],
+        "income": [40000, 50000, 60000],
+        "city": ["Istanbul", "Ankara", "Izmir"],
+        "label": [0, 1, 0],
+    }
+    destination = tmp_path / "reference.csv"
+    # index=False: only the four columns above are written, without pandas' row index.
+    pd.DataFrame(columns).to_csv(destination, index=False)
+    return str(destination)
+
+
+class TestCLIBasic:  # exit-status checks for main(): bare invocation and the "analyze" sub-command
+    def test_no_command_returns_1(self):
+        assert main([]) == 1
+
+    def test_analyze_returns_0(self, csv_file):
+        assert main(["analyze", csv_file]) == 0
+
+    def test_analyze_with_target(self, csv_file):
+        assert main(["analyze", csv_file, "--target", "label"]) == 0
+
+    def test_analyze_with_reference(self, csv_file, reference_csv_file):
+        assert main(["analyze", csv_file, "--reference", reference_csv_file]) == 0
+
+    def test_analyze_with_all_options(self, csv_file, reference_csv_file):
+        assert main(["analyze", csv_file, "--reference", reference_csv_file, "--target", "label", "--all"]) == 0
+
+    def test_invalid_file_returns_1(self, tmp_path):  # tmp_path is fresh per test, so the file cannot exist
+        assert main(["analyze", str(tmp_path / "nonexistent.csv")]) == 1
+
+
+class TestCLIFormats:
+    def test_format_json(self, csv_file, capsys):
+        """``--format json`` prints JSON whose top level has a ``diagnostics`` key."""
+        assert main(["analyze", csv_file, "--format", "json", "--all"]) == 0
+        payload = json.loads(capsys.readouterr().out)
+        assert "diagnostics" in payload
+
+    def test_format_csv(self, csv_file, capsys):
+        """``--format csv`` prints CSV with at least one data row and a ``rule`` column."""
+        assert main(["analyze", csv_file, "--format", "csv", "--all"]) == 0
+        captured = capsys.readouterr().out
+        records = list(csv.DictReader(io.StringIO(captured)))
+        assert records
+        assert "rule" in records[0]
+
+    def test_format_summary(self, csv_file, capsys):
+        """``--format summary`` prints text containing ``Chav Report``."""
+        assert main(["analyze", csv_file, "--format", "summary"]) == 0
+        assert "Chav Report" in capsys.readouterr().out
+
+
+class TestCLIOutputFile:
+    def test_output_file_written(self, csv_file, tmp_path):
+        """With ``--output-file``, the JSON report is written to that path and has a ``diagnostics`` key."""
+        target = tmp_path / "out.json"
+        assert main(["analyze", csv_file, "--format", "json", "--output-file", str(target)]) == 0
+        with target.open(encoding="utf-8") as handle:
+            assert "diagnostics" in json.load(handle)