diff --git a/README.md b/README.md index ae2da86..c9635ba 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,16 @@ ```bash -pip install cognis-deepcheck +pip install "git+https://github.com/cognis-digital/deepcheck.git" deepcheck scan . # → prioritized findings in seconds ``` + +## What is this? + +Deepcheck is a command-line tool that examines images (JPEG and PNG) to determine whether they were taken by a real camera or generated by AI software like Midjourney, Stable Diffusion, or DALL-E. It reads the hidden technical data embedded in image files — such as camera make, compression patterns, and content-authenticity certificates (C2PA) — and gives you a plain verdict: likely authentic, suspicious, or likely synthetic. It also shows exactly which signals drove that conclusion, so you can quickly judge whether an image is trustworthy. It is useful for journalists, researchers, content moderators, and anyone who needs to verify whether a photo is genuine before publishing or acting on it. + + ## Contents - [Why deepcheck?](#why) · [Features](#features) · [Quick start](#quick-start) · [Example](#example) · [Architecture](#architecture) · [AI stack](#ai-stack) · [How it compares](#how-it-compares) · [Integrations](#integrations) · [Install anywhere](#install-anywhere) · [Related](#related) · [Contributing](#contributing) @@ -46,10 +52,56 @@ Lightweight synthetic-media detector with C2PA validation — without standing u
↑ back to top
+ +## Domains + +**Primary domain:** Intelligence & OSINT · **JTF MERIDIAN division:** NULLBYTE · BLACK CELL + +**Topics:** `cognis` `osint` `intelligence` `recon` + +Part of the **Cognis Neural Suite** — 300+ source-available tools organized across 12 domains under the JTF MERIDIAN command structure. See the [suite on GitHub](https://github.com/cognis-digital) and [jtf-meridian](https://github.com/cognis-digital/jtf-meridian) for how the pieces fit together. + + + +## Install + +`deepcheck` is source-available (not published to PyPI) — every method below installs +straight from GitHub. Pick whichever you prefer; the one-line scripts auto-detect +the best tool available on your machine. + +**One-liner (Linux / macOS):** +```sh +curl -fsSL https://raw.githubusercontent.com/cognis-digital/deepcheck/HEAD/install.sh | sh +``` + +**One-liner (Windows PowerShell):** +```powershell +irm https://raw.githubusercontent.com/cognis-digital/deepcheck/HEAD/install.ps1 | iex +``` + +**Or install manually — any one of:** +```sh +pipx install "git+https://github.com/cognis-digital/deepcheck.git" # isolated (recommended) +uv tool install "git+https://github.com/cognis-digital/deepcheck.git" # uv +pip install "git+https://github.com/cognis-digital/deepcheck.git" # pip +``` + +**From source:** +```sh +git clone https://github.com/cognis-digital/deepcheck.git +cd deepcheck && pip install . +``` + +Then run: +```sh +deepcheck --help +``` + + ## Quick start ```bash -pip install cognis-deepcheck +pip install "git+https://github.com/cognis-digital/deepcheck.git" deepcheck --version deepcheck scan . # scan current project deepcheck scan . --format json # machine-readable diff --git a/deepcheck/cli.py b/deepcheck/cli.py index 3c667bd..221006f 100644 --- a/deepcheck/cli.py +++ b/deepcheck/cli.py @@ -14,6 +14,7 @@ import argparse import json import os +import struct import sys from . 
import TOOL_NAME, TOOL_VERSION @@ -89,9 +90,12 @@ def main(argv=None) -> int: try: result = analyze_image(args.image) - except (OSError, struct_error_t()) as exc: # type: ignore[misc] + except (OSError, struct.error, ValueError) as exc: print(f"{TOOL_NAME}: error: {exc}", file=sys.stderr) return 2 + except Exception as exc: # noqa: BLE001 + print(f"{TOOL_NAME}: unexpected error: {type(exc).__name__}: {exc}", file=sys.stderr) + return 2 if args.format == "json": print(json.dumps(result.to_dict(), indent=2)) @@ -101,10 +105,5 @@ def main(argv=None) -> int: return 1 if _is_finding(result) else 0 -def struct_error_t(): - import struct - return struct.error - - if __name__ == "__main__": raise SystemExit(main()) diff --git a/deepcheck/core.py b/deepcheck/core.py index 80f8403..089c248 100644 --- a/deepcheck/core.py +++ b/deepcheck/core.py @@ -283,19 +283,19 @@ def validate_c2pa(blob: bytes) -> C2PAResult: types = [b["type"] for b in boxes] # A valid C2PA store carries a manifest superbox and a claim. - has_store = any(l and l.startswith("c2pa") for l in labels) or b"c2pa" in blob[:64].lower() - has_claim = any(l and "claim" in l for l in labels) - has_assertions = any(l and "assertions" in l for l in labels) + has_store = any(lbl and lbl.startswith("c2pa") for lbl in labels) or b"c2pa" in blob[:64].lower() + has_claim = any(lbl and "claim" in lbl for lbl in labels) + has_assertions = any(lbl and "assertions" in lbl for lbl in labels) # Assertions are labelled child boxes under the assertion store. res.assertions = sorted( - {l for l in labels if l and ("." in l or l.startswith("c2pa.") or l.startswith("cai."))} + {lbl for lbl in labels if lbl and ("." in lbl or lbl.startswith("c2pa.") or lbl.startswith("cai."))} ) # Hard binding: a data-hash / box-hash assertion must exist for the manifest # to actually bind to the asset bytes. 
res.has_hard_binding = any( - l and ("hash.data" in l or "hash.boxes" in l or l.endswith(".hash")) for l in labels + lbl and ("hash.data" in lbl or "hash.boxes" in lbl or lbl.endswith(".hash")) for lbl in labels ) # Claim generator string, if present in a CBOR-ish text blob. @@ -347,7 +347,7 @@ def _dqt_signals(dqt_tables: list[bytes]) -> list[Signal]: else: # 16-bit entries for k in range(count): - if p + 1 < len(tbl): + if p + 2 <= len(tbl): values.append(struct.unpack(">H", tbl[p : p + 2])[0]) p += 2 if not values: @@ -412,27 +412,45 @@ def _score_to_verdict(score: float, c2pa: C2PAResult) -> Verdict: def analyze_image(path: str) -> AnalysisResult: + if not path: + raise ValueError("path must be a non-empty string") with open(path, "rb") as fh: data = fh.read() + if not data: + return AnalysisResult( + path=path, + format="unknown", + width=None, + height=None, + verdict=Verdict.UNKNOWN.value, + synthetic_score=0.0, + signals=[{"name": "empty_file", "weight": 0.0, "detail": "file contains no data"}], + metadata={"metadata_bytes": 0, "has_ai_tag": False, "has_camera_hint": False}, + c2pa=C2PAResult(note="no data to analyse"), + ) fmt = _sniff_format(data) width = height = None signals: list[Signal] = [] raw_meta = b"" - if fmt == "jpeg": - parsed = _parse_jpeg(data) - width, height = parsed["width"], parsed["height"] - raw_meta = b"".join(p for _, p in parsed["app_segments"]) - signals += _dqt_signals(parsed["dqt_tables"]) - jumbf = parsed["jumbf"] - elif fmt == "png": - parsed = _parse_png(data) - width, height = parsed["width"], parsed["height"] - raw_meta = b"".join(parsed["text_chunks"]) - jumbf = parsed["jumbf"] - else: + try: + if fmt == "jpeg": + parsed = _parse_jpeg(data) + width, height = parsed["width"], parsed["height"] + raw_meta = b"".join(p for _, p in parsed["app_segments"]) + signals += _dqt_signals(parsed["dqt_tables"]) + jumbf = parsed["jumbf"] + elif fmt == "png": + parsed = _parse_png(data) + width, height = parsed["width"], parsed["height"] + 
raw_meta = b"".join(parsed["text_chunks"]) + jumbf = parsed["jumbf"] + else: + jumbf = b"" + signals.append(Signal("unknown_format", 0.0, "unrecognized container; limited analysis")) + except (struct.error, ValueError) as exc: + signals.append(Signal("parse_error", 0.0, f"format parser raised {type(exc).__name__}: {exc}")) jumbf = b"" - signals.append(Signal("unknown_format", 0.0, "unrecognized container; limited analysis")) meta = {"_raw_metadata": raw_meta} signals += _metadata_signals(meta) diff --git a/demos/01-basic/__pycache__/make_input.cpython-314.pyc b/demos/01-basic/__pycache__/make_input.cpython-314.pyc new file mode 100644 index 0000000..a5c440f Binary files /dev/null and b/demos/01-basic/__pycache__/make_input.cpython-314.pyc differ diff --git a/install.ps1 b/install.ps1 new file mode 100644 index 0000000..5df4615 --- /dev/null +++ b/install.ps1 @@ -0,0 +1,29 @@ +# Comprehensive installer for cognis-digital/deepcheck (Windows PowerShell). +# Tries: pipx -> uv -> pip (git+https) -> from source. +# deepcheck is source-available and not on PyPI; all paths install from GitHub. +$ErrorActionPreference = "Stop" +$Repo = "deepcheck" +$Url = "git+https://github.com/cognis-digital/deepcheck.git" +$Git = "https://github.com/cognis-digital/deepcheck.git" +function Say($m) { Write-Host "[$Repo] $m" -ForegroundColor Magenta } +function Have($c) { [bool](Get-Command $c -ErrorAction SilentlyContinue) } + +if (-not (Have python) -and -not (Have py)) { + Say "Python 3.9+ is required but was not found. Install Python first."; exit 1 +} +if (Have pipx) { + Say "Installing with pipx (isolated, recommended)..." + pipx install $Url; if ($LASTEXITCODE -eq 0) { Say "Done. Run: deepcheck"; exit 0 } +} +if (Have uv) { + Say "Installing with uv..." + uv tool install $Url; if ($LASTEXITCODE -eq 0) { Say "Done. Run: deepcheck"; exit 0 } +} +if (Have pip) { + Say "Installing with pip (user site)..." + pip install --user $Url; if ($LASTEXITCODE -eq 0) { Say "Done. 
Run: deepcheck"; exit 0 } +} +Say "No packaging tool worked; falling back to a source clone." +$Tmp = Join-Path $env:TEMP "$Repo-src" +git clone --depth 1 $Git $Tmp +Say "Cloned to $Tmp - run: cd $Tmp; python -m pip install ." diff --git a/install.sh b/install.sh index 9b16e91..ba80bd2 100644 --- a/install.sh +++ b/install.sh @@ -1,10 +1,34 @@ -#!/usr/bin/env sh -# Universal installer for deepcheck. Prefers uv > pipx > pip; installs from the repo. -set -e -SRC="git+https://github.com/cognis-digital/deepcheck.git" -echo "Installing deepcheck ..." -if command -v uv >/dev/null 2>&1; then uv tool install "$SRC" -elif command -v pipx >/dev/null 2>&1; then pipx install "$SRC" -elif command -v python3 >/dev/null 2>&1; then python3 -m pip install --user "$SRC" -else echo "Need uv, pipx, or python3+pip"; exit 1; fi -echo "Done. Run: deepcheck --help" +#!/usr/bin/env sh +# Comprehensive installer for cognis-digital/deepcheck (Linux / macOS). +# Tries the best available method: pipx -> uv -> pip (git+https) -> from source. +# deepcheck is source-available and not on PyPI; all paths install from GitHub. +set -eu + +REPO="deepcheck" +URL="git+https://github.com/cognis-digital/deepcheck.git" +GITURL="https://github.com/cognis-digital/deepcheck.git" + +say() { printf '\033[1;35m[%s]\033[0m %s\n' "$REPO" "$1"; } +have() { command -v "$1" >/dev/null 2>&1; } + +if ! have python3 && ! have python; then + say "Python 3.9+ is required but was not found. Install Python first."; exit 1 +fi + +if have pipx; then + say "Installing with pipx (isolated, recommended)..." + pipx install "$URL" && { say "Done. Run: deepcheck"; exit 0; } +fi +if have uv; then + say "Installing with uv..." + uv tool install "$URL" && { say "Done. Run: deepcheck"; exit 0; } +fi +if have pip3 || have pip; then + PIP="$(command -v pip3 || command -v pip)" + say "Installing with pip (user site)..." + "$PIP" install --user "$URL" && { say "Done. 
Run: deepcheck"; exit 0; } +fi + +say "No packaging tool worked; falling back to a source clone." +TMP="$(mktemp -d)"; git clone --depth 1 "$GITURL" "$TMP/$REPO" +say "Cloned to $TMP/$REPO — run: cd $TMP/$REPO && python3 -m pip install ." diff --git a/integrations/webhook.py b/integrations/webhook.py index 91e0211..9bf7258 100644 --- a/integrations/webhook.py +++ b/integrations/webhook.py @@ -5,7 +5,7 @@ Usage: scan . --format json | python integrations/webhook.py --url URL """ from __future__ import annotations -import argparse, json, sys, urllib.request +import argparse, sys, urllib.request def main() -> int: ap = argparse.ArgumentParser() diff --git a/layman.md b/layman.md new file mode 100644 index 0000000..bff17ba --- /dev/null +++ b/layman.md @@ -0,0 +1 @@ +Deepcheck is a command-line tool that examines images (JPEG and PNG) to determine whether they were taken by a real camera or generated by AI software like Midjourney, Stable Diffusion, or DALL-E. It reads the hidden technical data embedded in image files — such as camera make, compression patterns, and content-authenticity certificates (C2PA) — and gives you a plain verdict: likely authentic, suspicious, or likely synthetic. It also shows exactly which signals drove that conclusion, so you can quickly judge whether an image is trustworthy. It is useful for journalists, researchers, content moderators, and anyone who needs to verify whether a photo is genuine before publishing or acting on it. diff --git a/tests/test_smoke.py b/tests/test_smoke.py index f81a623..70e0a67 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -2,6 +2,9 @@ These build fixtures in-memory so they don't depend on any committed binary. 
""" +import contextlib +import io +import json import os import struct import sys @@ -85,8 +88,9 @@ def test_ai_tag_flags_synthetic(self): os.remove(path) def test_clean_photo_authentic(self): - # camera EXIF hint + varied quant table => low score - path = _write(_jpeg(software=b"Apple iPhone 15", camera=True)) + # camera EXIF hint + varied low-mean quant table => low score + # use range(2, 66): 64 distinct values, mean ~33.5 (well below the >40 coarse threshold) + path = _write(_jpeg(software=b"Apple iPhone 15", camera=True, quant=list(range(2, 66)))) try: r = analyze_image(path) self.assertLess(r.synthetic_score, 0.25) @@ -157,7 +161,8 @@ def test_exit_finding(self): os.remove(path) def test_exit_authentic(self): - path = _write(_jpeg(software=b"Apple iPhone 15", camera=True)) + # use same low-mean quant table as test_clean_photo_authentic + path = _write(_jpeg(software=b"Apple iPhone 15", camera=True, quant=list(range(2, 66)))) try: self.assertEqual(main(["inspect", path]), 0) finally: @@ -169,6 +174,94 @@ def test_missing_file(self): def test_no_command_usage(self): self.assertEqual(main([]), 2) + def test_directory_as_image(self): + # Passing a directory path must return exit code 2, not crash. + import tempfile + d = tempfile.mkdtemp() + try: + self.assertEqual(main(["inspect", d]), 2) + finally: + os.rmdir(d) + + def test_json_output_is_valid_json(self): + path = _write(_jpeg(software=b"Apple iPhone 15", camera=True, quant=list(range(2, 66)))) + try: + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + code = main(["inspect", path, "--format", "json"]) + self.assertEqual(code, 0) + parsed = json.loads(buf.getvalue()) + self.assertIn("verdict", parsed) + self.assertIn("synthetic_score", parsed) + finally: + os.remove(path) + + +class TestEdgeCases(unittest.TestCase): + """Edge-case and robustness tests introduced by hardening.""" + + def test_empty_file_no_crash(self): + # A zero-byte file must return a clean result, not raise an exception. 
+ fd, path = tempfile.mkstemp() + os.close(fd) + try: + from deepcheck.core import analyze_image + r = analyze_image(path) + self.assertEqual(r.format, "unknown") + self.assertEqual(r.verdict, Verdict.UNKNOWN.value) + self.assertEqual(r.synthetic_score, 0.0) + finally: + os.remove(path) + + def test_empty_file_cli_exit_code(self): + # CLI must not traceback on an empty file — exit 0 (unknown → not a finding). + fd, path = tempfile.mkstemp(suffix=".jpg") + os.close(fd) + try: + self.assertEqual(main(["inspect", path]), 0) + finally: + os.remove(path) + + def test_truncated_png_no_crash(self): + # A PNG with just the header and a malformed IHDR must not raise. + png_header = b"\x89PNG\r\n\x1a\n" + truncated_ihdr = struct.pack(">I", 13) + b"IHDR" + b"\x00\x00\x00\x10\x00\x00\x00\x10" + # IHDR declares 13 data bytes but only 8 follow and there is no CRC -> truncated + data = png_header + truncated_ihdr + fd, path = tempfile.mkstemp(suffix=".png") + with os.fdopen(fd, "wb") as fh: + fh.write(data) + try: + from deepcheck.core import analyze_image + r = analyze_image(path) + self.assertEqual(r.format, "png") + finally: + os.remove(path) + + def test_validate_c2pa_large_corrupt_blob(self): + # A large blob of random-ish bytes must not raise, just report errors. + big_blob = bytes(range(256)) * 40 # 10 240 bytes, no valid JUMBF + res = validate_c2pa(big_blob) + self.assertTrue(res.present) + self.assertFalse(res.valid) + + def test_dqt_16bit_entries_no_crash(self): + # A DQT table with precision=1 (16-bit entries) must parse cleanly.
+ from deepcheck.core import _dqt_signals + # Build a valid 16-bit DQT: 1-byte header (pq=1,id=0) + 64×2-byte values + header = bytes([0x10]) + entries = struct.pack(">64H", *([42] * 64)) + sigs = _dqt_signals([header + entries]) + # All 64 entries are 42, so distinct=1 (<= 4 triggers flat_quant_table) + names = {s.name for s in sigs} + self.assertIn("flat_quant_table", names) + + def test_analyze_image_empty_string_path_raises(self): + # An empty path must be rejected with an exception. NOTE(review): ValueError or OSError is accepted here, so this passes even without an explicit empty-path check; assert ValueError alone if that check is the contract. + from deepcheck.core import analyze_image + with self.assertRaises((ValueError, OSError)): + analyze_image("") + if __name__ == "__main__": unittest.main()