Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions cmd/inspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@ func newInspectCmd() *cobra.Command {
inspectors []string
maxLines int
verbose bool
detail string
similarity float64
maxClasses int
selectExpr string
)

c := &cobra.Command{
Expand All @@ -36,9 +34,12 @@ Inspectors describe; they never recommend. inspect writes no schema and mutates
nothing. Output is Markdown by default; --json emits the same evidence as JSON.`,
Args: exactArgs(1, "inspect <path-or-collection>"),
RunE: func(cmd *cobra.Command, args []string) error {
params, err := inspect.ParseParams(detail, similarity, maxClasses)
if err != nil {
return usageErr(err.Error())
params := inspect.Params{}
if selectExpr != "" {
if len(inspectors) != 1 || inspectors[0] != "file_content_shape" {
return usageErr("--select requires exactly one source inspector: --inspector file_content_shape")
}
params = params.WithSelection(inspect.ParseSelection(selectExpr))
}

evidence, err := runInspect(args[0], inspectors, params)
Expand Down Expand Up @@ -74,12 +75,8 @@ nothing. Output is Markdown by default; --json emits the same evidence as JSON.`
"Truncate each inspector's Markdown output to N lines (0 = no limit).")
c.Flags().BoolVarP(&verbose, "verbose", "v", false,
"Show full output; do not truncate (same as --max-lines 0).")
c.Flags().StringVar(&detail, "detail", "",
"Summarizer detail level: exact, grouped, or coarse (default grouped).")
c.Flags().Float64Var(&similarity, "similarity", -1,
"Summarizer similarity threshold (0–1). Mutually exclusive with --detail/--max-classes.")
c.Flags().IntVar(&maxClasses, "max-classes", 0,
"Cap the number of summarized classes. Mutually exclusive with --detail/--similarity.")
c.Flags().StringVar(&selectExpr, "select", "",
"Select files for file_content_shape: directory, glob, ext = \".csv\", or path under \"docs\".")
return c
}

Expand Down Expand Up @@ -116,6 +113,9 @@ func resolveCollection(arg string) (*project.Project, project.Collection, bool)
}

func runCollectionLayer(proj *project.Project, c project.Collection, names []string, params inspect.Params) ([]inspect.Evidence, error) {
if params.Selection.Mode != "" {
return nil, usageErr("--select requires a source path target")
}
selected, err := selectCollectionInspectors(names)
if err != nil {
return nil, err
Expand Down
99 changes: 78 additions & 21 deletions cmd/inspect_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,16 +113,75 @@ func TestInspect_outputFileMatchesStdout(t *testing.T) {

func TestInspect_inspectorFlagNarrows(t *testing.T) {
dir := inspectRepo(t)
stdout, _, err := runRoot(t, "inspect", "--json", "--inspector", "document_shape", dir)
stdout, _, err := runRoot(t, "inspect", "--json", "--inspector", "file_tree", dir)
if err != nil {
t.Fatalf("inspect --inspector: %v", err)
}
var records []map[string]any
if err := json.Unmarshal([]byte(stdout), &records); err != nil {
t.Fatalf("bad json: %v", err)
}
if len(records) != 1 || records[0]["inspector"] != "document_shape" {
t.Errorf("expected only document_shape, got %v", records)
if len(records) != 1 || records[0]["inspector"] != "file_tree" {
t.Errorf("expected only file_tree, got %v", records)
}
}

// TestInspect_selectRunsFileContentShape runs inspect with --json,
// --inspector file_content_shape and --select `ext = ".csv"` against a repo
// that contains one extra CSV file, then checks that exactly one evidence
// record is emitted, that it counts a single file, and that the selector
// expression is echoed back unchanged.
func TestInspect_selectRunsFileContentShape(t *testing.T) {
	dir := inspectRepo(t)
	writeFile(t, dir, "data/books.csv", "title,rating\nDune,5\n")
	stdout, _, err := runRoot(t, "inspect", "--json", "--inspector", "file_content_shape", "--select", `ext = ".csv"`, dir)
	if err != nil {
		t.Fatalf("inspect --select: %v", err)
	}
	var records []map[string]any
	if err := json.Unmarshal([]byte(stdout), &records); err != nil {
		t.Fatalf("bad json: %v", err)
	}
	if len(records) != 1 || records[0]["inspector"] != "file_content_shape" {
		t.Fatalf("expected only file_content_shape, got %v", records)
	}
	// Use checked type assertions: a missing or mistyped field should fail
	// the test with a readable message instead of panicking.
	ev, ok := records[0]["evidence"].(map[string]any)
	if !ok {
		t.Fatalf("evidence = %#v, want a JSON object", records[0]["evidence"])
	}
	if got, ok := ev["file_count"].(float64); !ok || got != 1 {
		t.Errorf("file_count = %v, want 1 selected CSV file", ev["file_count"])
	}
	if got, ok := ev["selector"].(string); !ok || got != `ext = ".csv"` {
		t.Errorf("selector = %v, want %q", ev["selector"], `ext = ".csv"`)
	}
}

// TestInspect_selectRejectsInvalidCombinations runs inspect with --select
// alongside each inspector combination listed below (none, file_tree only,
// and file_content_shape together with file_tree) and expects every
// invocation to fail with an error whose Code() is 2.
func TestInspect_selectRejectsInvalidCombinations(t *testing.T) {
	repo := inspectRepo(t)
	cases := [][]string{
		{"inspect", "--select", "books", repo},
		{"inspect", "--inspector", "file_tree", "--select", "books", repo},
		{"inspect", "--inspector", "file_content_shape", "--inspector", "file_tree", "--select", "books", repo},
	}
	for _, argv := range cases {
		_, _, runErr := runRoot(t, argv...)
		var exit interface{ Code() int }
		// Accept only a non-nil error that carries exit code 2.
		if runErr != nil && errors.As(runErr, &exit) && exit.Code() == 2 {
			continue
		}
		t.Errorf("%v: expected exit 2, got %v", argv, runErr)
	}
}

// TestInspect_selectRejectsCollectionTarget writes a storage config that
// names a "notes" collection plus one note file, changes into that directory,
// and runs inspect with --select against the "notes" target. The command is
// expected to fail with an error whose Code() is 2.
func TestInspect_selectRejectsCollectionTarget(t *testing.T) {
	const storageConfig = `type: filesystem
root: .
collections:
notes:
path: notes
checks:
- kind: markdown_requires_h1
`
	root := t.TempDir()
	writeFile(t, root, ".katalyst/storage/local.yaml", storageConfig)
	writeFile(t, root, "notes/dune.md", "---\ntitle: Dune\n---\n# Dune\n")
	chdir(t, root)

	_, _, runErr := runRoot(t, "inspect", "--inspector", "file_content_shape", "--select", "notes", "notes")
	var exit interface{ Code() int }
	// Accept only a non-nil error that carries exit code 2.
	if runErr != nil && errors.As(runErr, &exit) && exit.Code() == 2 {
		return
	}
	t.Errorf("expected exit 2 for --select with collection target, got %v", runErr)
}

Expand Down Expand Up @@ -159,51 +218,49 @@ func TestInspect_unknownInspectorIsUsageError(t *testing.T) {
}
}

func TestInspect_collapseParamsMutuallyExclusive(t *testing.T) {
dir := inspectRepo(t)
_, _, err := runRoot(t, "inspect", "--detail", "coarse", "--max-classes", "2", dir)
var coded interface{ Code() int }
if err == nil || !errors.As(err, &coded) || coded.Code() != 2 {
t.Errorf("expected exit 2 for mutually-exclusive collapse flags, got: %v", err)
}
}

func TestInspect_outputIncludesDescriptions(t *testing.T) {
stdout, _, err := runRoot(t, "inspect", inspectRepo(t))
if err != nil {
t.Fatalf("inspect: %v", err)
}
if !strings.Contains(stdout, "Cluster files into candidate collections") {
if !strings.Contains(stdout, "Profile selected files by text") {
t.Errorf("output missing inspector description\n%s", stdout)
}
}

func TestInspect_truncatesLongOutputAndVerboseShowsAll(t *testing.T) {
dir := t.TempDir()
// Ten files with disjoint frontmatter keys + sections → ten singleton
// document_shape classes, enough lines to exceed a small --max-lines.
writeFile(t, dir, ".katalyst/storage/local.yaml", `type: filesystem
root: .
collections:
notes:
path: notes
checks:
- kind: markdown_requires_h1
`)
for i := 0; i < 10; i++ {
writeFile(t, dir, fmt.Sprintf("docs/f%02d.md", i),
fmt.Sprintf("---\nk%02d: v\n---\n# H\n\n## S%02d\n", i, i))
writeFile(t, dir, fmt.Sprintf("notes/f%02d.md", i),
fmt.Sprintf("---\nk%02d: v\n---\n# H\n", i))
}
chdir(t, dir)

truncated, _, err := runRoot(t, "inspect", "--inspector", "document_shape", "--max-lines", "5", dir)
truncated, _, err := runRoot(t, "inspect", "--inspector", "object_fields", "--max-lines", "5", "notes")
if err != nil {
t.Fatalf("inspect --max-lines: %v", err)
}
if !strings.Contains(truncated, "truncated") {
t.Errorf("expected a truncation notice with --max-lines 5\n%s", truncated)
}

full, _, err := runRoot(t, "inspect", "--inspector", "document_shape", "-v", dir)
full, _, err := runRoot(t, "inspect", "--inspector", "object_fields", "-v", "notes")
if err != nil {
t.Fatalf("inspect -v: %v", err)
}
if strings.Contains(full, "truncated") {
t.Errorf("-v should not truncate\n%s", full)
}
if got := strings.Count(full, "label=docs/f"); got != 10 {
t.Errorf("-v rendered %d outliers, want 10\n%s", got, full)
if !strings.Contains(full, "k09") {
t.Errorf("-v should render all object field evidence\n%s", full)
}
}

Expand Down
12 changes: 6 additions & 6 deletions cmd/inspectors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func TestInspectors_listsEveryInspectorGroupedByLayer(t *testing.T) {
t.Fatalf("inspectors list: %v", err)
}

for _, want := range []string{"file_tree", "document_shape", "object_fields", "markdown_body"} {
for _, want := range []string{"file_tree", "file_content_shape", "object_fields", "markdown_body"} {
if !strings.Contains(stdout, want) {
t.Errorf("expected inspector %q in output", want)
}
Expand Down Expand Up @@ -55,15 +55,15 @@ func TestInspectorsShow_showsDetail(t *testing.T) {
snapshot(t, "inspectors/show-object_fields.txt", stdout)
}

func TestInspectorsShow_showsLayerContextAndSiblings(t *testing.T) {
func TestInspectorsShow_showsSourceLayerContextAndSiblings(t *testing.T) {
chdir(t, t.TempDir())
stdout, _, err := runRoot(t, "inspectors", "show", "document_shape")
stdout, _, err := runRoot(t, "inspectors", "show", "file_content_shape")
if err != nil {
t.Fatalf("inspectors show document_shape: %v", err)
t.Fatalf("inspectors show file_content_shape: %v", err)
}
// The fixture pins the breadcrumb header, the layer intro, and the sibling
// list.
snapshot(t, "inspectors/show-document_shape.txt", stdout)
snapshot(t, "inspectors/show-file_content_shape.txt", stdout)
}

func TestInspectorsShow_unknown_exit2(t *testing.T) {
Expand Down Expand Up @@ -125,7 +125,7 @@ func TestInspectorsList_jsonArrayCoversEveryDescriptor(t *testing.T) {
t.Errorf("entry %d (%s): empty layer/summary", i, d.Name)
}
}
for _, want := range []string{"file_tree", "document_shape", "object_fields", "markdown_body"} {
for _, want := range []string{"file_tree", "file_content_shape", "object_fields", "markdown_body"} {
if !seen[want] {
t.Errorf("expected inspector %q in JSON output", want)
}
Expand Down
4 changes: 1 addition & 3 deletions cmd/testdata/snapshots/help/inspect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,10 @@ Usage:
katalyst inspect <path-or-collection> [flags]

Flags:
--detail string Summarizer detail level: exact, grouped, or coarse (default grouped).
-h, --help help for inspect
--inspector stringArray Run only the named inspector(s); repeatable. Default: all in the selected layer.
--json Emit evidence as JSON instead of Markdown.
--max-classes int Cap the number of summarized classes. Mutually exclusive with --detail/--similarity.
--max-lines int Truncate each inspector's Markdown output to N lines (0 = no limit). (default 20)
-o, --output string Write the report to a file instead of stdout.
--similarity float Summarizer similarity threshold (0–1). Mutually exclusive with --detail/--max-classes. (default -1)
--select string Select files for file_content_shape: directory, glob, ext = ".csv", or path under "docs".
-v, --verbose Show full output; do not truncate (same as --max-lines 0).
86 changes: 72 additions & 14 deletions cmd/testdata/snapshots/inspect/source-report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,86 @@

## Structural

### file_tree_content (n=2)
### file_content_shape (n=2)

_Parse markdown and profile each directory's content shape: parse rate, frontmatter, key-sets._
_Profile selected files by text, tabular, and tree content structure._

- classes:
- outliers:
- features=[fmkey:status, fmkey:title, parsed, frontmatter] label=books
----------------------------------------
selection:
expression : all files
files : 2
directories : 1
readable : 2
unsupported : 0
parse failures: 0

### document_shape (n=2)
----------------------------------------
file types:
TYPE FILES
.md 2

_Cluster files into candidate collections by a composite fingerprint of frontmatter, body structure, and file naming._
----------------------------------------
coherence:
status: coherent

- classes:
- class=P1 features=[ext:.md, casing:kebab, fmkey:status, fmkey:title, sec:Review] members=[books/dune.md, books/it.md] size=2
- outliers:
----------------------------------------
common structure:
- 2/2 Markdown files have an H1
- 2/2 Markdown files have frontmatter key status
- 2/2 Markdown files have frontmatter key title
- 2/2 Markdown files have section Review

----------------------------------------
variation:
none

----------------------------------------
text:
files : 2
with H1: 2
frontmatter keys:
KEY FILES
status 2
title 2

----------------------------------------
tabular:
no CSV files selected

----------------------------------------
tree:
no JSON files selected

----------------------------------------
read/parse issues:
none

## Filesystem

### file_tree (n=2)

_Profile each directory's file types, naming, and depth, opening no files._
_Map files, directories, extensions, regions, and filename conventions, opening no files._

----------------------------------------
summary:
files : 2
directories : 2
max depth : 2
dominant type: -

----------------------------------------
tree:
.
└── books/
├── dune.md
└── it.md

----------------------------------------
file types:
TYPE FILES
.md 2

- classes:
- outliers:
- features=[ext:.md, casing:kebab] label=books
----------------------------------------
representative paths:
books/dune.md
books/it.md
10 changes: 4 additions & 6 deletions cmd/testdata/snapshots/inspectors/list.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
Raw-source inspectors (3)
Raw-source inspectors (2)
-------------------------
- file_tree
Profile each directory's file types, naming, and depth, opening no files.
- file_tree_content
Parse markdown and profile each directory's content shape: parse rate, frontmatter, key-sets.
- document_shape
Cluster files into candidate collections by a composite fingerprint of frontmatter, body structure, and file naming.
Map files, directories, extensions, regions, and filename conventions, opening no files.
- file_content_shape
Profile selected files by text, tabular, and tree content structure.

Collection inspectors (2)
-------------------------
Expand Down
15 changes: 0 additions & 15 deletions cmd/testdata/snapshots/inspectors/show-document_shape.txt

This file was deleted.

14 changes: 14 additions & 0 deletions cmd/testdata/snapshots/inspectors/show-file_content_shape.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Raw-source inspectors › File content shape
--------------------------------------------
- inspector: file_content_shape
- layer: source
- family: structural
- purpose: Profile selected files by text, tabular, and tree content structure.

Layer context
-------------
Raw-source inspectors profile a backend store directly, before any collection configuration: what files are present, how they parse, and how they are named.

Other raw-source inspectors (1)
-------------------------------
- file_tree
Loading
Loading