diff --git a/pkg/analyzers/couples/aggregator.go b/pkg/analyzers/couples/aggregator.go index 3596e60..36efc2c 100644 --- a/pkg/analyzers/couples/aggregator.go +++ b/pkg/analyzers/couples/aggregator.go @@ -239,8 +239,19 @@ func ticksToReport( lastCommit analyze.CommitLike, ) analyze.Report { mergedFiles := make(map[string]map[string]int) - mergedPeople := make([]map[string]int, peopleNumber+1) + // Determine the actual people count from tick data, which may exceed + // the initial peopleNumber when authors are discovered incrementally. + actualPeople := peopleNumber + 1 + + for _, tick := range ticks { + td, ok := tick.Data.(*TickData) + if ok && td != nil && len(td.People) > actualPeople { + actualPeople = len(td.People) + } + } + + mergedPeople := make([]map[string]int, actualPeople) for i := range mergedPeople { mergedPeople[i] = make(map[string]int) } @@ -259,8 +270,10 @@ func ticksToReport( mergedRenames = append(mergedRenames, td.Renames...) } + effectivePeopleNumber := actualPeople - 1 + return buildReport(ctx, mergedFiles, mergedPeople, mergedRenames, - reversedNames, peopleNumber, lastCommit) + reversedNames, effectivePeopleNumber, lastCommit) } // mergeTickFiles additively merges per-tick file couplings into the accumulator. diff --git a/pkg/analyzers/couples/aggregator_test.go b/pkg/analyzers/couples/aggregator_test.go index d6b88b5..8d3ecf3 100644 --- a/pkg/analyzers/couples/aggregator_test.go +++ b/pkg/analyzers/couples/aggregator_test.go @@ -403,3 +403,324 @@ func TestAggregator_Spill_Empty(t *testing.T) { require.NoError(t, err) assert.Zero(t, freed) } + +// --- Tests for ticksToReport people sizing (PeopleNumber=0 regression) ---. + +func TestTicksToReport_PeopleNumberZero_IncrementalAuthors(t *testing.T) { + t.Parallel() + + // Simulates the real scenario: PeopleNumber=0 at Configure time (no --people-dict), + // but the Aggregator grows people slices as authors are discovered incrementally. + // ticksToReport must use the actual people count from tick data, not the stale peopleNumber. + ticks := []analyze.TICK{ + { + Tick: 0, + Data: &TickData{ + Files: map[string]map[string]int{ + "a.go": {"a.go": 3, "b.go": 2}, + "b.go": {"a.go": 2, "b.go": 3}, + }, + // 4 authors discovered incrementally (indices 0..3). + People: []map[string]int{ + {"a.go": 5, "b.go": 3}, + {"a.go": 2}, + {"b.go": 4}, + {"a.go": 1, "b.go": 1}, + }, + PeopleCommits: []int{10, 5, 8, 2}, + }, + }, + } + + names := []string{"alice", "bob", "charlie"} + report := ticksToReport(context.Background(), ticks, names, 0, nil) + + // Verify all 4 authors made it into the PeopleMatrix (not truncated to 1). + matrix, ok := report["PeopleMatrix"].([]map[int]int64) + require.True(t, ok, "PeopleMatrix should be []map[int]int64") + require.Len(t, matrix, 4, "PeopleMatrix should have 4 entries (one per author)") + + // Verify PeopleFiles has entries for all authors. + pf, ok := report["PeopleFiles"].([][]int) + require.True(t, ok, "PeopleFiles should be [][]int") + require.Len(t, pf, 4, "PeopleFiles should have 4 entries") + + // Alice (idx 0) touched both a.go and b.go → should appear in PeopleFiles[0]. + assert.Len(t, pf[0], 2, "alice should have touched 2 files") + + // Verify cross-author coupling: alice and bob both touched a.go. + // PeopleMatrix[0][1] should be > 0 (shared file touches). + assert.Positive(t, matrix[0][1], "alice-bob coupling should be positive (shared a.go)") + + // Verify diagonal: alice self-coupling should reflect her total touches. + assert.Positive(t, matrix[0][0], "alice self-coupling should be positive") +} + +func TestTicksToReport_MultipleTicks_PeopleGrowth(t *testing.T) { + t.Parallel() + + // Tick 0: 2 authors. + // Tick 1: 4 authors (grew during processing). + ticks := []analyze.TICK{ + { + Tick: 0, + Data: &TickData{ + Files: map[string]map[string]int{ + "a.go": {"a.go": 2}, + }, + People: []map[string]int{{"a.go": 3}, {"a.go": 1}}, + PeopleCommits: []int{5, 2}, + }, + }, + { + Tick: 1, + Data: &TickData{ + Files: map[string]map[string]int{ + "a.go": {"a.go": 1}, + }, + People: []map[string]int{{"a.go": 1}, {}, {"a.go": 2}, {"a.go": 1}}, + PeopleCommits: []int{1, 0, 3, 1}, + }, + }, + } + + report := ticksToReport(context.Background(), ticks, []string{"a", "b", "c"}, 1, nil) + + matrix, ok := report["PeopleMatrix"].([]map[int]int64) + require.True(t, ok) + // Should have 4 entries (max across ticks), not 2 (from peopleNumber=1 → 1+1=2). + require.Len(t, matrix, 4) +} + +func TestMergeTickPeople_DstSmallerThanSrc(t *testing.T) { + t.Parallel() + + // dst has 2 slots, src has 4. + // Should merge the first 2 without panic and silently skip the rest. + dst := []map[string]int{ + {"a.go": 1}, + {"b.go": 2}, + } + src := []map[string]int{ + {"a.go": 3}, + {"b.go": 1}, + {"c.go": 5}, + {"d.go": 7}, + } + + mergeTickPeople(dst, src) + + assert.Equal(t, 4, dst[0]["a.go"]) // 1 + 3 + assert.Equal(t, 3, dst[1]["b.go"]) // 2 + 1 +} + +func TestMergeTickPeople_SrcSmallerThanDst(t *testing.T) { + t.Parallel() + + dst := []map[string]int{ + {"a.go": 1}, + {"b.go": 2}, + {"c.go": 3}, + } + src := []map[string]int{ + {"a.go": 10}, + } + + mergeTickPeople(dst, src) + + assert.Equal(t, 11, dst[0]["a.go"]) // 1 + 10 + assert.Equal(t, 2, dst[1]["b.go"]) // Unchanged. + assert.Equal(t, 3, dst[2]["c.go"]) // Unchanged. +} + +func TestTicksToReport_VerifyPeopleMatrixValues(t *testing.T) { + t.Parallel() + + // Two authors share file a.go. The PeopleMatrix should reflect their coupling. + ticks := []analyze.TICK{ + { + Tick: 0, + Data: &TickData{ + Files: map[string]map[string]int{ + "a.go": {"a.go": 5, "b.go": 2}, + "b.go": {"a.go": 2, "b.go": 3}, + }, + People: []map[string]int{ + {"a.go": 4, "b.go": 2}, // alice: 4 touches on a.go, 2 on b.go. + {"a.go": 3}, // bob: 3 touches on a.go. + }, + PeopleCommits: []int{10, 5}, + }, + }, + } + + report := ticksToReport(context.Background(), ticks, []string{"alice", "bob"}, 1, nil) + + matrix, ok := report["PeopleMatrix"].([]map[int]int64) + require.True(t, ok) + require.Len(t, matrix, 2) + + // Alice-Bob coupling on a.go: min(4, 3) = 3. + assert.Equal(t, int64(3), matrix[0][1], "alice-bob coupling = min(4,3) on shared file a.go") + assert.Equal(t, int64(3), matrix[1][0], "bob-alice coupling should be symmetric") + + // Alice self-coupling: min(4,4) on a.go + min(2,2) on b.go = 4+2 = 6. + assert.Equal(t, int64(6), matrix[0][0], "alice self-coupling = 4 (a.go) + 2 (b.go)") + + // Bob self-coupling: min(3,3) on a.go = 3. + assert.Equal(t, int64(3), matrix[1][1], "bob self-coupling = 3 (a.go)") + + // Verify FilesMatrix values. + fm, ok := report["FilesMatrix"].([]map[int]int64) + require.True(t, ok) + require.Len(t, fm, 2) + + // Files should be ["a.go", "b.go"] sorted. + files, ok := report["Files"].([]string) + require.True(t, ok) + assert.Equal(t, []string{"a.go", "b.go"}, files) + + // FilesMatrix[0] (a.go): self=5, coupled with b.go=2. + assert.Equal(t, int64(5), fm[0][0], "a.go self-coupling") + assert.Equal(t, int64(2), fm[0][1], "a.go-b.go coupling") +} + +func TestComputePeopleMatrix_ThreeAuthorsOverlapping(t *testing.T) { + t.Parallel() + + // 3 authors with overlapping file touches. + filesIndex := map[string]int{"a.go": 0, "b.go": 1, "c.go": 2} + people := []map[string]int{ + {"a.go": 5, "b.go": 3}, // alice: 5 on a, 3 on b. + {"a.go": 2, "b.go": 4, "c.go": 1}, // bob: 2 on a, 4 on b, 1 on c. + {"b.go": 6, "c.go": 3}, // charlie: 6 on b, 3 on c. + } + + matrix, pf := computePeopleMatrix(people, filesIndex, 2) + + require.Len(t, matrix, 3) + require.Len(t, pf, 3) + + // alice-bob coupling: min(5,2) on a.go + min(3,4) on b.go = 2+3 = 5. + assert.Equal(t, int64(5), matrix[0][1], "alice-bob coupling") + assert.Equal(t, int64(5), matrix[1][0], "bob-alice coupling (symmetric)") + + // alice-charlie coupling: min(3,6) on b.go = 3. + assert.Equal(t, int64(3), matrix[0][2], "alice-charlie coupling (shared b.go)") + assert.Equal(t, int64(3), matrix[2][0], "charlie-alice coupling (symmetric)") + + // bob-charlie coupling: min(4,6) on b.go + min(1,3) on c.go = 4+1 = 5. + assert.Equal(t, int64(5), matrix[1][2], "bob-charlie coupling") + assert.Equal(t, int64(5), matrix[2][1], "charlie-bob coupling (symmetric)") + + // Self couplings. + // alice: min(5,5) on a + min(3,3) on b = 8. + assert.Equal(t, int64(8), matrix[0][0], "alice self-coupling") + // bob: min(2,2) on a + min(4,4) on b + min(1,1) on c = 7. + assert.Equal(t, int64(7), matrix[1][1], "bob self-coupling") + // charlie: min(6,6) on b + min(3,3) on c = 9. + assert.Equal(t, int64(9), matrix[2][2], "charlie self-coupling") + + // PeopleFiles: alice has a.go(0), b.go(1). Sorted. + assert.Equal(t, []int{0, 1}, pf[0]) + // bob has a.go(0), b.go(1), c.go(2). Sorted. + assert.Equal(t, []int{0, 1, 2}, pf[1]) + // charlie has b.go(1), c.go(2). Sorted. + assert.Equal(t, []int{1, 2}, pf[2]) +} + +func TestEndToEnd_Consume_Aggregator_TicksToReport(t *testing.T) { + t.Parallel() + + // Simulate the full pipeline: multiple commits → Aggregator → FlushAllTicks → ticksToReport. + // This is the closest we can get to an integration test without a real git repo. + + // Step 1: Build commit data as Consume would produce. + commits := []analyze.TC{ + { + AuthorID: 0, + Data: &CommitData{ + CouplingFiles: []string{"a.go", "b.go"}, + AuthorFiles: map[string]int{"a.go": 1, "b.go": 1}, + CommitCounted: true, + }, + }, + { + AuthorID: 1, + Data: &CommitData{ + CouplingFiles: []string{"a.go", "c.go"}, + AuthorFiles: map[string]int{"a.go": 1, "c.go": 1}, + CommitCounted: true, + }, + }, + { + AuthorID: 2, + Data: &CommitData{ + CouplingFiles: []string{"b.go", "c.go"}, + AuthorFiles: map[string]int{"b.go": 1, "c.go": 1}, + CommitCounted: true, + }, + }, + { + AuthorID: 0, + Data: &CommitData{ + CouplingFiles: []string{"a.go"}, + AuthorFiles: map[string]int{"a.go": 1}, + CommitCounted: true, + }, + }, + } + + // Step 2: Feed through Aggregator (PeopleNumber=0 — no --people-dict). + agg := newAggregator(analyze.AggregatorOptions{}, 0, nil, nil) + defer agg.Close() + + for _, tc := range commits { + require.NoError(t, agg.Add(tc)) + } + + // Step 3: FlushAllTicks. + ticks, err := agg.FlushAllTicks() + require.NoError(t, err) + require.Len(t, ticks, 1) + + // Step 4: ticksToReport (PeopleNumber=0, simulating no --people-dict). + names := []string{"alice", "bob", "charlie"} + report := ticksToReport(context.Background(), ticks, names, 0, nil) + + // Verify report structure. + matrix, ok := report["PeopleMatrix"].([]map[int]int64) + require.True(t, ok, "PeopleMatrix type assertion") + require.Len(t, matrix, 3, "should have 3 authors") + + pf, ok := report["PeopleFiles"].([][]int) + require.True(t, ok, "PeopleFiles type assertion") + require.Len(t, pf, 3, "should have 3 author entries in PeopleFiles") + + files, ok := report["Files"].([]string) + require.True(t, ok, "Files type assertion") + assert.ElementsMatch(t, []string{"a.go", "b.go", "c.go"}, files) + + // Verify coupling values. + // Alice touched a.go twice, b.go once. Bob touched a.go once, c.go once. + // Alice-Bob coupling: shared a.go → min(2,1)=1. + assert.Equal(t, int64(1), matrix[0][1], "alice-bob coupling via shared a.go") + + // Alice self-coupling: min(2,2) on a.go + min(1,1) on b.go = 3. + assert.Equal(t, int64(3), matrix[0][0], "alice self-coupling") + + // Bob self: min(1,1) on a + min(1,1) on c = 2. + assert.Equal(t, int64(2), matrix[1][1], "bob self-coupling") + + // Charlie self: min(1,1) on b + min(1,1) on c = 2. + assert.Equal(t, int64(2), matrix[2][2], "charlie self-coupling") + + // Bob-Charlie: shared c.go → min(1,1) = 1. + assert.Equal(t, int64(1), matrix[1][2], "bob-charlie coupling via shared c.go") + + // Verify the report can be consumed by ComputeAllMetrics. + metrics, err := ComputeAllMetrics(report) + require.NoError(t, err) + assert.Equal(t, 3, metrics.Aggregate.TotalFiles) + assert.Len(t, metrics.DeveloperCoupling, 3, "should have 3 developer coupling pairs") +} diff --git a/pkg/analyzers/couples/history.go b/pkg/analyzers/couples/history.go index 8d25463..4c62b31 100644 --- a/pkg/analyzers/couples/history.go +++ b/pkg/analyzers/couples/history.go @@ -3,16 +3,11 @@ package couples import ( "context" - "encoding/json" "errors" - "fmt" "io" "sort" - "gopkg.in/yaml.v3" - "github.com/Sumatoshi-tech/codefang/pkg/analyzers/analyze" - "github.com/Sumatoshi-tech/codefang/pkg/analyzers/common/reportutil" "github.com/Sumatoshi-tech/codefang/pkg/analyzers/plumbing" "github.com/Sumatoshi-tech/codefang/pkg/gitlib" "github.com/Sumatoshi-tech/codefang/pkg/identity" @@ -166,6 +161,12 @@ func (c *HistoryAnalyzer) Consume(_ context.Context, ac *analyze.Context) (analy CommitCounted: true, } + // Skip oversized changesets — mass changes (formatting, license headers, + // dependency bumps) produce noise rather than meaningful coupling signal. + if len(c.TreeDiff.Changes) > CouplesMaximumMeaningfulContextSize { + return analyze.TC{Data: &data}, nil + } + for _, change := range c.TreeDiff.Changes { c.processChange(change, mergeMode, author, &data) } @@ -201,19 +202,21 @@ func (c *HistoryAnalyzer) processChange(change *gitlib.Change, mergeMode bool, a c.seenFiles[name] = true if author != identity.AuthorMissing { - data.AuthorFiles[name] = author + data.AuthorFiles[name] = 1 } return } if !c.seenFiles[name] { - // Merge mode and not delete. + // Merge mode: only add to coupling context if file not seen before. data.CouplingFiles = append(data.CouplingFiles, name) + } - if author != identity.AuthorMissing { - data.AuthorFiles[name] = author - } + // Always record author touch, even for previously-seen files in merge mode. + // This mirrors the devs analyzer pattern: coupling dedup ≠ ownership dedup. + if author != identity.AuthorMissing { + data.AuthorFiles[name] = 1 } } @@ -389,66 +392,17 @@ func (c *HistoryAnalyzer) mergeMerges(other map[gitlib.Hash]bool) { } // Serialize writes the analysis result to the given writer. +// Text and plot formats are handled here; JSON/YAML/Binary delegate to the base. func (c *HistoryAnalyzer) Serialize(result analyze.Report, format string, writer io.Writer) error { - switch format { - case analyze.FormatJSON: - return c.serializeJSON(result, writer) - case analyze.FormatYAML: - return c.serializeYAML(result, writer) - case analyze.FormatPlot: - return c.generatePlot(result, writer) - case analyze.FormatBinary: - return c.serializeBinary(result, writer) - default: - return fmt.Errorf("%w: %s", analyze.ErrUnsupportedFormat, format) - } -} - -func (c *HistoryAnalyzer) serializeJSON(result analyze.Report, writer io.Writer) error { - metrics, err := ComputeAllMetrics(result) - if err != nil { - metrics = &ComputedMetrics{} - } - - err = json.NewEncoder(writer).Encode(metrics) - if err != nil { - return fmt.Errorf("json encode: %w", err) - } - - return nil -} - -func (c *HistoryAnalyzer) serializeYAML(result analyze.Report, writer io.Writer) error { - metrics, err := ComputeAllMetrics(result) - if err != nil { - metrics = &ComputedMetrics{} - } - - data, err := yaml.Marshal(metrics) - if err != nil { - return fmt.Errorf("yaml marshal: %w", err) - } - - _, err = writer.Write(data) - if err != nil { - return fmt.Errorf("yaml write: %w", err) + if format == analyze.FormatText { + return c.generateText(result, writer) } - return nil -} - -func (c *HistoryAnalyzer) serializeBinary(result analyze.Report, writer io.Writer) error { - metrics, err := ComputeAllMetrics(result) - if err != nil { - metrics = &ComputedMetrics{} - } - - err = reportutil.EncodeBinaryEnvelope(metrics, writer) - if err != nil { - return fmt.Errorf("binary encode: %w", err) + if format == analyze.FormatPlot { + return c.generatePlot(result, writer) } - return nil + return c.BaseHistoryAnalyzer.Serialize(result, format, writer) } // NewAggregator creates a new aggregator for this analyzer. diff --git a/pkg/analyzers/couples/history_test.go b/pkg/analyzers/couples/history_test.go index 93c848e..e6fc3a4 100644 --- a/pkg/analyzers/couples/history_test.go +++ b/pkg/analyzers/couples/history_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "encoding/json" + "fmt" "testing" "time" @@ -85,8 +86,8 @@ func TestHistoryAnalyzer_Consume_ReturnsTC(t *testing.T) { cd, ok := tc.Data.(*CommitData) require.True(t, ok, "expected *CommitData") assert.ElementsMatch(t, []string{"f1", "f2"}, cd.CouplingFiles) - assert.Equal(t, 0, cd.AuthorFiles["f1"]) // The author ID is 0 from c.Identity.AuthorID = 0. - assert.Equal(t, 0, cd.AuthorFiles["f2"]) + assert.Equal(t, 1, cd.AuthorFiles["f1"]) // Touch count = 1 per file per commit. + assert.Equal(t, 1, cd.AuthorFiles["f2"]) assert.True(t, cd.CommitCounted) assert.Empty(t, cd.Renames) @@ -127,7 +128,7 @@ func TestHistoryAnalyzer_Consume_Delete(t *testing.T) { require.True(t, ok) // Deletes don't add to coupling context. assert.Empty(t, cd.CouplingFiles) - assert.Equal(t, 0, cd.AuthorFiles["f1"]) // The author ID is 0 from c.Identity.AuthorID = 0. + assert.Equal(t, 1, cd.AuthorFiles["f1"]) // Touch count = 1 (deletes still record author touch). } func TestHistoryAnalyzer_Consume_Rename(t *testing.T) { @@ -238,10 +239,11 @@ func TestHistoryAnalyzer_Consume_MergeMode(t *testing.T) { cd, ok := tc.Data.(*CommitData) require.True(t, ok) - // existing.go should be filtered out (already seen in merge mode). + // existing.go should be filtered from coupling context (already seen in merge mode), + // but author attribution should still be recorded for ownership tracking. assert.Equal(t, []string{"new.go"}, cd.CouplingFiles) - assert.Equal(t, 0, cd.AuthorFiles["new.go"]) // The author ID is 0 from c.Identity.AuthorID = 0. - assert.Zero(t, cd.AuthorFiles["existing.go"]) + assert.Equal(t, 1, cd.AuthorFiles["new.go"]) // Touch count = 1 per file per commit. + assert.Equal(t, 1, cd.AuthorFiles["existing.go"]) // Author touch recorded even for seen files. } func TestHistoryAnalyzer_Fork_WorkingStateOnly(t *testing.T) { @@ -305,7 +307,7 @@ func TestHistoryAnalyzer_Merge_WorkingState(t *testing.T) { func TestHistoryAnalyzer_Serialize_JSON(t *testing.T) { t.Parallel() - c := &HistoryAnalyzer{} + c := NewHistoryAnalyzer() fm := []map[int]int64{{1: 3}, {0: 3}} pm := []map[int]int64{{1: 5}, {0: 5}} @@ -338,7 +340,7 @@ func TestHistoryAnalyzer_Serialize_JSON(t *testing.T) { func TestHistoryAnalyzer_Serialize_YAML(t *testing.T) { t.Parallel() - c := &HistoryAnalyzer{} + c := NewHistoryAnalyzer() fm := []map[int]int64{{1: 3}, {0: 3}} pm := []map[int]int64{{1: 5}, {0: 5}} @@ -367,7 +369,7 @@ func TestHistoryAnalyzer_Serialize_YAML(t *testing.T) { func TestHistoryAnalyzer_Serialize_Binary(t *testing.T) { t.Parallel() - c := &HistoryAnalyzer{} + c := NewHistoryAnalyzer() report := analyze.Report{ "Files": []string{"f1.go"}, @@ -388,6 +390,86 @@ func TestHistoryAnalyzer_Serialize_Binary(t *testing.T) { } } +func TestHistoryAnalyzer_Consume_OversizedChangeset(t *testing.T) { + t.Parallel() + + c := &HistoryAnalyzer{ + PeopleNumber: 1, + Identity: &plumbing.IdentityDetector{}, + TreeDiff: &plumbing.TreeDiffAnalyzer{}, + } + require.NoError(t, c.Initialize(nil)) + + // Create a changeset larger than the maximum meaningful context size. + hash := gitlib.NewHash("1111111111111111111111111111111111111111") + changes := make(gitlib.Changes, CouplesMaximumMeaningfulContextSize+1) + + for i := range changes { + changes[i] = &gitlib.Change{ + Action: gitlib.Insert, + To: gitlib.ChangeEntry{Name: fmt.Sprintf("f%d.go", i), Hash: hash}, + } + } + + c.TreeDiff.Changes = changes + c.Identity.AuthorID = 0 + + commit := gitlib.NewTestCommit( + gitlib.NewHash("c600000000000000000000000000000000000006"), + gitlib.Signature{When: time.Now()}, + "mass_change", + ) + + tc, err := c.Consume(context.Background(), &analyze.Context{Commit: commit}) + require.NoError(t, err) + + cd, ok := tc.Data.(*CommitData) + require.True(t, ok) + // Oversized changeset should produce empty coupling data. + assert.Empty(t, cd.CouplingFiles) + assert.Empty(t, cd.AuthorFiles) + assert.True(t, cd.CommitCounted) +} + +func TestHistoryAnalyzer_Consume_ExactMaxChangeset(t *testing.T) { + t.Parallel() + + c := &HistoryAnalyzer{ + PeopleNumber: 1, + Identity: &plumbing.IdentityDetector{}, + TreeDiff: &plumbing.TreeDiffAnalyzer{}, + } + require.NoError(t, c.Initialize(nil)) + + // Exactly at the limit — should still be processed. + hash := gitlib.NewHash("1111111111111111111111111111111111111111") + changes := make(gitlib.Changes, CouplesMaximumMeaningfulContextSize) + + for i := range changes { + changes[i] = &gitlib.Change{ + Action: gitlib.Insert, + To: gitlib.ChangeEntry{Name: fmt.Sprintf("f%d.go", i), Hash: hash}, + } + } + + c.TreeDiff.Changes = changes + c.Identity.AuthorID = 0 + + commit := gitlib.NewTestCommit( + gitlib.NewHash("c700000000000000000000000000000000000007"), + gitlib.Signature{When: time.Now()}, + "exact_limit", + ) + + tc, err := c.Consume(context.Background(), &analyze.Context{Commit: commit}) + require.NoError(t, err) + + cd, ok := tc.Data.(*CommitData) + require.True(t, ok) + // Exactly at limit should still be processed. + assert.Len(t, cd.CouplingFiles, CouplesMaximumMeaningfulContextSize) +} + func TestHistoryAnalyzer_Misc(t *testing.T) { t.Parallel() diff --git a/pkg/analyzers/couples/metrics.go b/pkg/analyzers/couples/metrics.go index d6f2938..c6d370b 100644 --- a/pkg/analyzers/couples/metrics.go +++ b/pkg/analyzers/couples/metrics.go @@ -130,13 +130,17 @@ func (m *FileCouplingMetric) Compute(input *ReportData) []FileCouplingData { file2 := input.Files[j] - // Calculate coupling strength (normalized by max possible). - maxChanges := max(coChanges, row[i]) + // Calculate coupling strength using code-maat formula: + // degree = co_changes / average(revisions_file1, revisions_file2) + // where revisions = diagonal element (self-change count). + selfI := row[i] // file1 self-changes. + selfJ := input.FilesMatrix[j][j] // file2 self-changes. + avgRevs := float64(selfI+selfJ) / 2.0 //nolint:mnd // average of two values. var strength float64 - if maxChanges > 0 { - strength = float64(coChanges) / float64(maxChanges) + if avgRevs > 0 { + strength = min(float64(coChanges)/avgRevs, 1.0) } result = append(result, FileCouplingData{ @@ -179,7 +183,7 @@ func (m *DeveloperCouplingMetric) Compute(input *ReportData) []DeveloperCoupling result := make([]DeveloperCouplingData, 0, len(input.PeopleMatrix)) for i, row := range input.PeopleMatrix { - couplings := computeDevCouplings(i, row, input.ReversedPeopleDict) + couplings := computeDevCouplings(i, row, input.PeopleMatrix, input.ReversedPeopleDict) result = append(result, couplings...) } @@ -190,7 +194,7 @@ func (m *DeveloperCouplingMetric) Compute(input *ReportData) []DeveloperCoupling return result } -func computeDevCouplings(devIdx int, row map[int]int64, names []string) []DeveloperCouplingData { +func computeDevCouplings(devIdx int, row map[int]int64, matrix []map[int]int64, names []string) []DeveloperCouplingData { dev1 := getDevName(devIdx, names) var result []DeveloperCouplingData @@ -200,20 +204,28 @@ func computeDevCouplings(devIdx int, row map[int]int64, names []string) []Develo continue } - coupling := buildCouplingData(dev1, j, sharedChanges, row[devIdx], names) + selfDev2 := int64(0) + if j < len(matrix) { + selfDev2 = matrix[j][j] + } + + coupling := buildCouplingData(dev1, j, sharedChanges, row[devIdx], selfDev2, names) result = append(result, coupling) } return result } -func buildCouplingData(dev1 string, dev2Idx int, sharedChanges, selfChanges int64, names []string) DeveloperCouplingData { +func buildCouplingData(dev1 string, dev2Idx int, sharedChanges, selfDev1, selfDev2 int64, names []string) DeveloperCouplingData { dev2 := getDevName(dev2Idx, names) - maxChanges := max(sharedChanges, selfChanges) + + // Coupling strength using code-maat formula: + // degree = shared_changes / average(self_dev1, self_dev2), capped at 1.0. + avgRevs := float64(selfDev1+selfDev2) / 2.0 //nolint:mnd // average of two values. var strength float64 - if maxChanges > 0 { - strength = float64(sharedChanges) / float64(maxChanges) + if avgRevs > 0 { + strength = min(float64(sharedChanges)/avgRevs, 1.0) } return DeveloperCouplingData{ @@ -302,6 +314,28 @@ func NewAggregateMetric() *AggregateMetric { } } +// aggregateAccum accumulates statistics while iterating file pairs. +type aggregateAccum struct { + totalCoChanges int64 + pairCount int + highlyCoupled int + totalStrength float64 +} + +func (a *aggregateAccum) addPair(coChanges, selfI, selfJ int64) { + a.totalCoChanges += coChanges + a.pairCount++ + + if coChanges >= CouplingThresholdHigh { + a.highlyCoupled++ + } + + avgRevs := float64(selfI+selfJ) / 2.0 //nolint:mnd // average of two values. + if avgRevs > 0 { + a.totalStrength += min(float64(coChanges)/avgRevs, 1.0) + } +} + // Compute calculates aggregate statistics. func (m *AggregateMetric) Compute(input *ReportData) AggregateData { agg := AggregateData{ @@ -309,39 +343,134 @@ func (m *AggregateMetric) Compute(input *ReportData) AggregateData { TotalDevelopers: len(input.ReversedPeopleDict), } - var totalCoChanges int64 - - var pairCount int - - var highlyCouplded int + var acc aggregateAccum for i, row := range input.FilesMatrix { for j, coChanges := range row { - if j <= i { + if j <= i || coChanges <= 0 { continue } - if coChanges > 0 { - totalCoChanges += coChanges - pairCount++ - - if coChanges >= CouplingThresholdHigh { - highlyCouplded++ - } - } + acc.addPair(coChanges, row[i], input.FilesMatrix[j][j]) } } - agg.TotalCoChanges = totalCoChanges - agg.HighlyCoupledPairs = highlyCouplded + agg.TotalCoChanges = acc.totalCoChanges + agg.HighlyCoupledPairs = acc.highlyCoupled - if pairCount > 0 { - agg.AvgCouplingStrength = float64(totalCoChanges) / float64(pairCount) + if acc.pairCount > 0 { + agg.AvgCouplingStrength = acc.totalStrength / float64(acc.pairCount) } return agg } +// --- Data Reduction / Bucketing (used by both text and plot renderers) ---. + +// OwnershipBucket categorizes files by their contributor count. +type OwnershipBucket struct { + Label string `json:"label" yaml:"label"` + Count int `json:"count" yaml:"count"` +} + +// Ownership contributor count thresholds. +const ( + ownershipFewThreshold = 3 + ownershipModerateThreshold = 5 +) + +// BucketOwnership groups file ownership data into contributor count categories. +func BucketOwnership(ownership []FileOwnershipData) []OwnershipBucket { + single, few, moderate, many := 0, 0, 0, 0 + + for _, fo := range ownership { + switch { + case fo.Contributors <= 1: + single++ + case fo.Contributors <= ownershipFewThreshold: + few++ + case fo.Contributors <= ownershipModerateThreshold: + moderate++ + default: + many++ + } + } + + return []OwnershipBucket{ + {Label: "Single owner", Count: single}, + {Label: "2-3 owners", Count: few}, + {Label: "4-5 owners", Count: moderate}, + {Label: "6+ owners", Count: many}, + } +} + +// SortOwnershipByRisk returns a copy sorted by contributors ascending (highest risk first). +func SortOwnershipByRisk(ownership []FileOwnershipData) []FileOwnershipData { + sorted := make([]FileOwnershipData, len(ownership)) + copy(sorted, ownership) + + sort.Slice(sorted, func(i, j int) bool { + return sorted[i].Contributors < sorted[j].Contributors + }) + + return sorted +} + +// FilterTopDevs limits a developer coupling matrix to the top N developers +// ranked by diagonal value (activity). Returns the original data if within limit. +func FilterTopDevs(matrix []map[int]int64, names []string, limit int) (filtered []map[int]int64, filteredNames []string) { + if len(names) <= limit { + return matrix, names + } + + // Rank developers by diagonal value (self-activity). + type devActivity struct { + idx int + activity int64 + } + + devs := make([]devActivity, len(names)) + for i := range names { + devs[i] = devActivity{idx: i, activity: matrix[i][i]} + } + + sort.Slice(devs, func(a, b int) bool { + return devs[a].activity > devs[b].activity + }) + + // Take top N. + topN := devs[:limit] + + // Build index mapping: old index → new index. + oldToNew := make(map[int]int, limit) + newNames := make([]string, limit) + + for newIdx, d := range topN { + oldToNew[d.idx] = newIdx + newNames[newIdx] = names[d.idx] + } + + // Build filtered sub-matrix. + newMatrix := make([]map[int]int64, limit) + for i := range newMatrix { + newMatrix[i] = make(map[int]int64) + } + + for _, d := range topN { + oldI := d.idx + newI := oldToNew[oldI] + + for oldJ, val := range matrix[oldI] { + newJ, ok := oldToNew[oldJ] + if ok { + newMatrix[newI][newJ] = val + } + } + } + + return newMatrix, newNames +} + // --- Computed Metrics ---. // ComputedMetrics holds all computed metric results for the couples analyzer. diff --git a/pkg/analyzers/couples/metrics_test.go b/pkg/analyzers/couples/metrics_test.go index 33117e9..80760c9 100644 --- a/pkg/analyzers/couples/metrics_test.go +++ b/pkg/analyzers/couples/metrics_test.go @@ -108,8 +108,8 @@ func TestFileCouplingMetric_SinglePair(t *testing.T) { assert.Equal(t, testFile1, result[0].File1) assert.Equal(t, testFile2, result[0].File2) assert.Equal(t, int64(5), result[0].CoChanges) - // Strength = 5 / max(5, 10) = 5/10 = 0.5. - assert.InDelta(t, 0.5, result[0].Strength, floatDelta) + // Strength = co_changes / avg(self_file1, self_file2) = 5 / avg(10, 8) = 5/9 ≈ 0.556. + assert.InDelta(t, 5.0/9.0, result[0].Strength, floatDelta) } func TestFileCouplingMetric_MultiplePairs_SortedByCoChanges(t *testing.T) { @@ -213,8 +213,8 @@ func TestDeveloperCouplingMetric_SinglePair(t *testing.T) { assert.Equal(t, testDev1, result[0].Developer1) assert.Equal(t, testDev2, result[0].Developer2) assert.Equal(t, int64(10), result[0].SharedFiles) - // Strength = 10 / max(10, 20) = 0.5. - assert.InDelta(t, 0.5, result[0].Strength, floatDelta) + // Strength = shared / avg(self_dev1, self_dev2) = 10 / avg(20, 15) = 10/17.5 ≈ 0.571. + assert.InDelta(t, 10.0/17.5, result[0].Strength, floatDelta) } func TestDeveloperCouplingMetric_MultiplePairs_SortedBySharedFiles(t *testing.T) { @@ -424,8 +424,12 @@ func TestCouplesAggregateMetric_WithData(t *testing.T) { assert.Equal(t, 2, result.TotalDevelopers) // Total co-changes = 15 + 5 + 3 = 23 (upper triangle only). assert.Equal(t, int64(23), result.TotalCoChanges) - // 3 pairs with non-zero changes. - assert.InDelta(t, 23.0/3.0, result.AvgCouplingStrength, floatDelta) + // AvgCouplingStrength = average of per-pair strengths using code-maat formula: + // file1-file2: min(15/avg(10,8), 1.0) = min(15/9, 1.0) = 1.0 + // file1-file3: 5/avg(10,6) = 5/8 = 0.625 + // file2-file3: 3/avg(8,6) = 3/7 ≈ 0.4286 + // avg = (1.0 + 0.625 + 0.4286) / 3 ≈ 0.6845 + assert.InDelta(t, (1.0+5.0/8.0+3.0/7.0)/3.0, result.AvgCouplingStrength, floatDelta) // Highly coupled pairs (>= 10): 15 only. assert.Equal(t, 1, result.HighlyCoupledPairs) } @@ -544,3 +548,119 @@ func TestComputedMetrics_ToYAML(t *testing.T) { assert.Equal(t, m, result) } + +// --- BucketOwnership Tests ---. + +func TestBucketOwnership_Empty(t *testing.T) { + t.Parallel() + + buckets := BucketOwnership(nil) + require.Len(t, buckets, 4) + + for _, b := range buckets { + assert.Zero(t, b.Count) + } +} + +func TestBucketOwnership_AllCategories(t *testing.T) { + t.Parallel() + + ownership := []FileOwnershipData{ + {File: "a.go", Contributors: 0}, // Single. + {File: "b.go", Contributors: 1}, // Single. + {File: "c.go", Contributors: 2}, // Few (2-3). + {File: "d.go", Contributors: 3}, // Few (2-3). + {File: "e.go", Contributors: 4}, // Moderate (4-5). + {File: "f.go", Contributors: 5}, // Moderate (4-5). + {File: "g.go", Contributors: 6}, // Many (6+). + {File: "h.go", Contributors: 10}, // Many (6+). + } + + buckets := BucketOwnership(ownership) + require.Len(t, buckets, 4) + + assert.Equal(t, "Single owner", buckets[0].Label) + assert.Equal(t, 2, buckets[0].Count) + + assert.Equal(t, "2-3 owners", buckets[1].Label) + assert.Equal(t, 2, buckets[1].Count) + + assert.Equal(t, "4-5 owners", buckets[2].Label) + assert.Equal(t, 2, buckets[2].Count) + + assert.Equal(t, "6+ owners", buckets[3].Label) + assert.Equal(t, 2, buckets[3].Count) +} + +// --- SortOwnershipByRisk Tests ---. + +func TestSortOwnershipByRisk_SortsAscending(t *testing.T) { + t.Parallel() + + ownership := []FileOwnershipData{ + {File: "a.go", Contributors: 5}, + {File: "b.go", Contributors: 1}, + {File: "c.go", Contributors: 3}, + } + + sorted := SortOwnershipByRisk(ownership) + require.Len(t, sorted, 3) + assert.Equal(t, "b.go", sorted[0].File) // 1 = highest risk. + assert.Equal(t, "c.go", sorted[1].File) // 3 + assert.Equal(t, "a.go", sorted[2].File) // 5 = lowest risk. +} + +func TestSortOwnershipByRisk_DoesNotMutateOriginal(t *testing.T) { + t.Parallel() + + ownership := []FileOwnershipData{ + {File: "a.go", Contributors: 5}, + {File: "b.go", Contributors: 1}, + } + + sorted := SortOwnershipByRisk(ownership) + + // Original should remain unchanged. + assert.Equal(t, "a.go", ownership[0].File) + assert.Equal(t, "b.go", sorted[0].File) +} + +// --- FilterTopDevs Tests ---. + +func TestFilterTopDevs_UnderLimit(t *testing.T) { + t.Parallel() + + matrix := []map[int]int64{ + {0: 10, 1: 3}, + {0: 3, 1: 20}, + } + names := []string{"alice", "bob"} + + fm, fn := FilterTopDevs(matrix, names, 5) + assert.Equal(t, matrix, fm) + assert.Equal(t, names, fn) +} + +func TestFilterTopDevs_OverLimit(t *testing.T) { + t.Parallel() + + // 3 devs, limit to 2: charlie (diag=30) and bob (diag=20) should remain. + matrix := []map[int]int64{ + {0: 10, 1: 5, 2: 2}, + {0: 5, 1: 20, 2: 8}, + {0: 2, 1: 8, 2: 30}, + } + names := []string{"alice", "bob", "charlie"} + + fm, fn := FilterTopDevs(matrix, names, 2) + require.Len(t, fn, 2) + require.Len(t, fm, 2) + + // Top 2 by diagonal: charlie (30), bob (20). + assert.Equal(t, "charlie", fn[0]) + assert.Equal(t, "bob", fn[1]) + + // Verify coupling between charlie and bob is preserved. + assert.Equal(t, int64(8), fm[0][1]) + assert.Equal(t, int64(8), fm[1][0]) +} diff --git a/pkg/analyzers/couples/plot.go b/pkg/analyzers/couples/plot.go index e6c32a9..690bec9 100644 --- a/pkg/analyzers/couples/plot.go +++ b/pkg/analyzers/couples/plot.go @@ -2,8 +2,10 @@ package couples import ( "errors" + "fmt" "io" "sort" + "strings" "github.com/go-echarts/go-echarts/v2/charts" "github.com/go-echarts/go-echarts/v2/components" @@ -14,11 +16,21 @@ import ( ) const ( - heatMapHeight = "650px" labelRotate = 60 labelFontSize = 10 innerLabelSize = 9 emptyChartHeight = "400px" + barChartHeight = "500px" + pieChartWidth = "600px" + pieChartHeight = "400px" + pieRadius = "65%" + maxFileCouples = 20 + maxHeatmapDevs = 20 + heatmapMinHeight = 400 + heatmapMaxHeight = 900 + heatmapPerDev = 30 + heatmapPadding = 200 + maxDevNameLen = 30 ) // ErrInvalidMatrix indicates the report doesn't contain expected matrix data. @@ -34,8 +46,8 @@ func (c *HistoryAnalyzer) generatePlot(report analyze.Report, writer io.Writer) } page := plotpage.NewPage( - "Developer Coupling Analysis", - "Co-occurrence patterns between developers based on commit history", + "Couples Analysis", + "File coupling, developer coupling, and ownership patterns from commit history", ) page.Add(sections...) @@ -44,28 +56,69 @@ func (c *HistoryAnalyzer) generatePlot(report analyze.Report, writer io.Writer) // GenerateSections returns the sections for combined reports. func (c *HistoryAnalyzer) GenerateSections(report analyze.Report) (sections []plotpage.Section, err error) { - chart, err := c.buildChart(report) - if err != nil { - return nil, err + var result []plotpage.Section + + // Section 1: File coupling bar chart. + fileCouplingChart := buildFileCouplingBarChart(report) + if fileCouplingChart != nil { + result = append(result, plotpage.Section{ + Title: "Top File Couples", + Subtitle: "Most frequently co-changed file pairs across commit history.", + Chart: plotpage.WrapChart(fileCouplingChart), + Hint: plotpage.Hint{ + Title: "How to interpret:", + Items: []string{ + "Tall bars = file pairs that frequently change together", + "Cross-package coupling may indicate architectural issues", + "Test files coupled with implementation is expected and healthy", + "Action: Consider extracting shared logic or merging tightly coupled files", + }, + }, + }) } - return []plotpage.Section{ - { - Title: "Developer Coupling Heatmap", - Subtitle: "Shows how often developers work on the same files in the same commits.", - Chart: plotpage.WrapChart(chart), + // Section 2: Developer coupling heatmap. + heatmap, hmErr := c.buildChart(report) + if hmErr != nil { + return nil, hmErr + } + + result = append(result, plotpage.Section{ + Title: "Developer Coupling Heatmap", + Subtitle: "Shows how often developers work on the same files in the same commits.", + Chart: plotpage.WrapChart(heatmap), + Hint: plotpage.Hint{ + Title: "How to interpret:", + Items: []string{ + "High values on diagonal = individual developer activity", + "High off-diagonal values = developers frequently working on the same code", + "Symmetric patterns = collaborative pairs who often commit together", + "Look for: Isolated developers or tight clusters", + "Action: High coupling may indicate knowledge sharing or ownership issues", + }, + }, + }) + + // Section 3: Ownership distribution pie chart. + ownershipPie := buildOwnershipPieChart(report) + if ownershipPie != nil { + result = append(result, plotpage.Section{ + Title: "File Ownership Distribution", + Subtitle: "How files are distributed by number of contributors.", + Chart: plotpage.WrapChart(ownershipPie), Hint: plotpage.Hint{ Title: "How to interpret:", Items: []string{ - "High values on diagonal = individual developer activity", - "High off-diagonal values = developers frequently working on the same code", - "Symmetric patterns = collaborative pairs who often commit together", - "Look for: Isolated developers or tight clusters", - "Action: High coupling may indicate knowledge sharing or ownership issues", + "Single owner = bus factor risk if that person leaves", + "Many owners = potential coordination overhead", + "2-3 owners is often the healthy sweet spot", + "Action: Review single-owner files for knowledge sharing opportunities", }, }, - }, - }, nil + }) + } + + return result, nil } // GenerateChart implements PlotGenerator interface. @@ -84,10 +137,22 @@ func (c *HistoryAnalyzer) buildChart(report analyze.Report) (heatMap *charts.Hea return createEmptyHeatMap(), nil } + // Limit to top-N developers by activity (diagonal value). + matrix, names = FilterTopDevs(matrix, names, maxHeatmapDevs) + + // Shorten "name|email" labels to just the name part. + shortNames := shortenDevNames(names) + co := plotpage.DefaultChartOpts() - maxVal := findMaxValue(matrix) + maxVal := findMaxOffDiagonal(matrix) + + if maxVal == 0 { + maxVal = findMaxValue(matrix) + } + data := buildHeatMapData(matrix, names) - hm := createHeatMapChart(names, maxVal, data, co) + height := dynamicHeatmapHeight(len(shortNames)) + hm := createHeatMapChart(shortNames, maxVal, data, co, height) return hm, nil } @@ -259,6 +324,49 @@ func findMaxValue(matrix []map[int]int64) (maxVal int64) { return maxVal } +// findMaxOffDiagonal returns the max value excluding diagonal cells (i==j). +// The diagonal represents self-activity which dominates the color scale. +func findMaxOffDiagonal(matrix []map[int]int64) (maxVal int64) { + for i, row := range matrix { + for j, val := range row { + if i != j && val > maxVal { + maxVal = val + } + } + } + + return maxVal +} + +// shortenDevNames extracts the name part from "name|email" format strings +// and truncates long names for chart readability. +func shortenDevNames(names []string) []string { + short := make([]string, len(names)) + + for i, name := range names { + if before, _, found := strings.Cut(name, "|"); found { + name = before + } + + if len(name) > maxDevNameLen { + name = name[:maxDevNameLen-3] + "..." + } + + short[i] = name + } + + return short +} + +// dynamicHeatmapHeight computes a height string based on developer count. +func dynamicHeatmapHeight(devCount int) string { + h := devCount*heatmapPerDev + heatmapPadding + + h = max(heatmapMinHeight, min(heatmapMaxHeight, h)) + + return fmt.Sprintf("%dpx", h) +} + func buildHeatMapData(matrix []map[int]int64, names []string) (data []opts.HeatMapData) { for i, row := range matrix { for j, val := range row { @@ -271,11 +379,11 @@ func buildHeatMapData(matrix []map[int]int64, names []string) (data []opts.HeatM return data } -func createHeatMapChart(names []string, maxVal int64, data []opts.HeatMapData, co *plotpage.ChartOpts) *charts.HeatMap { +func createHeatMapChart(names []string, maxVal int64, data []opts.HeatMapData, co *plotpage.ChartOpts, height string) *charts.HeatMap { hm := charts.NewHeatMap() hm.SetGlobalOptions( charts.WithTooltipOpts(co.Tooltip("item")), - charts.WithInitializationOpts(co.Init("100%", heatMapHeight)), + charts.WithInitializationOpts(co.Init("100%", height)), charts.WithDataZoomOpts(co.DataZoom()...), charts.WithXAxisOpts(opts.XAxis{ Type: "category", Data: names, @@ -321,3 +429,131 @@ func createEmptyHeatMap() *charts.HeatMap { return hm } + +// buildFileCouplingBarChart creates a horizontal bar chart of top coupled file pairs. +func buildFileCouplingBarChart(report analyze.Report) *charts.Bar { + input, err := ParseReportData(report) + if err != nil || len(input.Files) == 0 || len(input.FilesMatrix) == 0 { + return nil + } + + metric := NewFileCouplingMetric() + couples := metric.Compute(input) + + if len(couples) == 0 { + return nil + } + + shown := min(len(couples), maxFileCouples) + labels := make([]string, shown) + values := make([]opts.BarData, shown) + + for i, cp := range couples[:shown] { + labels[shown-1-i] = truncatePath(cp.File1) + " \u2194 " + truncatePath(cp.File2) // ↔ + values[shown-1-i] = opts.BarData{Value: cp.CoChanges} + } + + co := plotpage.DefaultChartOpts() + palette := plotpage.GetChartPalette(plotpage.ThemeDark) + + bar := charts.NewBar() + bar.SetGlobalOptions( + charts.WithTooltipOpts(co.Tooltip("axis")), + charts.WithInitializationOpts(co.Init("100%", barChartHeight)), + charts.WithGridOpts(opts.Grid{ + Left: "35%", Right: "5%", Top: "40", Bottom: "10%", + }), + charts.WithXAxisOpts(opts.XAxis{ + Type: "value", + AxisLabel: &opts.AxisLabel{FontSize: labelFontSize, Color: co.TextMutedColor()}, + }), + charts.WithYAxisOpts(opts.YAxis{ + Type: "category", Data: labels, + AxisLabel: &opts.AxisLabel{FontSize: labelFontSize, Color: co.TextMutedColor()}, + }), + ) + + bar.AddSeries("Co-changes", values, + charts.WithItemStyleOpts(opts.ItemStyle{Color: palette.Primary[0]}), + charts.WithLabelOpts(opts.Label{ + Show: opts.Bool(true), + Position: "right", + Color: co.TextMutedColor(), + FontSize: innerLabelSize, + }), + ) + + return bar +} + +// buildOwnershipPieChart creates a pie chart showing file ownership distribution. +func buildOwnershipPieChart(report analyze.Report) *charts.Pie { + input, err := ParseReportData(report) + if err != nil || len(input.Files) == 0 { + return nil + } + + metric := NewFileOwnershipMetric() + ownership := metric.Compute(input) + + if len(ownership) == 0 { + return nil + } + + buckets := BucketOwnership(ownership) + + co := plotpage.DefaultChartOpts() + palette := plotpage.GetChartPalette(plotpage.ThemeDark) + + pie := charts.NewPie() + pie.SetGlobalOptions( + charts.WithTooltipOpts(co.Tooltip("item")), + charts.WithInitializationOpts(co.Init(pieChartWidth, pieChartHeight)), + charts.WithLegendOpts(opts.Legend{ + Show: opts.Bool(true), + Top: "bottom", + TextStyle: &opts.TextStyle{Color: co.TextMutedColor()}, + }), + ) + + bucketColors := []string{ + palette.Semantic.Bad, + palette.Semantic.Good, + palette.Semantic.Warning, + palette.Primary[0], + } + + pieData := make([]opts.PieData, len(buckets)) + for i, b := range buckets { + pieData[i] = opts.PieData{ + Name: b.Label, + Value: b.Count, + ItemStyle: &opts.ItemStyle{Color: bucketColors[i]}, + } + } + + pie.AddSeries("Ownership", pieData). + SetSeriesOptions( + charts.WithLabelOpts(opts.Label{ + Show: opts.Bool(true), + Formatter: "{b}: {c} ({d}%)", + Color: co.TextMutedColor(), + }), + charts.WithPieChartOpts(opts.PieChart{ + Radius: pieRadius, + }), + ) + + return pie +} + +// truncatePath shortens a file path for chart labels. +func truncatePath(path string) string { + const maxLen = 30 + + if len(path) <= maxLen { + return path + } + + return "..." + path[len(path)-maxLen+3:] +} diff --git a/pkg/analyzers/couples/plot_test.go b/pkg/analyzers/couples/plot_test.go new file mode 100644 index 0000000..d8041f0 --- /dev/null +++ b/pkg/analyzers/couples/plot_test.go @@ -0,0 +1,347 @@ +package couples + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/Sumatoshi-tech/codefang/pkg/analyzers/analyze" +) + +func TestGeneratePlot_WithData(t *testing.T) { + t.Parallel() + + c := NewHistoryAnalyzer() + + report := analyze.Report{ + "Files": []string{"pkg/a.go", "pkg/b.go"}, + "FilesLines": []int{100, 200}, + "FilesMatrix": []map[int]int64{ + {0: 10, 1: 8}, + {0: 8, 1: 12}, + }, + "ReversedPeopleDict": []string{"alice", "bob"}, + "PeopleMatrix": []map[int]int64{ + {0: 20, 1: 10}, + {0: 10, 1: 15}, + }, + "PeopleFiles": [][]int{ + {0, 1}, + {0}, + }, + } + + var buf bytes.Buffer + + err := c.generatePlot(report, &buf) + require.NoError(t, err) + assert.NotEmpty(t, buf.String()) + assert.Contains(t, buf.String(), "Couples Analysis") +} + +func TestGeneratePlot_EmptyMatrix(t *testing.T) { + t.Parallel() + + c := NewHistoryAnalyzer() + + report := analyze.Report{ + "Files": []string{}, + "FilesLines": []int{}, + "FilesMatrix": []map[int]int64{}, + "ReversedPeopleDict": []string{}, + "PeopleMatrix": []map[int]int64{}, + "PeopleFiles": [][]int{}, + } + + var buf bytes.Buffer + + err := c.generatePlot(report, &buf) + require.NoError(t, err) + assert.NotEmpty(t, buf.String()) +} + +func TestSerializePlotFormat(t *testing.T) { + t.Parallel() + + c := NewHistoryAnalyzer() + + report := analyze.Report{ + "Files": []string{"f.go"}, + "FilesLines": []int{10}, + "FilesMatrix": []map[int]int64{{0: 5}}, + "ReversedPeopleDict": []string{"dev"}, + "PeopleMatrix": []map[int]int64{{0: 5}}, + "PeopleFiles": [][]int{{0}}, + } + + var buf bytes.Buffer + + err := c.Serialize(report, analyze.FormatPlot, &buf) + require.NoError(t, err) + assert.NotEmpty(t, buf.String()) +} + +func TestGenerateSections_WithData(t *testing.T) { + t.Parallel() + + c := NewHistoryAnalyzer() + + report := analyze.Report{ + "Files": []string{"a.go", "b.go", "c.go"}, + "FilesLines": []int{100, 200, 50}, + "FilesMatrix": []map[int]int64{ + {0: 10, 1: 8, 2: 3}, + {0: 8, 1: 12, 2: 2}, + {0: 3, 1: 2, 2: 5}, + }, + "ReversedPeopleDict": []string{"alice", "bob"}, + "PeopleMatrix": []map[int]int64{ + {0: 20, 1: 10}, + {0: 10, 1: 15}, + }, + "PeopleFiles": [][]int{ + {0, 1, 2}, + {0, 1}, + }, + } + + sections, err := c.GenerateSections(report) + require.NoError(t, err) + // Should have: file coupling bar, developer heatmap, ownership pie. + require.GreaterOrEqual(t, len(sections), 2) // At least heatmap + one more. + + // Check section titles are present. + titles := make([]string, len(sections)) + for i, s := range sections { + titles[i] = s.Title + } + + assert.Contains(t, titles, "Top File Couples") + assert.Contains(t, titles, "Developer Coupling Heatmap") + assert.Contains(t, titles, "File Ownership Distribution") +} + +func TestBuildFileCouplingBarChart_NoData(t *testing.T) { + t.Parallel() + + report := analyze.Report{} + chart := buildFileCouplingBarChart(report) + assert.Nil(t, chart) +} + +func TestBuildFileCouplingBarChart_WithData(t *testing.T) { + t.Parallel() + + report := analyze.Report{ + "Files": []string{"a.go", "b.go"}, + "FilesLines": []int{10, 20}, + "FilesMatrix": []map[int]int64{ + {0: 5, 1: 3}, + {0: 3, 1: 5}, + }, + "ReversedPeopleDict": []string{"dev"}, + "PeopleMatrix": []map[int]int64{{0: 5}}, + "PeopleFiles": [][]int{{0, 1}}, + } + + chart := buildFileCouplingBarChart(report) + assert.NotNil(t, chart) +} + +func TestBuildOwnershipPieChart_NoData(t *testing.T) { + t.Parallel() + + report := analyze.Report{} + chart := buildOwnershipPieChart(report) + assert.Nil(t, chart) +} + +func TestBuildOwnershipPieChart_WithData(t *testing.T) { + t.Parallel() + + report := analyze.Report{ + "Files": []string{"a.go", "b.go", "c.go"}, + "FilesLines": []int{10, 20, 30}, + "FilesMatrix": []map[int]int64{ + {0: 5}, {1: 5}, {2: 5}, + }, + "ReversedPeopleDict": []string{"alice", "bob"}, + "PeopleMatrix": []map[int]int64{ + {0: 10, 1: 5}, + {0: 5, 1: 8}, + }, + "PeopleFiles": [][]int{ + {0, 1}, + {1, 2}, + }, + } + + chart := buildOwnershipPieChart(report) + assert.NotNil(t, chart) +} + +func TestTruncatePath(t *testing.T) { + t.Parallel() + + // Short path — no truncation. + assert.Equal(t, "short.go", truncatePath("short.go")) + + // Long path — truncated. + long := "very/long/path/that/exceeds/thirty/characters/file.go" + result := truncatePath(long) + assert.LessOrEqual(t, len(result), 30) + assert.Equal(t, "...", result[:3]) +} + +func TestFindMaxValue(t *testing.T) { + t.Parallel() + + matrix := []map[int]int64{ + {0: 5, 1: 10}, + {0: 3, 1: 7}, + } + + assert.Equal(t, int64(10), findMaxValue(matrix)) +} + +func TestFindMaxValue_Empty(t *testing.T) { + t.Parallel() + + assert.Equal(t, int64(0), findMaxValue(nil)) + assert.Equal(t, int64(0), findMaxValue([]map[int]int64{})) +} + +func TestBuildHeatMapData(t *testing.T) { + t.Parallel() + + matrix := []map[int]int64{ + {0: 5, 1: 3}, + {0: 3, 1: 8}, + } + names := []string{"alice", "bob"} + + data := buildHeatMapData(matrix, names) + assert.NotEmpty(t, data) +} + +func TestExtractCouplesData_DirectExtraction(t *testing.T) { + t.Parallel() + + report := analyze.Report{ + "PeopleMatrix": []map[int]int64{{0: 5, 1: 3}, {0: 3, 1: 8}}, + "ReversedPeopleDict": []string{"alice", "bob"}, + } + + matrix, names, err := extractCouplesData(report) + require.NoError(t, err) + assert.Len(t, matrix, 2) + assert.Equal(t, []string{"alice", "bob"}, names) +} + +func TestExtractCouplesData_FromCouplingList(t *testing.T) { + t.Parallel() + + report := analyze.Report{ + "developer_coupling": []any{ + map[string]any{ + "developer1": "alice", + "developer2": "bob", + "shared_file_changes": float64(10), + }, + }, + } + + matrix, names, err := extractCouplesData(report) + require.NoError(t, err) + require.Len(t, names, 2) + require.Len(t, matrix, 2) +} + +func TestExtractCouplesData_Empty(t *testing.T) { + t.Parallel() + + report := analyze.Report{ + "developer_coupling": []any{}, + } + + matrix, names, err := extractCouplesData(report) + require.NoError(t, err) + assert.Nil(t, matrix) + assert.Nil(t, names) +} + +func TestFindMaxOffDiagonal(t *testing.T) { + t.Parallel() + + matrix := []map[int]int64{ + {0: 100, 1: 10}, + {0: 10, 1: 200}, + } + + // Diagonal values (100, 200) should be excluded. + assert.Equal(t, int64(10), findMaxOffDiagonal(matrix)) +} + +func TestFindMaxOffDiagonal_AllDiagonal(t *testing.T) { + t.Parallel() + + matrix := []map[int]int64{ + {0: 50}, + {1: 30}, + } + + assert.Equal(t, int64(0), findMaxOffDiagonal(matrix)) +} + +func TestFindMaxOffDiagonal_Empty(t *testing.T) { + t.Parallel() + + assert.Equal(t, int64(0), findMaxOffDiagonal(nil)) + assert.Equal(t, int64(0), findMaxOffDiagonal([]map[int]int64{})) +} + +func TestShortenDevNames(t *testing.T) { + t.Parallel() + + names := []string{ + "alice|alice@example.com", + "bob", + "a-very-long-developer-name-that-exceeds-limit|long@email.com", + } + + short := shortenDevNames(names) + assert.Equal(t, "alice", short[0]) + assert.Equal(t, "bob", short[1]) + assert.LessOrEqual(t, len(short[2]), maxDevNameLen) + assert.Contains(t, short[2], "...") +} + +func TestShortenDevNames_NoPipe(t *testing.T) { + t.Parallel() + + names := []string{"alice", "bob"} + short := shortenDevNames(names) + assert.Equal(t, []string{"alice", "bob"}, short) +} + +func TestDynamicHeatmapHeight(t *testing.T) { + t.Parallel() + + // Small: should clamp to min. + assert.Equal(t, "400px", dynamicHeatmapHeight(2)) + + // Medium: should compute normally. + assert.Equal(t, "500px", dynamicHeatmapHeight(10)) // 10*30+200=500 + + // Large: should clamp to max. + assert.Equal(t, "900px", dynamicHeatmapHeight(50)) +} + +func TestRegisterPlotSections(t *testing.T) { + t.Parallel() + + // Should not panic. + RegisterPlotSections() +} diff --git a/pkg/analyzers/couples/report_section.go b/pkg/analyzers/couples/report_section.go new file mode 100644 index 0000000..ed4567e --- /dev/null +++ b/pkg/analyzers/couples/report_section.go @@ -0,0 +1,217 @@ +package couples + +import ( + "fmt" + "sort" + "strconv" + + "github.com/Sumatoshi-tech/codefang/pkg/analyzers/analyze" +) + +// Section rendering constants. +const ( + ReportSectionTitle = "COUPLES" + + MetricTotalFiles = "Total Files" + MetricTotalDevelopers = "Total Developers" + MetricTotalCoChanges = "Total Co-Changes" + MetricHighlyCoupled = "Highly Coupled Pairs" + MetricAvgCoupling = "Avg Coupling" + + // DistStrongMin is the minimum coupling strength for "Strong" distribution bucket. + DistStrongMin = 0.7 + DistModerateMin = 0.4 + DistWeakMin = 0.1 + DistLabelStrong = "Strong (>70%)" + DistLabelMod = "Moderate (40-70%)" + DistLabelWeak = "Weak (10-40%)" + DistLabelNone = "Minimal (<10%)" + + // IssueSeverityHighMin is the minimum coupling strength for "high" severity issues. + IssueSeverityHighMin = 0.7 + IssueSeverityMedMin = 0.4 + + DefaultStatusMsg = "No coupling data available" +) + +// ReportSection implements analyze.ReportSection for couples analysis. +type ReportSection struct { + analyze.BaseReportSection + + metrics *ComputedMetrics +} + +// NewReportSection creates a ReportSection from a couples report. +func NewReportSection(report analyze.Report) *ReportSection { + if report == nil { + report = analyze.Report{} + } + + metrics, err := ComputeAllMetrics(report) + if err != nil { + metrics = &ComputedMetrics{} + } + + score, msg := computeScore(metrics) + + return &ReportSection{ + BaseReportSection: analyze.BaseReportSection{ + Title: ReportSectionTitle, + Message: msg, + ScoreValue: score, + }, + metrics: metrics, + } +} + +func computeScore(m *ComputedMetrics) (score float64, msg string) { + if m.Aggregate.TotalFiles == 0 { + return analyze.ScoreInfoOnly, DefaultStatusMsg + } + + // Score is 1.0 - avg_coupling (lower coupling = better score). + avg := m.Aggregate.AvgCouplingStrength + score = 1.0 - avg + + if score < 0 { + score = 0 + } + + const ( + goodThreshold = 0.7 + fairThreshold = 0.4 + ) + + switch { + case score >= goodThreshold: + msg = fmt.Sprintf("Good - low coupling across %d files", m.Aggregate.TotalFiles) + case score >= fairThreshold: + msg = fmt.Sprintf("Fair - moderate coupling (%d highly coupled pairs)", m.Aggregate.HighlyCoupledPairs) + default: + msg = fmt.Sprintf("Poor - high coupling (%d highly coupled pairs need attention)", m.Aggregate.HighlyCoupledPairs) + } + + return score, msg +} + +// KeyMetrics returns the key metrics for the couples section. +func (s *ReportSection) KeyMetrics() []analyze.Metric { + agg := s.metrics.Aggregate + + return []analyze.Metric{ + {Label: MetricTotalFiles, Value: strconv.Itoa(agg.TotalFiles)}, + {Label: MetricTotalDevelopers, Value: strconv.Itoa(agg.TotalDevelopers)}, + {Label: MetricTotalCoChanges, Value: strconv.FormatInt(agg.TotalCoChanges, 10)}, + {Label: MetricHighlyCoupled, Value: strconv.Itoa(agg.HighlyCoupledPairs)}, + {Label: MetricAvgCoupling, Value: fmt.Sprintf("%.0f%%", agg.AvgCouplingStrength*pctMultiplier)}, + } +} + +// Distribution returns coupling strength distribution categories. +func (s *ReportSection) Distribution() []analyze.DistributionItem { + couples := s.metrics.FileCoupling + if len(couples) == 0 { + return nil + } + + counts := categorizeStrength(couples) + total := len(couples) + + return []analyze.DistributionItem{ + {Label: DistLabelStrong, Percent: pct(counts.strong, total), Count: counts.strong}, + {Label: DistLabelMod, Percent: pct(counts.moderate, total), Count: counts.moderate}, + {Label: DistLabelWeak, Percent: pct(counts.weak, total), Count: counts.weak}, + {Label: DistLabelNone, Percent: pct(counts.minimal, total), Count: counts.minimal}, + } +} + +// TopIssues returns the top N most coupled file pairs. +func (s *ReportSection) TopIssues(n int) []analyze.Issue { + issues := s.buildSortedIssues() + if n >= len(issues) { + return issues + } + + return issues[:n] +} + +// AllIssues returns all coupled file pairs sorted by strength descending. +func (s *ReportSection) AllIssues() []analyze.Issue { + return s.buildSortedIssues() +} + +func (s *ReportSection) buildSortedIssues() []analyze.Issue { + couples := s.metrics.FileCoupling + if len(couples) == 0 { + return nil + } + + issues := make([]analyze.Issue, 0, len(couples)) + + for _, cp := range couples { + issues = append(issues, analyze.Issue{ + Name: cp.File1 + " \u2194 " + cp.File2, + Value: fmt.Sprintf("%.0f%% (%d\u00d7)", cp.Strength*pctMultiplier, cp.CoChanges), + Severity: severityForStrength(cp.Strength), + }) + } + + // Sort by strength descending (highest coupling = most concerning = first). + sort.Slice(issues, func(i, j int) bool { + return issues[i].Value > issues[j].Value + }) + + return issues +} + +// CreateReportSection creates a ReportSection from report data. +func (c *HistoryAnalyzer) CreateReportSection(report analyze.Report) analyze.ReportSection { + return NewReportSection(report) +} + +// --- Distribution helpers ---. + +type strengthDistCounts struct { + strong int + moderate int + weak int + minimal int +} + +func categorizeStrength(couples []FileCouplingData) strengthDistCounts { + var counts strengthDistCounts + + for _, cp := range couples { + switch { + case cp.Strength >= DistStrongMin: + counts.strong++ + case cp.Strength >= DistModerateMin: + counts.moderate++ + case cp.Strength >= DistWeakMin: + counts.weak++ + default: + counts.minimal++ + } + } + + return counts +} + +func severityForStrength(strength float64) string { + switch { + case strength >= IssueSeverityHighMin: + return analyze.SeverityPoor + case strength >= IssueSeverityMedMin: + return analyze.SeverityFair + default: + return analyze.SeverityGood + } +} + +func pct(count, total int) float64 { + if total == 0 { + return 0 + } + + return float64(count) / float64(total) +} diff --git a/pkg/analyzers/couples/report_section_test.go b/pkg/analyzers/couples/report_section_test.go new file mode 100644 index 0000000..a12bc66 --- /dev/null +++ b/pkg/analyzers/couples/report_section_test.go @@ -0,0 +1,318 @@ +package couples + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/Sumatoshi-tech/codefang/pkg/analyzers/analyze" +) + +func TestNewReportSection_NilReport(t *testing.T) { + t.Parallel() + + rs := NewReportSection(nil) + require.NotNil(t, rs) + assert.Equal(t, ReportSectionTitle, rs.Title) + assert.InDelta(t, analyze.ScoreInfoOnly, rs.Score(), 0.001) + assert.Equal(t, DefaultStatusMsg, rs.Message) +} + +func TestNewReportSection_EmptyReport(t *testing.T) { + t.Parallel() + + rs := NewReportSection(analyze.Report{}) + require.NotNil(t, rs) + assert.InDelta(t, analyze.ScoreInfoOnly, rs.Score(), 0.001) + assert.Equal(t, DefaultStatusMsg, rs.Message) +} + +func TestNewReportSection_WithData(t *testing.T) { + t.Parallel() + + report := analyze.Report{ + "Files": []string{"a.go", "b.go", "c.go"}, + "FilesLines": []int{100, 200, 50}, + "FilesMatrix": []map[int]int64{ + {0: 10, 1: 8, 2: 3}, + {0: 8, 1: 12, 2: 2}, + {0: 3, 1: 2, 2: 5}, + }, + "ReversedPeopleDict": []string{"alice", "bob"}, + "PeopleMatrix": []map[int]int64{ + {0: 20, 1: 10}, + {0: 10, 1: 15}, + }, + "PeopleFiles": [][]int{ + {0, 1, 2}, + {0, 1}, + }, + } + + rs := NewReportSection(report) + require.NotNil(t, rs) + + // Score should be between 0 and 1 (not info-only). + score := rs.Score() + assert.Greater(t, score, 0.0) + assert.LessOrEqual(t, score, 1.0) + + // Title should match. + assert.Equal(t, ReportSectionTitle, rs.Title) + + // Message should contain files count. + assert.NotEmpty(t, rs.Message) +} + +func TestReportSection_KeyMetrics(t *testing.T) { + t.Parallel() + + report := analyze.Report{ + "Files": []string{"a.go", "b.go"}, + "FilesLines": []int{10, 20}, + "FilesMatrix": []map[int]int64{ + {0: 5, 1: 3}, + {0: 3, 1: 5}, + }, + "ReversedPeopleDict": []string{"dev1", "dev2"}, + "PeopleMatrix": []map[int]int64{ + {0: 10, 1: 5}, + {0: 5, 1: 8}, + }, + "PeopleFiles": [][]int{ + {0, 1}, + {0}, + }, + } + + rs := NewReportSection(report) + km := rs.KeyMetrics() + require.NotEmpty(t, km) + + labels := make(map[string]string) + for _, m := range km { + labels[m.Label] = m.Value + } + + assert.Equal(t, "2", labels[MetricTotalFiles]) + assert.Equal(t, "2", labels[MetricTotalDevelopers]) + assert.Contains(t, labels, MetricTotalCoChanges) + assert.Contains(t, labels, MetricHighlyCoupled) + assert.Contains(t, labels, MetricAvgCoupling) +} + +func TestReportSection_Distribution(t *testing.T) { + t.Parallel() + + report := analyze.Report{ + "Files": []string{"a.go", "b.go", "c.go"}, + "FilesLines": []int{10, 20, 30}, + "FilesMatrix": []map[int]int64{ + {0: 10, 1: 8, 2: 1}, + {0: 8, 1: 10, 2: 2}, + {0: 1, 1: 2, 2: 10}, + }, + "ReversedPeopleDict": []string{"dev"}, + "PeopleMatrix": []map[int]int64{{0: 5}}, + "PeopleFiles": [][]int{{0, 1, 2}}, + } + + rs := NewReportSection(report) + dist := rs.Distribution() + require.NotEmpty(t, dist) + + // Verify all distribution labels present. + labelSet := map[string]bool{} + for _, d := range dist { + labelSet[d.Label] = true + } + + assert.True(t, labelSet[DistLabelStrong]) + assert.True(t, labelSet[DistLabelMod]) + assert.True(t, labelSet[DistLabelWeak]) + assert.True(t, labelSet[DistLabelNone]) +} + +func TestReportSection_Distribution_Empty(t *testing.T) { + t.Parallel() + + rs := NewReportSection(analyze.Report{}) + dist := rs.Distribution() + assert.Nil(t, dist) +} + +func TestReportSection_TopIssues(t *testing.T) { + t.Parallel() + + report := analyze.Report{ + "Files": []string{"a.go", "b.go", "c.go"}, + "FilesLines": []int{10, 20, 30}, + "FilesMatrix": []map[int]int64{ + {0: 10, 1: 8, 2: 3}, + {0: 8, 1: 12, 2: 2}, + {0: 3, 1: 2, 2: 5}, + }, + "ReversedPeopleDict": []string{"dev"}, + "PeopleMatrix": []map[int]int64{{0: 5}}, + "PeopleFiles": [][]int{{0, 1, 2}}, + } + + rs := NewReportSection(report) + + // Request more than available — should return all. + all := rs.TopIssues(100) + require.NotEmpty(t, all) + + // Request top 1. + top1 := rs.TopIssues(1) + require.Len(t, top1, 1) + + // Issue should contain file names and severity. + assert.Contains(t, top1[0].Name, "\u2194") // ↔ + assert.NotEmpty(t, top1[0].Value) + assert.NotEmpty(t, top1[0].Severity) +} + +func TestReportSection_AllIssues(t *testing.T) { + t.Parallel() + + report := analyze.Report{ + "Files": []string{"a.go", "b.go"}, + "FilesLines": []int{10, 20}, + "FilesMatrix": []map[int]int64{ + {0: 5, 1: 3}, + {0: 3, 1: 5}, + }, + "ReversedPeopleDict": []string{"dev"}, + "PeopleMatrix": []map[int]int64{{0: 5}}, + "PeopleFiles": [][]int{{0, 1}}, + } + + rs := NewReportSection(report) + all := rs.AllIssues() + require.Len(t, all, 1) + + assert.Contains(t, all[0].Name, "a.go") + assert.Contains(t, all[0].Name, "b.go") +} + +func TestReportSection_AllIssues_Empty(t *testing.T) { + t.Parallel() + + rs := NewReportSection(analyze.Report{}) + assert.Nil(t, rs.AllIssues()) +} + +func TestComputeScore_GoodCoupling(t *testing.T) { + t.Parallel() + + // Low avg coupling → high score. + m := &ComputedMetrics{ + Aggregate: AggregateData{ + TotalFiles: 10, + AvgCouplingStrength: 0.1, + }, + } + + score, msg := computeScore(m) + assert.InDelta(t, 0.9, score, 0.01) + assert.Contains(t, msg, "Good") +} + +func TestComputeScore_FairCoupling(t *testing.T) { + t.Parallel() + + m := &ComputedMetrics{ + Aggregate: AggregateData{ + TotalFiles: 10, + AvgCouplingStrength: 0.5, + HighlyCoupledPairs: 3, + }, + } + + score, msg := computeScore(m) + assert.InDelta(t, 0.5, score, 0.01) + assert.Contains(t, msg, "Fair") +} + +func TestComputeScore_PoorCoupling(t *testing.T) { + t.Parallel() + + m := &ComputedMetrics{ + Aggregate: AggregateData{ + TotalFiles: 10, + AvgCouplingStrength: 0.8, + HighlyCoupledPairs: 5, + }, + } + + score, msg := computeScore(m) + assert.InDelta(t, 0.2, score, 0.01) + assert.Contains(t, msg, "Poor") +} + +func TestComputeScore_NoFiles(t *testing.T) { + t.Parallel() + + m := &ComputedMetrics{} + score, msg := computeScore(m) + assert.InDelta(t, analyze.ScoreInfoOnly, score, 0.001) + assert.Equal(t, DefaultStatusMsg, msg) +} + +func TestSeverityForStrength(t *testing.T) { + t.Parallel() + + assert.Equal(t, analyze.SeverityPoor, severityForStrength(0.9)) + assert.Equal(t, analyze.SeverityPoor, severityForStrength(0.7)) + assert.Equal(t, analyze.SeverityFair, severityForStrength(0.5)) + assert.Equal(t, analyze.SeverityFair, severityForStrength(0.4)) + assert.Equal(t, analyze.SeverityGood, severityForStrength(0.3)) + assert.Equal(t, analyze.SeverityGood, severityForStrength(0.0)) +} + +func TestCategorizeStrength(t *testing.T) { + t.Parallel() + + couples := []FileCouplingData{ + {Strength: 0.9}, // Strong. + {Strength: 0.75}, // Strong. + {Strength: 0.5}, // Moderate. + {Strength: 0.2}, // Weak. + {Strength: 0.05}, // Minimal. + } + + counts := categorizeStrength(couples) + assert.Equal(t, 2, counts.strong) + assert.Equal(t, 1, counts.moderate) + assert.Equal(t, 1, counts.weak) + assert.Equal(t, 1, counts.minimal) +} + +func TestPct(t *testing.T) { + t.Parallel() + + assert.InDelta(t, 0.5, pct(5, 10), 0.001) + assert.InDelta(t, 1.0, pct(3, 3), 0.001) + assert.InDelta(t, 0.0, pct(0, 10), 0.001) + assert.InDelta(t, 0.0, pct(5, 0), 0.001) // Division by zero → 0. +} + +func TestCreateReportSection_ViaHistoryAnalyzer(t *testing.T) { + t.Parallel() + + c := NewHistoryAnalyzer() + report := analyze.Report{ + "Files": []string{"f.go"}, + "FilesLines": []int{10}, + "FilesMatrix": []map[int]int64{{0: 5}}, + "ReversedPeopleDict": []string{"dev"}, + "PeopleMatrix": []map[int]int64{{0: 5}}, + "PeopleFiles": [][]int{{0}}, + } + + rs := c.CreateReportSection(report) + require.NotNil(t, rs) + assert.Equal(t, ReportSectionTitle, rs.SectionTitle()) +} diff --git a/pkg/analyzers/couples/text.go b/pkg/analyzers/couples/text.go new file mode 100644 index 0000000..04862b0 --- /dev/null +++ b/pkg/analyzers/couples/text.go @@ -0,0 +1,200 @@ +package couples + +import ( + "fmt" + "io" + "strconv" + + "github.com/Sumatoshi-tech/codefang/pkg/analyzers/analyze" + "github.com/Sumatoshi-tech/codefang/pkg/analyzers/common/terminal" +) + +// Text output constants. +const ( + textMaxFileCouples = 7 + textMaxDevCouples = 7 + textMaxOwnership = 7 + textNameWidth = 25 + textIndent = " " + textOwnershipWidth = textNameWidth*2 + 3 // file1 + separator + file2 width. + textIndentBothSide = 2 // indent on both sides. + pctMultiplier = 100 + singleContributor = 1 +) + +// generateText writes a human-readable couples summary to the writer. +func (c *HistoryAnalyzer) generateText(report analyze.Report, writer io.Writer) error { + metrics, err := ComputeAllMetrics(report) + if err != nil { + return fmt.Errorf("compute metrics: %w", err) + } + + cfg := terminal.NewConfig() + width := cfg.Width + agg := metrics.Aggregate + + // Header. + header := terminal.DrawHeader( + "Couples", + fmt.Sprintf("%d files", agg.TotalFiles), + width, + ) + fmt.Fprintln(writer, header) + fmt.Fprintln(writer) + + // Summary section. + writeCouplesSummary(writer, cfg, agg) + + // Top file couples. + if len(metrics.FileCoupling) > 0 { + fmt.Fprintln(writer) + writeFileCouples(writer, cfg, metrics.FileCoupling) + } + + // Top developer couples. + if len(metrics.DeveloperCoupling) > 0 { + fmt.Fprintln(writer) + writeDevCouples(writer, cfg, metrics.DeveloperCoupling) + } + + // File ownership risk. + if len(metrics.FileOwnership) > 0 { + fmt.Fprintln(writer) + writeOwnershipRisk(writer, cfg, metrics.FileOwnership) + } + + fmt.Fprintln(writer) + + return nil +} + +func writeCouplesSummary(writer io.Writer, cfg terminal.Config, agg AggregateData) { + fmt.Fprintf(writer, "%s%s\n", textIndent, + cfg.Colorize("Summary", terminal.ColorBlue)) + fmt.Fprintf(writer, "%s%s\n", textIndent, + terminal.DrawSeparator(cfg.Width-len(textIndent)*textIndentBothSide)) + fmt.Fprintf(writer, "%s%-22s %-12s%-22s %s\n", textIndent, + "Total Files", formatCouplesInt(agg.TotalFiles), + "Total Developers", formatCouplesInt(agg.TotalDevelopers)) + fmt.Fprintf(writer, "%s%-22s %-12s%-22s %s\n", textIndent, + "Total Co-Changes", formatCouplesInt64(agg.TotalCoChanges), + "Highly Coupled Pairs", formatCouplesInt(agg.HighlyCoupledPairs)) + fmt.Fprintf(writer, "%s%-22s %s\n", textIndent, + "Avg Coupling", formatPct(agg.AvgCouplingStrength)) +} + +// coupleRow is a generic row for printing coupling pairs. +type coupleRow struct { + left string + right string + count int64 + strength float64 +} + +func writeFileCouples(writer io.Writer, cfg terminal.Config, couples []FileCouplingData) { + rows := make([]coupleRow, len(couples)) + for i, cp := range couples { + rows[i] = coupleRow{cp.File1, cp.File2, cp.CoChanges, cp.Strength} + } + + writeCoupleRows(writer, cfg, "Top File Couples", rows, textMaxFileCouples) +} + +func writeDevCouples(writer io.Writer, cfg terminal.Config, couples []DeveloperCouplingData) { + rows := make([]coupleRow, len(couples)) + for i, cp := range couples { + rows[i] = coupleRow{cp.Developer1, cp.Developer2, cp.SharedFiles, cp.Strength} + } + + writeCoupleRows(writer, cfg, "Top Developer Couples", rows, textMaxDevCouples) +} + +func writeCoupleRows(writer io.Writer, cfg terminal.Config, title string, rows []coupleRow, maxRows int) { + fmt.Fprintf(writer, "%s%s\n", textIndent, + cfg.Colorize(title, terminal.ColorBlue)) + fmt.Fprintf(writer, "%s%s\n", textIndent, + terminal.DrawSeparator(cfg.Width-len(textIndent)*textIndentBothSide)) + + shown := min(len(rows), maxRows) + + for _, r := range rows[:shown] { + left := terminal.TruncateWithEllipsis(r.left, textNameWidth) + right := terminal.TruncateWithEllipsis(r.right, textNameWidth) + strengthColor := colorForStrength(r.strength) + + fmt.Fprintf(writer, "%s%-*s %s %-*s %4d%s %s\n", + textIndent, + textNameWidth, left, + cfg.Colorize("\u2194", terminal.ColorGray), // ↔ + textNameWidth, right, + r.count, "\u00d7", // × + cfg.Colorize(formatPct(r.strength), strengthColor), + ) + } + + if len(rows) > maxRows { + fmt.Fprintf(writer, "%s%s\n", textIndent, + cfg.Colorize(fmt.Sprintf(" and %d more...", len(rows)-maxRows), terminal.ColorGray)) + } +} + +func writeOwnershipRisk(writer io.Writer, cfg terminal.Config, ownership []FileOwnershipData) { + fmt.Fprintf(writer, "%s%s\n", textIndent, + cfg.Colorize("File Ownership Risk", terminal.ColorBlue)) + fmt.Fprintf(writer, "%s%s\n", textIndent, + terminal.DrawSeparator(cfg.Width-len(textIndent)*textIndentBothSide)) + + // Show files with fewest contributors first (highest risk). + sorted := SortOwnershipByRisk(ownership) + shown := min(len(sorted), textMaxOwnership) + + for _, fo := range sorted[:shown] { + file := terminal.TruncateWithEllipsis(fo.File, textOwnershipWidth) + + risk := "" + if fo.Contributors <= singleContributor { + risk = cfg.Colorize(" !!", terminal.ColorRed) + } + + fmt.Fprintf(writer, "%s%-*s %5d lines %d contributors%s\n", + textIndent, + textOwnershipWidth, file, + fo.Lines, + fo.Contributors, + risk, + ) + } + + if len(sorted) > textMaxOwnership { + fmt.Fprintf(writer, "%s%s\n", textIndent, + cfg.Colorize(fmt.Sprintf(" and %d more...", len(sorted)-textMaxOwnership), terminal.ColorGray)) + } +} + +func colorForStrength(strength float64) terminal.Color { + const ( + highThreshold = 0.7 + medThreshold = 0.4 + ) + + switch { + case strength >= highThreshold: + return terminal.ColorRed + case strength >= medThreshold: + return terminal.ColorYellow + default: + return terminal.ColorGreen + } +} + +func formatPct(v float64) string { + return fmt.Sprintf("%.0f%%", v*pctMultiplier) +} + +func formatCouplesInt(n int) string { + return strconv.Itoa(n) +} + +func formatCouplesInt64(n int64) string { + return strconv.FormatInt(n, 10) +} diff --git a/pkg/analyzers/couples/text_test.go b/pkg/analyzers/couples/text_test.go new file mode 100644 index 0000000..b514dc4 --- /dev/null +++ b/pkg/analyzers/couples/text_test.go @@ -0,0 +1,130 @@ +package couples + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/Sumatoshi-tech/codefang/pkg/analyzers/analyze" + "github.com/Sumatoshi-tech/codefang/pkg/analyzers/common/terminal" +) + +func TestGenerateText_EmptyReport(t *testing.T) { + t.Parallel() + + c := NewHistoryAnalyzer() + + var buf bytes.Buffer + + err := c.generateText(analyze.Report{}, &buf) + require.NoError(t, err) + + output := buf.String() + assert.Contains(t, output, "Couples") + assert.Contains(t, output, "0 files") +} + +func TestGenerateText_WithData(t *testing.T) { + t.Parallel() + + c := NewHistoryAnalyzer() + + report := analyze.Report{ + "Files": []string{"pkg/a.go", "pkg/b.go", "pkg/c.go"}, + "FilesLines": []int{100, 200, 50}, + "ReversedPeopleDict": []string{"alice", "bob"}, + "FilesMatrix": []map[int]int64{ + {0: 10, 1: 8, 2: 3}, + {0: 8, 1: 12, 2: 2}, + {0: 3, 1: 2, 2: 5}, + }, + "PeopleMatrix": []map[int]int64{ + {0: 20, 1: 10}, + {0: 10, 1: 15}, + }, + "PeopleFiles": [][]int{ + {0, 1, 2}, + {0, 1}, + }, + } + + var buf bytes.Buffer + + err := c.generateText(report, &buf) + require.NoError(t, err) + + output := buf.String() + // Header. + assert.Contains(t, output, "Couples") + assert.Contains(t, output, "3 files") + // Summary. + assert.Contains(t, output, "Summary") + assert.Contains(t, output, "Total Files") + assert.Contains(t, output, "Total Developers") + // File couples. + assert.Contains(t, output, "Top File Couples") + assert.Contains(t, output, "pkg/a.go") + assert.Contains(t, output, "pkg/b.go") + // Developer couples. + assert.Contains(t, output, "Top Developer Couples") + assert.Contains(t, output, "alice") + assert.Contains(t, output, "bob") + // Ownership. + assert.Contains(t, output, "File Ownership Risk") +} + +func TestGenerateText_Serialize_TextFormat(t *testing.T) { + t.Parallel() + + c := NewHistoryAnalyzer() + + report := analyze.Report{ + "Files": []string{"f1.go"}, + "FilesLines": []int{10}, + "ReversedPeopleDict": []string{"dev"}, + "FilesMatrix": []map[int]int64{{0: 5}}, + "PeopleMatrix": []map[int]int64{{0: 10}}, + "PeopleFiles": [][]int{{0}}, + } + + var buf bytes.Buffer + + err := c.Serialize(report, analyze.FormatText, &buf) + require.NoError(t, err) + assert.Contains(t, buf.String(), "Couples") +} + +func TestSortOwnershipByRisk(t *testing.T) { + t.Parallel() + + ownership := []FileOwnershipData{ + {File: "a.go", Contributors: 3}, + {File: "b.go", Contributors: 1}, + {File: "c.go", Contributors: 2}, + } + + sorted := SortOwnershipByRisk(ownership) + + require.Len(t, sorted, 3) + assert.Equal(t, "b.go", sorted[0].File) // 1 contributor = highest risk. + assert.Equal(t, "c.go", sorted[1].File) + assert.Equal(t, "a.go", sorted[2].File) // 3 contributors = lowest risk. +} + +func TestColorForStrength(t *testing.T) { + t.Parallel() + + assert.Equal(t, terminal.ColorRed, colorForStrength(0.8)) + assert.Equal(t, terminal.ColorYellow, colorForStrength(0.5)) + assert.Equal(t, terminal.ColorGreen, colorForStrength(0.2)) +} + +func TestFormatPct(t *testing.T) { + t.Parallel() + + assert.Equal(t, "50%", formatPct(0.5)) + assert.Equal(t, "100%", formatPct(1.0)) + assert.Equal(t, "0%", formatPct(0.0)) +} diff --git a/site/analyzers/couples.md b/site/analyzers/couples.md index 54c672c..4ae7804 100644 --- a/site/analyzers/couples.md +++ b/site/analyzers/couples.md @@ -18,16 +18,24 @@ codefang run -a history/couples . A square matrix where each cell `[i][j]` counts the number of commits in which files `i` and `j` both appeared. High co-change counts indicate tight coupling between files. -!!! info "Meaningful context size" - Commits touching more than **1000 files** are excluded from coupling analysis, since mass changes (e.g., formatting, license updates) produce noise rather than signal. +**Coupling strength** is computed using the code-maat formula: + +``` +strength = co_changes / average(revisions_file1, revisions_file2) +``` + +Where `revisions_fileN` is the diagonal element of the file matrix (the file's self-change count). The result is capped at 1.0. This normalizes coupling by the average activity of both files, so a pair of files that change together 10 times out of 20 average revisions scores 50%. + +!!! info "Changeset size filter" + Commits touching more than **1000 files** are excluded from coupling analysis. Mass changes (e.g., formatting, license updates, dependency bumps) produce noise rather than meaningful coupling signal. ### Developer Coupling -A matrix where each cell `[i][j]` counts the number of times developers `i` and `j` committed to the same file. This reveals collaboration patterns and shared code ownership. +A matrix where each cell `[i][j]` counts the number of times developers `i` and `j` committed to the same file. Developer coupling strength uses the same code-maat formula normalized by each developer's individual commit activity. ### File Ownership -For each tracked file, the analyzer reports its line count and the number of distinct contributors. +For each tracked file, the analyzer reports its line count and the number of distinct contributors. Single-owner files are flagged as bus-factor risks. ### Rename Tracking @@ -45,6 +53,40 @@ The couples analyzer has no additional configuration options. It uses the identi --- +## Output Formats + +### Text + +The text format provides a terminal-friendly summary with: + +- **Summary**: Total files, developers, co-changes, highly coupled pairs, average coupling strength +- **Top File Couples**: Top 7 most coupled file pairs with co-change count and strength percentage +- **Top Developer Couples**: Top 7 developer pairs with shared file changes and coupling strength +- **File Ownership Risk**: Files sorted by fewest contributors (highest bus-factor risk first), with single-contributor files flagged + +### Plot (HTML) + +The HTML plot includes three interactive chart sections: + +1. **Top File Couples** — Horizontal bar chart of the 20 most co-changed file pairs +2. **Developer Coupling Heatmap** — Matrix showing developer collaboration intensity +3. **File Ownership Distribution** — Pie chart categorizing files by contributor count (single owner, 2-3, 4-5, 6+) + +### JSON / YAML + +Structured output with four top-level sections: `file_coupling`, `developer_coupling`, `file_ownership`, and `aggregate`. + +### Executive Summary (ReportSection) + +The couples analyzer provides a `ReportSection` for use in combined reports: + +- **Score**: `1.0 - avg_coupling_strength` (lower coupling = better score, 0-1 scale) +- **Key Metrics**: Total files, developers, co-changes, highly coupled pairs, average coupling +- **Distribution**: Coupling strength buckets — Strong (>70%), Moderate (40-70%), Weak (10-40%), Minimal (<10%) +- **Issues**: File pairs sorted by coupling strength descending, with severity labels (poor/fair/good) + +--- + ## Example Output === "JSON" @@ -84,7 +126,7 @@ The couples analyzer has no additional configuration options. It uses the identi "total_files": 342, "total_developers": 5, "total_co_changes": 12500, - "avg_coupling_strength": 4.2, + "avg_coupling_strength": 0.42, "highly_coupled_pairs": 23 } } @@ -103,9 +145,15 @@ The couples analyzer has no additional configuration options. It uses the identi developer2: bob shared_file_changes: 234 coupling_strength: 0.65 + file_ownership: + - file: pkg/core/engine.go + lines: 450 + contributors: 3 aggregate: total_files: 342 total_developers: 5 + total_co_changes: 12500 + avg_coupling_strength: 0.42 highly_coupled_pairs: 23 ``` @@ -118,6 +166,7 @@ The couples analyzer has no additional configuration options. It uses the identi - **Module boundary validation**: Files within the same module should be more coupled than files across modules. Cross-module coupling is a design smell. - **Team topology**: Developer coupling reveals who collaborates with whom. This can inform team structure decisions. - **Change impact prediction**: When modifying a file, the coupling matrix predicts which other files are likely to need changes. +- **Bus factor assessment**: File ownership data identifies single-owner files that represent knowledge concentration risks. --- @@ -125,20 +174,35 @@ The couples analyzer has no additional configuration options. It uses the identi The couples analyzer follows the **TC/Aggregator pattern**: -1. **Consume phase**: Each commit produces a `TC{Data: *CommitData}` containing the coupling context (list of co-changed files), per-file author touch counts, rename pairs, and whether the author's commit count was incremented. The `CouplesMaximumMeaningfulContextSize` filter (1000 files) is applied before TC emission. +1. **Consume phase**: Each commit produces a `TC{Data: *CommitData}` containing the coupling context (list of co-changed files), per-file author touch counts (always 1 per file per commit), rename pairs, and whether the author's commit count was incremented. Commits exceeding `CouplesMaximumMeaningfulContextSize` (1000 files) are skipped, producing an empty coupling context. 2. **Aggregation phase**: The `Aggregator` accumulates the file co-occurrence matrix using `SpillStore[map[string]int]`, along with per-person file touch counts (`people`), commit counts (`peopleCommits`), and rename tracking. When memory pressure exceeds `SpillBudget`, the file coupling matrix is spilled to disk via gob encoding. `Collect()` merges spilled data using additive merge semantics. -3. **Serialization phase**: `ticksToReport()` reconstructs the full report (PeopleMatrix, PeopleFiles, Files, FilesLines, FilesMatrix, ReversedPeopleDict) from aggregated TICKs, then `ComputeAllMetrics()` produces the final JSON/YAML/plot output. +3. **Serialization phase**: `ticksToReport()` reconstructs the full report (PeopleMatrix, PeopleFiles, Files, FilesLines, FilesMatrix, ReversedPeopleDict) from aggregated TICKs, then `ComputeAllMetrics()` produces the final typed output for any format (text, plot, JSON, YAML, binary). **Working state** (`merges`, `seenFiles`) stays in the analyzer for merge-mode dedup across commits. **Accumulated output** (file couplings, people maps, renames) is owned entirely by the aggregator. --- +## Methodology + +The coupling strength formula follows the **code-maat** academic standard (Adam Tornhill, "Your Code as a Crime Scene"): + +``` +degree = shared_revisions / average(revisions_A, revisions_B) +``` + +This measures what fraction of a file pair's average activity is shared. A coupling strength of 80% means the pair changes together in 80% of their average revision count. + +The aggregate `avg_coupling_strength` is the mean of all per-pair coupling strengths (not the mean of raw co-change counts). + +**Highly coupled pairs** are those with 10 or more co-changes (raw count threshold). + +--- + ## Limitations -- **Large commits excluded**: Commits touching more than 1000 files are ignored to avoid noise from mass changes (formatting, license headers, dependency updates). -- **Coupling strength**: The coupling strength metric is normalized by the maximum co-change count for the pair. It does not account for the total number of commits each file has individually. +- **Large commits excluded**: Commits touching more than 1000 files are skipped to avoid noise from mass changes (formatting, license headers, dependency updates). - **No temporal decay**: All commits are weighted equally. A coupling from three years ago counts the same as one from yesterday. Consider filtering by date range for recent coupling analysis. - **Merge commits**: Merge commits are processed only once (first encounter) to avoid double-counting changes that appear in multiple branches. - **File deletions**: Deleted files are included in the coupling matrix during the analysis window but may not appear in the final output if they no longer exist at HEAD.