From 6e3f0474c82fa0fac2edec7d75ba1eb524a1d6e3 Mon Sep 17 00:00:00 2001 From: lex0c Date: Tue, 21 Apr 2026 14:01:14 -0300 Subject: [PATCH 1/5] Add per-dev top commits to dev profile Mirrors dataset-level TopCommits per developer: largest individual commits by lines changed, capped at 10 with a hidden counter. Shown in the CLI profile output and the standalone --email HTML profile page; intentionally not surfaced on the main report's Developer Profiles cards to keep those compact. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/report/profile_template.go | 20 ++++ internal/stats/format.go | 12 +++ internal/stats/stats.go | 82 ++++++++++++++++- internal/stats/stats_test.go | 136 ++++++++++++++++++++++++++++ 4 files changed, 249 insertions(+), 1 deletion(-) diff --git a/internal/report/profile_template.go b/internal/report/profile_template.go index 5fedc3a..4b2062b 100644 --- a/internal/report/profile_template.go +++ b/internal/report/profile_template.go @@ -190,6 +190,26 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} +{{if .Profile.TopCommits}} +

Top Commits

+

This developer's largest individual commits by lines changed (additions + deletions). A handful of outsized commits (vendored drops, bulk renames, generated code) reads very differently from a steady stream of medium-sized ones, even when the totals match.

+ + +{{range .Profile.TopCommits}} + + + + + + + +{{end}} +{{if gt .Profile.TopCommitsHidden 0}} + +{{end}} +
SHADateLinesFilesMessage
{{slice .SHA 0 12}}{{.Date}}{{thousands .LinesChanged}}{{thousands .FilesChanged}}{{.Message}}
+{{.Profile.TopCommitsHidden}} more commits not shown
+{{end}} + {{if .ActivityYears}}

Activity

Monthly commit heatmap. Darker = more commits. Gaps = inactive periods; steady cadence signals healthy pace. Hover for details; toggle to table for exact numbers. · {{docRef "activity"}}

diff --git a/internal/stats/format.go b/internal/stats/format.go index 3701812..4328a7e 100644 --- a/internal/stats/format.go +++ b/internal/stats/format.go @@ -520,6 +520,18 @@ func (f *Formatter) PrintProfiles(profiles []DevProfile) error { } } + if len(p.TopCommits) > 0 { + fmt.Fprintln(f.w) + fmt.Fprintln(f.w, " Top commits:") + for _, tc := range p.TopCommits { + fmt.Fprintf(f.w, " %s %s %6d lines %3d files %s\n", + tc.SHA[:12], tc.Date, tc.LinesChanged, tc.FilesChanged, tc.Message) + } + if p.TopCommitsHidden > 0 { + fmt.Fprintf(f.w, " ... (+%d more commits not shown)\n", p.TopCommitsHidden) + } + } + if len(p.MonthlyActivity) > 0 { fmt.Fprintln(f.w, " Activity:") maxCommits := 0 diff --git a/internal/stats/stats.go b/internal/stats/stats.go index 0719041..f8fe1a6 100644 --- a/internal/stats/stats.go +++ b/internal/stats/stats.go @@ -1328,6 +1328,14 @@ type DevProfile struct { // whole footprint or just a sample. Zero when the dev's touched // file count fits in 10. TopFilesHidden int + // TopCommits is the dev's largest commits by LinesChanged (add+del), + // capped at 10. Mirrors the dataset-level TopCommits metric so a + // reader can see which individual commits drive this dev's churn + // footprint — a handful of huge vendored-drop commits reads very + // differently from a steady stream of medium ones, even when the + // totals match. TopCommitsHidden follows the TopFilesHidden pattern. + TopCommits []DevCommit + TopCommitsHidden int Scope []DirScope // ScopeHidden / ExtensionsHidden count the buckets dropped by the // top-5 truncation so CLI and HTML can surface "+N more" — without @@ -1369,6 +1377,22 @@ type DevFileContrib struct { Churn int64 } +// DevCommit is a single commit attributed to the dev, carrying the +// fields needed to render the per-dev "top commits" list. 
Mirrors the +// shape of BigCommit (the dataset-level TopCommits type) minus the +// AuthorName/AuthorEmail fields — those are redundant in a per-dev view +// where every entry belongs to the same author. Message is truncated +// at 80 chars (same as TopCommits) to keep the CLI/HTML table narrow. +type DevCommit struct { + SHA string + Date string + Message string + Additions int64 + Deletions int64 + LinesChanged int64 + FilesChanged int +} + // DevExtContrib is a dev's footprint in a single extension bucket. // Churn is the summed per-file dev-lines (from fe.devLines), so it // reflects lines the dev personally added/removed across files that @@ -1525,16 +1549,44 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile { // Per-dev work grid + monthly activity devGrid := make(map[string]*[7][24]int) devMonthly := make(map[string]map[string]*ActivityBucket) + // Per-dev commit list for TopCommits ranking. Collected in the same + // ds.commits pass as devGrid/devMonthly so we don't iterate the full + // commit map twice; actual sort + top-10 truncation happens in the + // per-dev assembly loop below. + devCommits := make(map[string][]DevCommit) dayIdx := [7]int{6, 0, 1, 2, 3, 4, 5} // Sunday=6, Monday=0, ... - for _, cm := range ds.commits { + for sha, cm := range ds.commits { if !inTarget(cm.email) { continue } if cm.date.IsZero() { + // Note: dataset-level TopCommits() renders zero-date commits + // as "0001-01-01"; we drop them here because grid/monthly below + // share this guard and malformed-date commits are rare enough + // in practice (JSONL extract always emits author_date) that + // the divergence is not worth branching the loop for. continue } + // Message is stored un-truncated on purpose: the 80-char + // truncation is deferred to the per-dev assembly loop below, + // which runs after sort + top-10 cap. A dev with thousands of + // commits would otherwise pay N small string allocations here + // just to throw away all but 10. 
Dataset-level TopCommits() + // truncates inline because it builds BigCommits in one pass; + // the per-dev path splits collection from projection so we can + // avoid that cost. + devCommits[cm.email] = append(devCommits[cm.email], DevCommit{ + SHA: sha, + Date: cm.date.UTC().Format("2006-01-02"), + Message: cm.message, + Additions: cm.add, + Deletions: cm.del, + LinesChanged: cm.add + cm.del, + FilesChanged: cm.files, + }) + if devGrid[cm.email] == nil { devGrid[cm.email] = &[7][24]int{} } @@ -1585,6 +1637,33 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile { } } + // Top commits: rank this dev's commits by lines changed, mirroring + // the dataset-level TopCommits semantics. Deterministic tiebreak on + // SHA asc so the displayed top-10 is stable across runs when a dev + // has several same-sized commits (e.g. a series of formatting + // passes each touching the same LOC count). Message truncation is + // done here, post-cap, so we pay the string-copy cost for at most + // 10 entries per dev instead of the full commit count. + topCommits := devCommits[email] + topCommitsHidden := 0 + if len(topCommits) > 0 { + sort.Slice(topCommits, func(i, j int) bool { + if topCommits[i].LinesChanged != topCommits[j].LinesChanged { + return topCommits[i].LinesChanged > topCommits[j].LinesChanged + } + return topCommits[i].SHA < topCommits[j].SHA + }) + if len(topCommits) > 10 { + topCommitsHidden = len(topCommits) - 10 + topCommits = topCommits[:10] + } + for i := range topCommits { + if len(topCommits[i].Message) > 80 { + topCommits[i].Message = topCommits[i].Message[:77] + "..." 
+ } + } + } + var monthly []ActivityBucket if months, ok := devMonthly[email]; ok { var order []string @@ -1805,6 +1884,7 @@ func DevProfiles(ds *Dataset, filterEmail string, n int) []DevProfile { LinesChanged: cs.Additions + cs.Deletions, FilesTouched: cs.FilesTouched, ActiveDays: cs.ActiveDays, FirstDate: cs.FirstDate, LastDate: cs.LastDate, TopFiles: topFiles, TopFilesHidden: topFilesHidden, + TopCommits: topCommits, TopCommitsHidden: topCommitsHidden, Scope: scope, ScopeHidden: scopeHidden, Extensions: extensions, ExtensionsHidden: extensionsHidden, Specialization: specialization, diff --git a/internal/stats/stats_test.go b/internal/stats/stats_test.go index ba9e906..359a308 100644 --- a/internal/stats/stats_test.go +++ b/internal/stats/stats_test.go @@ -1958,6 +1958,142 @@ func TestDevProfilesContribType(t *testing.T) { } } +func TestDevProfilesTopCommits(t *testing.T) { + // Two devs × varying commit sizes. alice has 13 commits so the + // top-10 cap fires; bob has 4. The fixture deliberately + // includes two same-sized alice commits to exercise the SHA-asc + // tiebreak, a long-message commit to exercise the 80-char truncation, + // and a large bob commit to verify ranking is per-dev, not global. + ds := &Dataset{ + commits: map[string]*commitEntry{}, + contributors: map[string]*ContributorStat{}, + files: map[string]*fileEntry{}, + } + // Alice: 12 generated commits (a 13th, alice-LONG, is added below). Lines = 10*(i+1) except a tie at idx 4,5, both 50. + ds.contributors["alice@x"] = &ContributorStat{Name: "Alice", Email: "alice@x", Commits: 12, ActiveDays: 12} + for i := 0; i < 12; i++ { + sha := fmt.Sprintf("alice-%02d", i) + lines := int64(10 * (i + 1)) + if i == 5 { + lines = 50 // alice-04 (idx 4) stays 50; force idx 5 to same so + // alice-04 and alice-05 tie on 50 lines. 
+ } + ds.commits[sha] = &commitEntry{ + email: "alice@x", + date: time.Date(2024, 1, 1+i, 10, 0, 0, 0, time.UTC), + add: lines, del: 0, files: 1, + } + } + // Long-message commit: 120 chars, should truncate to 77 + "..." (80 total). + longMsg := strings.Repeat("x", 120) + ds.commits["alice-LONG"] = &commitEntry{ + email: "alice@x", + date: time.Date(2024, 2, 1, 10, 0, 0, 0, time.UTC), + add: 9999, del: 0, files: 5, + message: longMsg, + } + ds.contributors["alice@x"].Commits = 13 + + // Bob: 3 small commits, plus one huge (9000 lines) that must NOT + // appear in alice's TopCommits even though it's globally the biggest + // after alice-LONG. + ds.contributors["bob@x"] = &ContributorStat{Name: "Bob", Email: "bob@x", Commits: 4, ActiveDays: 4} + for i := 0; i < 3; i++ { + sha := fmt.Sprintf("bob-%02d", i) + ds.commits[sha] = &commitEntry{ + email: "bob@x", + date: time.Date(2024, 3, 1+i, 10, 0, 0, 0, time.UTC), + add: 5, del: 0, files: 1, + } + } + ds.commits["bob-BIG"] = &commitEntry{ + email: "bob@x", + date: time.Date(2024, 4, 1, 10, 0, 0, 0, time.UTC), + add: 9000, del: 0, files: 3, + } + + profiles := DevProfiles(ds, "", 0) + var alice, bob *DevProfile + for i := range profiles { + switch profiles[i].Email { + case "alice@x": + alice = &profiles[i] + case "bob@x": + bob = &profiles[i] + } + } + if alice == nil || bob == nil { + t.Fatalf("missing profile: alice=%v bob=%v", alice, bob) + } + + // Top-10 cap: alice has 13 commits → TopCommits=10, Hidden=3. + if len(alice.TopCommits) != 10 { + t.Fatalf("alice TopCommits len = %d, want 10", len(alice.TopCommits)) + } + if alice.TopCommitsHidden != 3 { + t.Errorf("alice TopCommitsHidden = %d, want 3", alice.TopCommitsHidden) + } + + // Ranking: LinesChanged desc. alice-LONG is the biggest (9999). 
+ if alice.TopCommits[0].SHA != "alice-LONG" { + t.Errorf("alice[0] = %q, want alice-LONG", alice.TopCommits[0].SHA) + } + for i := 1; i < len(alice.TopCommits); i++ { + if alice.TopCommits[i-1].LinesChanged < alice.TopCommits[i].LinesChanged { + t.Errorf("alice not lines-desc at idx %d: %d < %d", + i, alice.TopCommits[i-1].LinesChanged, alice.TopCommits[i].LinesChanged) + } + } + + // Message truncation: 77 + "..." = 80 chars. + if len(alice.TopCommits[0].Message) != 80 { + t.Errorf("long message len = %d, want 80 (77+...)", len(alice.TopCommits[0].Message)) + } + if !strings.HasSuffix(alice.TopCommits[0].Message, "...") { + t.Errorf("long message missing ellipsis: %q", alice.TopCommits[0].Message) + } + + // Per-dev isolation: no bob commits in alice's list. + for _, c := range alice.TopCommits { + if strings.HasPrefix(c.SHA, "bob-") { + t.Errorf("alice contains bob commit %q", c.SHA) + } + } + + // Bob has 4 commits, no truncation needed. + if len(bob.TopCommits) != 4 { + t.Errorf("bob TopCommits len = %d, want 4", len(bob.TopCommits)) + } + if bob.TopCommitsHidden != 0 { + t.Errorf("bob TopCommitsHidden = %d, want 0", bob.TopCommitsHidden) + } + if bob.TopCommits[0].SHA != "bob-BIG" { + t.Errorf("bob[0] = %q, want bob-BIG", bob.TopCommits[0].SHA) + } + + // Tiebreak: when LinesChanged ties, SHA asc wins. alice-04 and + // alice-05 both carry 50 lines. alice-04 must come first. + var tieIdx04, tieIdx05 = -1, -1 + for i, c := range alice.TopCommits { + if c.SHA == "alice-04" { + tieIdx04 = i + } + if c.SHA == "alice-05" { + tieIdx05 = i + } + } + if tieIdx04 >= 0 && tieIdx05 >= 0 && tieIdx04 > tieIdx05 { + t.Errorf("tiebreak broken: alice-04 at %d, alice-05 at %d (want 04 < 05)", tieIdx04, tieIdx05) + } + + // LinesChanged field equals add+del. 
+ for _, c := range alice.TopCommits { + if c.LinesChanged != c.Additions+c.Deletions { + t.Errorf("%s: LinesChanged=%d, add+del=%d", c.SHA, c.LinesChanged, c.Additions+c.Deletions) + } + } +} + func TestRenameMergesHistory(t *testing.T) { // JSONL newest-first (as git log emits). Historical sequence: // 1) 2024-01 c1 creates old.go From fb72fbb5b9ba55d80456ac9247f3c984c664c347 Mon Sep 17 00:00:00 2001 From: lex0c Date: Tue, 21 Apr 2026 14:03:20 -0300 Subject: [PATCH 2/5] Document per-dev top commits in METRICS.md and README Adds the Top commits row to the Profile table (ranking key, tiebreak, message truncation, render surfaces, and the IsZero divergence from the dataset-level Top Commits section), a README bullet alongside Top files, and includes TopCommits in the deterministic-tiebreak note on per-profile sub-lists. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 1 + docs/METRICS.md | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1ab7645..1225420 100644 --- a/README.md +++ b/README.md @@ -296,6 +296,7 @@ Each profile includes: - **Collaboration**: top devs sharing the same files (ranked by `shared_lines` = Σ min(linesA, linesB)) - **Weekend %**: off-hours work ratio - **Top files**: most impacted files by churn +- **Top commits**: the dev's largest individual commits by lines changed (additions + deletions); surfaces vendored drops and bulk rewrites that can skew the totals ### Coupling analysis diff --git a/docs/METRICS.md b/docs/METRICS.md index ed9f2a4..9e199b0 100644 --- a/docs/METRICS.md +++ b/docs/METRICS.md @@ -219,6 +219,7 @@ Per-developer report combining multiple metrics. | Specialization | Herfindahl index over the **full** per-directory file-count distribution: Σ pᵢ² where pᵢ is the share of the dev's files in directory i. 1 = all files in one directory (narrow specialist); 1/N for a uniform spread across N directories; approaches 0 as the distribution widens. 
Computed before the top-5 Scope truncation so it reflects actual breadth. Labels (see `specBroadGeneralistMax`, `specBalancedMax`, `specFocusedMax` constants): `< 0.15` broad generalist, `< 0.35` balanced, `< 0.7` focused specialist, `≥ 0.7` narrow specialist. Herfindahl, not Gini, because Gini would collapse "1 file in 1 dir" and "1 file in each of 5 dirs" to the same value (both have zero inequality among buckets), which misses the specialization distinction. **Measures file distribution, not domain expertise** — see caveat below. **Display vs raw:** CLI and HTML show the value rounded to 3 decimals (`%.3f`) for readability; JSON output preserves the full float64. Band classification runs against the raw float, so a value like 0.149 lands in `broad generalist` even though %.2f would have rounded it to `0.15`. JSON consumers that reproduce the banding must use the raw value, not a rounded version. | | Contribution type | Based on del/add ratio: growth (<0.4), balanced (0.4-0.8), refactor (>0.8) | | Collaborators | Top 5 devs sharing code with this dev. Ranked by `shared_lines` (Σ min(linesA, linesB) across shared files), tiebreak `shared_files`, then email. Same `shared_lines` semantics as the Developer Network metric — discounts trivial one-line touches so "collaborator" reflects real overlap. | +| Top commits | The dev's top 10 commits by `lines_changed` (additions + deletions), tiebreak `sha asc`. Same ranking key and tiebreak as the dataset-level Top Commits section so the two read consistently side by side. Messages follow the same 80-character truncation rule and are only populated when `extract` ran with `--include-commit-messages`. Rendered in the CLI `profile` stat and in the standalone `--email` HTML profile page; intentionally omitted from the main report's Developer Profiles cards to keep those compact. 
**Divergence from dataset-level Top Commits:** commits with a zero `author_date` are dropped from the per-dev list (they share the guard that protects grid/monthly bucketing); the dataset-level section renders them as `0001-01-01`. Negligible in practice — the JSONL extract always emits `author_date` — but worth knowing if you compare the two views. | ## Top Commits @@ -373,7 +374,7 @@ Every ranking function has an explicit tiebreaker so the same input produces the | `dev-network` | shared_lines | shared_files | | `profile` | commits | email asc | -A third-level tiebreaker on path/sha/email asc is applied where primary and secondary can both tie (`churn-risk`, `coupling`, `dev-network`) so ordering is stable even with exact equality on the first two keys. Inside each profile, the `TopFiles`, `Scope`, and `Collaborators` sub-lists are also sorted with explicit tiebreakers (path / dir / email asc) so their internal ordering is deterministic too. +A third-level tiebreaker on path/sha/email asc is applied where primary and secondary can both tie (`churn-risk`, `coupling`, `dev-network`) so ordering is stable even with exact equality on the first two keys. Inside each profile, the `TopFiles`, `TopCommits`, `Scope`, and `Collaborators` sub-lists are also sorted with explicit tiebreakers (path / sha / dir / email asc) so their internal ordering is deterministic too. Inside `busfactor`, the per-file `TopDevs` list is sorted by lines desc with an email asc tiebreaker. Without it, binary assets and small files where two devs contribute equal lines (e.g. `.gif`, `.png`, one-line configs) produced a different `TopDevs` email order on every run. 
From e611f7a3ed1ade04027e90e3bc7e7dca0a25d054 Mon Sep 17 00:00:00 2001 From: lex0c Date: Tue, 21 Apr 2026 14:04:55 -0300 Subject: [PATCH 3/5] Show shown-of-total counter on Developer Profiles heading Mirrors the "X of Y" pattern already used on Top Contributors, Hotspots, Directories, and other top-N sections so a reader can tell at a glance that the cards below are a truncated slice of a larger dev set rather than the full roster. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/report/template.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/report/template.go b/internal/report/template.go index 908bf2a..15b1136 100644 --- a/internal/report/template.go +++ b/internal/report/template.go @@ -372,7 +372,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .Profiles}} -

Developer Profiles

+

Developer Profiles{{if lt (len .Profiles) .Summary.TotalDevs}} {{thousands (len .Profiles)}} of {{thousands .Summary.TotalDevs}}{{end}}

Per-developer view. Use to spot silos (narrow scope + few collaborators), knowledge concentration (high pace on few directories), and cultural patterns (weekend or refactor-heavy work). · {{docRef "profile"}}

{{range .Profiles}}
From fed9e2ff05f4793645241e2d339f39ce9f6b171e Mon Sep 17 00:00:00 2001 From: lex0c Date: Tue, 21 Apr 2026 14:20:04 -0300 Subject: [PATCH 4/5] Guard SHA truncation in Top commits renderers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both new Top-commits surfaces sliced SHA to a fixed 12 chars (`tc.SHA[:12]` in the CLI formatter, `{{slice .SHA 0 12}}` in the HTML profile template), which panics / errors when a dataset carries short commit IDs — LoadJSONL performs no length validation, so hand-built fixtures like "c1" or any future ingest path that emits abbreviated SHAs would crash profile text output or break template execution. CLI clamps via `if len(sha) > 12` before slicing; HTML uses `printf "%.12s"` which truncates safely on any length. The other SHA slice sites in format.go (TopCommits / LatestCommits) share the same latent risk and are left untouched to keep this change scoped. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/report/profile_template.go | 2 +- internal/stats/format.go | 13 +++++++++++- internal/stats/stats_test.go | 33 +++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) diff --git a/internal/report/profile_template.go b/internal/report/profile_template.go index 4b2062b..e645865 100644 --- a/internal/report/profile_template.go +++ b/internal/report/profile_template.go @@ -197,7 +197,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col SHADateLinesFilesMessage {{range .Profile.TopCommits}} - {{slice .SHA 0 12}} + {{printf "%.12s" .SHA}} {{.Date}} {{thousands .LinesChanged}} {{thousands .FilesChanged}} diff --git a/internal/stats/format.go b/internal/stats/format.go index 4328a7e..bab898a 100644 --- a/internal/stats/format.go +++ b/internal/stats/format.go @@ -524,8 +524,19 @@ func (f *Formatter) PrintProfiles(profiles []DevProfile) error { fmt.Fprintln(f.w) fmt.Fprintln(f.w, " Top commits:") for _, tc := range p.TopCommits { + // 
Defensive slice: LoadJSONL does not validate SHA + // length, so hand-built fixtures (e.g. "c1") or a + // future ingest path that emits abbreviated SHAs + // would panic on a fixed tc.SHA[:12]. The other SHA + // slice sites in this file (TopCommits / LatestCommits) + // carry the same latent risk and are left as-is so + // this change stays scoped to the new Top-commits block. + sha := tc.SHA + if len(sha) > 12 { + sha = sha[:12] + } fmt.Fprintf(f.w, " %s %s %6d lines %3d files %s\n", - tc.SHA[:12], tc.Date, tc.LinesChanged, tc.FilesChanged, tc.Message) + sha, tc.Date, tc.LinesChanged, tc.FilesChanged, tc.Message) } if p.TopCommitsHidden > 0 { fmt.Fprintf(f.w, " ... (+%d more commits not shown)\n", p.TopCommitsHidden) diff --git a/internal/stats/stats_test.go b/internal/stats/stats_test.go index 359a308..3dd49c1 100644 --- a/internal/stats/stats_test.go +++ b/internal/stats/stats_test.go @@ -2644,6 +2644,39 @@ func TestHerfindahlHelper(t *testing.T) { } } +func TestPrintProfilesTopCommitsShortSHA(t *testing.T) { + // Regression: the Top commits block used to slice tc.SHA[:12] + // unconditionally, which panics when the dataset carries short + // SHAs. LoadJSONL does not validate SHA length, so hand-built + // fixtures (or a future ingest path that emits abbreviated SHAs) + // would crash profile text output. The formatter must clamp to + // min(len(sha), 12) instead of fixed slicing. 
+ p := DevProfile{ + Name: "N", Email: "n@x", Commits: 1, ActiveDays: 1, + FirstDate: "2024-01-01", LastDate: "2024-01-01", + TopCommits: []DevCommit{ + {SHA: "c1", Date: "2024-01-01", LinesChanged: 10, FilesChanged: 1}, + {SHA: "abcdef", Date: "2024-01-02", LinesChanged: 20, FilesChanged: 2}, + {SHA: "abcdef1234567890", Date: "2024-01-03", LinesChanged: 30, FilesChanged: 3}, + }, + } + var buf bytes.Buffer + f := NewFormatter(&buf, "table") + if err := f.PrintProfiles([]DevProfile{p}); err != nil { + t.Fatalf("PrintProfiles: %v", err) + } + out := buf.String() + for _, want := range []string{"c1", "abcdef", "abcdef123456"} { + if !strings.Contains(out, want) { + t.Errorf("output should contain %q (short-SHA rendering), got:\n%s", want, out) + } + } + // 16-char SHA must be truncated to 12, so the trailing "7890" disappears. + if strings.Contains(out, "abcdef1234567890") { + t.Errorf("16-char SHA should have been truncated to 12, got:\n%s", out) + } +} + func TestPrintProfilesSpecializationDisplayPrecision(t *testing.T) { // The Specialization display must show enough decimals that the // rendered number is self-consistent with the band label. At %.2f a From df3cbd598fa00a3e2d91bf515fd80430c31bf790 Mon Sep 17 00:00:00 2001 From: lex0c Date: Tue, 21 Apr 2026 14:34:15 -0300 Subject: [PATCH 5/5] Gate Message column on profile Top Commits; guard dataset-level SHA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit profile_template.go: the per-dev Top Commits block now drops the Message column (header, cells, and hidden-row colspan) when the first commit carries an empty message — mirrors the dataset-level Top Commits convention so `extract --include-commit-messages` is a strict opt-in instead of leaving an always-empty column in place. template.go: the dataset-level Top Commits row switched from {{slice .SHA 0 12}} to {{printf "%.12s" .SHA}}, closing the same latent crash path the profile block was already patched for. 
Added a report-level regression covering both invariants on the profile template: short SHAs render intact, the 16-char SHA clamps to 12, and the Message column is absent when no TopCommit has a message. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/report/profile_template.go | 7 ++-- internal/report/report_test.go | 52 +++++++++++++++++++++++++++++ internal/report/template.go | 2 +- 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/internal/report/profile_template.go b/internal/report/profile_template.go index e645865..add5b7b 100644 --- a/internal/report/profile_template.go +++ b/internal/report/profile_template.go @@ -191,21 +191,22 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col {{end}} {{if .Profile.TopCommits}} +{{$hasMsg := (index .Profile.TopCommits 0).Message}}

Top Commits

This developer's largest individual commits by lines changed (additions + deletions). A handful of outsized commits (vendored drops, bulk renames, generated code) reads very differently from a steady stream of medium-sized ones, even when the totals match.

- +{{if $hasMsg}}{{end}} {{range .Profile.TopCommits}} - + {{if $hasMsg}}{{end}} {{end}} {{if gt .Profile.TopCommitsHidden 0}} - + {{end}}
SHADateLinesFilesMessage
SHADateLinesFilesMessage
{{printf "%.12s" .SHA}} {{.Date}} {{thousands .LinesChanged}} {{thousands .FilesChanged}}{{.Message}}{{.Message}}
+{{.Profile.TopCommitsHidden}} more commits not shown
+{{.Profile.TopCommitsHidden}} more commits not shown
{{end}} diff --git a/internal/report/report_test.go b/internal/report/report_test.go index af1d6e7..e82d7ff 100644 --- a/internal/report/report_test.go +++ b/internal/report/report_test.go @@ -256,6 +256,58 @@ func TestGenerateProfile_SmokeRender(t *testing.T) { } } +func TestProfileTmpl_TopCommitsShortSHAAndMessageGate(t *testing.T) { + // Two invariants on the per-dev Top Commits block: + // 1. SHAs shorter than 12 chars must not crash template execution. + // LoadJSONL does not enforce SHA length, and the previous + // {{slice .SHA 0 12}} raised "index out of range" on any short + // input, aborting profile generation for the whole page. + // 2. The Message column header and cells must drop out when no + // commit carries a message — mirrors the dataset-level Top + // Commits convention so `extract --include-commit-messages` is + // a strict opt-in, not a silent empty-column penalty. + data := ProfileReportData{ + GeneratedAt: "2024-01-01 00:00", + RepoName: "t", + Profile: stats.DevProfile{ + Name: "N", Email: "n@x", + Commits: 1, ActiveDays: 1, + FirstDate: "2024-01-01", LastDate: "2024-01-01", + TopCommits: []stats.DevCommit{ + {SHA: "c1", Date: "2024-01-01", LinesChanged: 10, FilesChanged: 1}, + {SHA: "abcdef1234567890", Date: "2024-01-02", LinesChanged: 20, FilesChanged: 2}, + }, + }, + } + var buf bytes.Buffer + if err := profileTmpl.Execute(&buf, data); err != nil { + t.Fatalf("profileTmpl.Execute: %v", err) + } + out := buf.String() + + if !strings.Contains(out, ">c1<") { + t.Errorf("short 2-char SHA should render intact, got:\n%s", out) + } + if !strings.Contains(out, ">abcdef123456<") { + t.Errorf("16-char SHA should truncate to 12, got:\n%s", out) + } + if strings.Contains(out, "abcdef1234567890") { + t.Errorf("16-char SHA leaked past the 12-char cap") + } + + // Message column must be absent when all TopCommits have empty messages. + topBlock := out + if idx := strings.Index(out, "

Top Commits"); idx >= 0 { + topBlock = out[idx:] + } + if end := strings.Index(topBlock, ""); end >= 0 { + topBlock = topBlock[:end] + } + if strings.Contains(topBlock, "Message") { + t.Errorf("Message column should not render when no commit has a message, got:\n%s", topBlock) + } +} + func TestGenerateProfile_UnknownEmail(t *testing.T) { ds := loadFixture(t) var buf bytes.Buffer diff --git a/internal/report/template.go b/internal/report/template.go index 15b1136..b6bf907 100644 --- a/internal/report/template.go +++ b/internal/report/template.go @@ -343,7 +343,7 @@ footer { margin-top: 40px; padding-top: 16px; border-top: 1px solid #d0d7de; col SHAAuthorDateLinesFiles{{if and (gt (len .TopCommits) 0) (index .TopCommits 0).Message}}Message{{end}} {{range .TopCommits}} - {{slice .SHA 0 12}} + {{printf "%.12s" .SHA}} {{.AuthorName}} {{.Date}} {{thousands .LinesChanged}}