Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ OSSRank must stay honest:

- Use official GitHub APIs only; never scrape GitHub HTML.
- Treat GitHub profile location text as unverified free text.
- Rank observed public signals, not private contributions. V1 contributor pages prioritize one-year GitHub GraphQL commit totals, one-year public pull request totals, repository count, and followers.
- Rank observed public signals, not private contributions. Contributor pages prioritize one-year GitHub GraphQL commit totals, one-year public pull request totals, repository count, and followers. Raw commit totals stay visible, but ranking uses a per-user burst-adjusted commit signal when a short anomalous daily spike exceeds that contributor's adaptive baseline cap.
- Preserve last-known-good data when refreshes fail.
- Keep the product flow simple: choose a country, view contributors, sort by commits/public PRs/repos/followers, or view the single projects page.
- Avoid claims of complete global coverage, endorsement, nationality, employment status, or identity attributes.
Expand Down
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
"refresh:fixtures": "tsx scripts/refresh-fixtures.ts",
"validate:data": "tsx scripts/validate-data.ts data/latest",
"build": "tsx scripts/build-site.ts --data data/latest --history data/history --out dist && pnpm run build:cli",
"test": "pnpm run check && pnpm run refresh:fixtures && pnpm run validate:data && pnpm run build && pnpm run smoke:cli",
"test": "pnpm run test:burst && pnpm run check && pnpm run refresh:fixtures && pnpm run validate:data && pnpm run build && pnpm run smoke:cli",
"dev": "pnpm run build && npx --yes serve dist",
"deploy:cloudflare": "pnpm run build && wrangler pages deploy dist --project-name ossrank",
"build:cli": "tsc -p tsconfig.cli.json && node scripts/fix-cli-shebang.cjs",
"smoke:cli": "pnpm run build:cli && node dist-cli/cli.js rank contributors --input fixtures/contributors.json --limit 2 --format table && node dist-cli/cli.js token-check --token demo && ! node dist-cli/cli.js rank contributors --input fixtures/contributors.json --limit nope",
"refresh:live": "tsx src/cli.ts refresh --mode live --limit ${OSSRANK_LIMIT:-20}"
"refresh:live": "tsx src/cli.ts refresh --mode live --limit ${OSSRANK_LIMIT:-20}",
"test:burst": "tsx scripts/verify-burst-adjustment.ts"
},
"dependencies": {},
"devDependencies": {
Expand Down
16 changes: 12 additions & 4 deletions scripts/build-site.ts

Large diffs are not rendered by default.

19 changes: 17 additions & 2 deletions scripts/validate-data.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { readdir, readFile } from 'node:fs/promises';
import { join } from 'node:path';
import type { Manifest, RankingSnapshot } from '../src/lib/types.js';
import type { ContributorBurstAdjustment, Manifest, RankingSnapshot } from '../src/lib/types.js';

const dataDir = process.argv[2] ?? 'data/latest';
const errors: string[] = [];
Expand All @@ -13,14 +13,26 @@ function isIso(value: unknown): boolean {
return typeof value === 'string' && Number.isFinite(Date.parse(value));
}


/**
 * Validates one contributor's burst-adjustment record and reports every
 * violated rule through the file's `assert` helper.
 *
 * @param filename - Snapshot file the record came from; prefixes each message.
 * @param row - 1-based row number of the entry inside the snapshot.
 * @param adjustment - The burst-adjustment record to check.
 */
function validateBurstAdjustment(filename: string, row: number, adjustment: ContributorBurstAdjustment): void {
  const {
    raw_public_commits: rawCommits,
    adjusted_public_commits: adjustedCommits,
    baseline_daily_contributions: baseline,
    daily_burst_cap: dailyCap,
    capped_days: cappedDays,
    excess_contributions: excess,
    reason
  } = adjustment;

  // Every message follows "<file>: burst <subject> for row <n> <rule>".
  const message = (subject: string, rule: string): string => `${filename}: burst ${subject} for row ${row} ${rule}`;
  const isNonNegativeInteger = (value: number): boolean => Number.isInteger(value) && value >= 0;
  const isPositiveInteger = (value: number): boolean => Number.isInteger(value) && value > 0;

  // Commit totals: whole numbers, and the adjusted value may only shrink the raw one.
  assert(isNonNegativeInteger(rawCommits), message('raw_public_commits', 'must be a non-negative integer'));
  assert(isNonNegativeInteger(adjustedCommits), message('adjusted_public_commits', 'must be a non-negative integer'));
  assert(adjustedCommits <= rawCommits, message('adjusted commits', 'must not exceed raw commits'));

  // The baseline may be fractional; the cap and the counters must be positive integers.
  assert(typeof baseline === 'number' && baseline > 0, message('baseline', 'must be positive'));
  assert(isPositiveInteger(dailyCap), message('daily cap', 'must be a positive integer'));
  assert(isPositiveInteger(cappedDays), message('capped_days', 'must be positive'));
  assert(isPositiveInteger(excess), message('excess_contributions', 'must be positive'));

  // A reason of ten characters or fewer is rejected.
  assert(typeof reason === 'string' && reason.length > 10, message('reason', 'is required'));
}

const manifest = JSON.parse(await readFile(join(dataDir, 'manifest.json'), 'utf8')) as Manifest;
assert(isIso(manifest.generated_at), 'manifest.generated_at must be ISO');
assert(Array.isArray(manifest.completed_shards), 'manifest.completed_shards must be an array');
assert(manifest.completed_shards.length > 0, 'manifest must include at least one completed shard');

for (const shard of manifest.completed_shards) {
const filename = shard.path.replace('/data/latest/', '');
const snapshot = JSON.parse(await readFile(join(dataDir, filename), 'utf8')) as RankingSnapshot<{ rank: number; login?: string; full_name?: string; previous_rank?: unknown }>;
const snapshot = JSON.parse(await readFile(join(dataDir, filename), 'utf8')) as RankingSnapshot<{ rank: number; login?: string; full_name?: string; previous_rank?: unknown; contribution_burst_adjustment?: ContributorBurstAdjustment }>;
assert(snapshot.kind === shard.kind, `${filename}: kind must match manifest`);
assert(snapshot.slug === shard.slug, `${filename}: slug must match manifest`);
assert(isIso(snapshot.generated_at), `${filename}: generated_at must be ISO`);
Expand All @@ -33,6 +45,9 @@ for (const shard of manifest.completed_shards) {
if (entry.previous_rank !== undefined) {
assert(typeof entry.previous_rank === 'number' && Number.isInteger(entry.previous_rank) && entry.previous_rank > 0, `${filename}: previous_rank for row ${index + 1} must be a positive integer when present`);
}
if (entry.contribution_burst_adjustment !== undefined) {
validateBurstAdjustment(filename, index + 1, entry.contribution_burst_adjustment);
}
const key = entry.login ?? entry.full_name;
assert(key, `${filename}: entry ${index + 1} needs login or full_name`);
if (key) {
Expand Down
45 changes: 45 additions & 0 deletions scripts/verify-burst-adjustment.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import assert from 'node:assert/strict';
import { contributionBurstAdjustment, contributorCommitSignal, rankContributors } from '../src/lib/ranking.js';
import type { RankedContributor } from '../src/lib/types.js';

/**
 * Builds a synthetic 365-day contribution history: `365 - burstDays` days at
 * the `normal` count followed by `burstDays` days at the `burst` count.
 */
function days(normal: number, burst: number, burstDays: number): number[] {
  const quietStretch = new Array<number>(365 - burstDays).fill(normal);
  const burstStretch = new Array<number>(burstDays).fill(burst);
  return quietStretch.concat(burstStretch);
}

// Scenario 1: 358 days at 1 contribution, then 7 days at 1000.
// The record must exist, use a daily cap of 20, flag all 7 burst days, and
// reduce 7007 raw commits to 7007 - 7 * (1000 - 20) = 147.
const quietAccountBurst = contributionBurstAdjustment(7007, days(1, 1000, 7));
assert(quietAccountBurst, 'low-baseline 1/day -> 1000/day burst should be adjusted');
assert.equal(quietAccountBurst.daily_burst_cap, 20);
assert.equal(quietAccountBurst.capped_days, 7);
assert.equal(quietAccountBurst.adjusted_public_commits, 147);

// Scenario 2: a 200/day history with a 4000/day week must produce no adjustment.
assert.equal(
contributionBurstAdjustment(41980, days(200, 4000, 7)),
undefined,
'high-baseline 200/day -> 4000/day should be allowed'
);

// Scenario 3: a 20/day history with a 6000/day week must also produce no adjustment.
assert.equal(
contributionBurstAdjustment(42140, days(20, 6000, 7)),
undefined,
'established 20/day -> 6000/day should be allowed by the adaptive cap'
);

// Fixture: high raw commit total, carrying the adjustment computed in scenario 1.
const adjusted: Omit<RankedContributor, 'rank'> = {
login: 'burst-account',
profile_url: 'https://github.com/burst-account',
public_contributions: 7007,
observed_public_commits: 7007,
contribution_burst_adjustment: quietAccountBurst,
followers: 1
};
// Fixture: lower raw commit total and no adjustment record.
const steady: Omit<RankedContributor, 'rank'> = {
login: 'steady-builder',
profile_url: 'https://github.com/steady-builder',
public_contributions: 500,
observed_public_commits: 500,
followers: 1
};
// The ranking signal must be the adjusted total (147), so the steady account
// with 500 commits is expected to take first place.
assert.equal(contributorCommitSignal(adjusted), 147);
assert.equal(rankContributors([adjusted, steady])[0].login, 'steady-builder');

console.log('Burst adjustment checks passed');
20 changes: 14 additions & 6 deletions src/lib/github.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { mkdir, writeFile, readFile } from 'node:fs/promises';
import { join } from 'node:path';
import { rankContributors, rankProjectMomentum, rankProjects, rankRisingContributors } from './ranking.js';
import { contributionBurstAdjustment, rankContributors, rankProjectMomentum, rankProjects, rankRisingContributors } from './ranking.js';
import { COUNTRY_CONFIGS, type CountryConfig } from './countries.js';
import { snapshotBase } from './snapshots.js';
import { createTokenProvider, type GitHubTokenProvider } from './token-provider.js';
Expand Down Expand Up @@ -81,6 +81,7 @@ interface GitHubUserActivityNode {
contributionsCollection: {
totalCommitContributions: number;
totalPullRequestContributions: number;
contributionCalendar: { weeks: Array<{ contributionDays: Array<{ contributionCount: number }> }> };
};
}

Expand All @@ -92,7 +93,7 @@ interface CandidateQueryStat {

interface UserCandidate {
user: GitHubUserDetail;
activity: { commits: number; pullRequests: number };
activity: { commits: number; pullRequests: number; dailyContributions: number[] };
discoveredByQuery: string;
}

Expand Down Expand Up @@ -394,7 +395,7 @@ function chunks<T>(items: T[], size: number): T[][] {
return result;
}

function toUserCandidate(node: GitHubUserActivityNode): { user: GitHubUserDetail; activity: { commits: number; pullRequests: number } } {
function toUserCandidate(node: GitHubUserActivityNode): { user: GitHubUserDetail; activity: { commits: number; pullRequests: number; dailyContributions: number[] } } {
return {
user: {
login: node.login,
Expand All @@ -407,7 +408,8 @@ function toUserCandidate(node: GitHubUserActivityNode): { user: GitHubUserDetail
},
activity: {
commits: node.contributionsCollection.totalCommitContributions,
pullRequests: node.contributionsCollection.totalPullRequestContributions
pullRequests: node.contributionsCollection.totalPullRequestContributions,
dailyContributions: node.contributionsCollection.contributionCalendar.weeks.flatMap((week) => week.contributionDays.map((day) => day.contributionCount))
}
};
}
Expand All @@ -416,10 +418,10 @@ async function userProfilesWithActivityBatch(
client: GitHubClient,
logins: string[],
generatedAt: string
): Promise<Array<{ user: GitHubUserDetail; activity: { commits: number; pullRequests: number } }>> {
): Promise<Array<{ user: GitHubUserDetail; activity: { commits: number; pullRequests: number; dailyContributions: number[] } }>> {
const from = daysAgoIso(generatedAt, 365);
const to = generatedAt;
const results: Array<{ user: GitHubUserDetail; activity: { commits: number; pullRequests: number } }> = [];
const results: Array<{ user: GitHubUserDetail; activity: { commits: number; pullRequests: number; dailyContributions: number[] } }> = [];

for (const batch of chunks(logins, 5)) {
const variableDefinitions = batch.map((_, index) => `$login${index}: String!`).join(', ');
Expand All @@ -435,6 +437,11 @@ async function userProfilesWithActivityBatch(
contributionsCollection(from: $from, to: $to) {
totalCommitContributions
totalPullRequestContributions
contributionCalendar {
weeks {
contributionDays { contributionCount }
}
}
}
}`).join('');
const variables: Record<string, unknown> = { from, to };
Expand Down Expand Up @@ -538,6 +545,7 @@ async function collectUsers(client: GitHubClient, queries: string | string[], li
public_gists: user.public_gists,
observed_public_commits: activity.commits,
observed_public_pull_requests: activity.pullRequests,
contribution_burst_adjustment: contributionBurstAdjustment(activity.commits, activity.dailyContributions),
followers: user.followers,
location: user.location ?? undefined,
location_confidence: locationTerms && countryName ? locationConfidence(user.location, locationTerms, countryName) : 'unknown' as const,
Expand Down
58 changes: 54 additions & 4 deletions src/lib/ranking.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,58 @@
import type { RankedContributor, RankedProject } from './types.js';
import type { ContributorBurstAdjustment, RankedContributor, RankedProject } from './types.js';

/**
 * Returns the median of `values`, or 0 when the list is empty.
 * The input is left untouched; an even-length list yields the mean of its two
 * central values.
 */
function median(values: number[]): number {
  const count = values.length;
  if (count === 0) {
    return 0;
  }

  const ascending = values.slice().sort((left, right) => left - right);
  const upperMiddle = Math.floor(count / 2);
  if (count % 2 !== 0) {
    return ascending[upperMiddle];
  }
  return (ascending[upperMiddle - 1] + ascending[upperMiddle]) / 2;
}

/**
 * Estimates a contributor's typical daily activity while ignoring spikes.
 *
 * Only days with a count above zero are considered. The largest values are
 * dropped (2% of the active days, rounded up, at least one, but never all of
 * them) and the median of what remains is returned, floored at 1.
 * Returns 0 when there is no active day at all.
 */
function robustDailyBaseline(dailyCounts: number[]): number {
  // `filter` already yields a fresh array, so sorting it in place leaves the input alone.
  const ascendingActive = dailyCounts.filter((count) => count > 0).sort((left, right) => left - right);
  const activeTotal = ascendingActive.length;
  if (activeTotal === 0) {
    return 0;
  }

  const proportionalTrim = Math.ceil(activeTotal * 0.02);
  // Always keep at least one day: a single active day is not trimmed away.
  const trimCount = Math.min(activeTotal - 1, Math.max(1, proportionalTrim));
  const keptDays = ascendingActive.slice(0, activeTotal - trimCount);
  return Math.max(1, median(keptDays));
}

/**
 * Computes the highest daily count tolerated for a given baseline: the larger
 * of 20x the baseline and 15x the baseline squared, rounded up to an integer.
 */
function adaptiveDailyBurstCap(baseline: number): number {
  const linearAllowance = baseline * 20;
  const quadraticAllowance = baseline * baseline * 15;
  const allowance = Math.max(linearAllowance, quadraticAllowance);
  return Math.ceil(allowance);
}

/**
 * Builds a burst-adjustment record for a contributor whose daily activity
 * contains days above their own adaptive cap.
 *
 * @param rawPublicCommits - Unadjusted commit total; kept verbatim in the record.
 * @param dailyContributionCounts - Per-day contribution counts for the ranking window.
 * @returns The adjustment record, or `undefined` when the commit total is not
 *   positive, no daily counts are given, the baseline is not positive, or no
 *   day exceeds the cap.
 *
 * NOTE(review): the excess is measured on `dailyContributionCounts` but
 * subtracted from `rawPublicCommits`. If the daily counts include activity
 * other than commits, the two are not in the same unit — confirm with callers.
 */
export function contributionBurstAdjustment(rawPublicCommits: number, dailyContributionCounts: number[]): ContributorBurstAdjustment | undefined {
  if (rawPublicCommits <= 0 || dailyContributionCounts.length === 0) {
    return undefined;
  }

  const baseline = robustDailyBaseline(dailyContributionCounts);
  if (baseline <= 0) {
    return undefined;
  }

  const cap = adaptiveDailyBurstCap(baseline);
  const overCapDays = dailyContributionCounts.filter((count) => count > cap);
  // Only the portion of each flagged day that lies above the cap counts as excess.
  const excessContributions = overCapDays.reduce((total, count) => total + (count - cap), 0);
  if (overCapDays.length === 0 || excessContributions <= 0) {
    return undefined;
  }

  const roundedExcess = Math.round(excessContributions);
  return {
    raw_public_commits: rawPublicCommits,
    // Clamped at zero because the excess can be larger than the commit total.
    adjusted_public_commits: Math.max(0, rawPublicCommits - roundedExcess),
    baseline_daily_contributions: Number(baseline.toFixed(2)),
    daily_burst_cap: cap,
    capped_days: overCapDays.length,
    excess_contributions: roundedExcess,
    reason: 'per-user daily contribution burst exceeded an adaptive baseline cap; raw public commits are preserved for audit'
  };
}

/**
 * Picks the commit figure used for ranking a contributor.
 *
 * Preference order: the burst-adjusted total when an adjustment record is
 * present, then `observed_public_commits`, then `public_contributions`.
 */
export function contributorCommitSignal(entry: Omit<RankedContributor, 'rank'>): number {
  const burstAdjusted = entry.contribution_burst_adjustment?.adjusted_public_commits;
  if (burstAdjusted != null) {
    return burstAdjusted;
  }

  const observed = entry.observed_public_commits;
  return observed != null ? observed : entry.public_contributions;
}

export function rankContributors(entries: Omit<RankedContributor, 'rank'>[]): RankedContributor[] {
return [...entries]
.sort((a, b) => (b.observed_public_commits ?? b.public_contributions) - (a.observed_public_commits ?? a.public_contributions) || (b.observed_public_pull_requests ?? 0) - (a.observed_public_pull_requests ?? 0) || (b.public_repos ?? 0) - (a.public_repos ?? 0) || b.followers - a.followers || a.login.localeCompare(b.login))
.sort((a, b) => contributorCommitSignal(b) - contributorCommitSignal(a) || (b.observed_public_pull_requests ?? 0) - (a.observed_public_pull_requests ?? 0) || (b.public_repos ?? 0) - (a.public_repos ?? 0) || b.followers - a.followers || a.login.localeCompare(b.login))
.map((entry, index) => ({ ...entry, rank: index + 1 }));
}

Expand All @@ -14,14 +64,14 @@ export function rankProjects(entries: Omit<RankedProject, 'rank'>[]): RankedProj

export function rankRisingContributors(entries: Omit<RankedContributor, 'rank'>[]): RankedContributor[] {
const score = (entry: Omit<RankedContributor, 'rank'>): number => {
const commits = entry.observed_public_commits ?? entry.public_contributions;
const commits = contributorCommitSignal(entry);
const prs = entry.observed_public_pull_requests ?? 0;
const repos = entry.public_repos ?? 0;
const followers = entry.followers;
return (commits + prs * 4 + repos * 8) / Math.sqrt(followers + 8);
};
return [...entries]
.sort((a, b) => score(b) - score(a) || (b.observed_public_commits ?? b.public_contributions) - (a.observed_public_commits ?? a.public_contributions) || a.login.localeCompare(b.login))
.sort((a, b) => score(b) - score(a) || contributorCommitSignal(b) - contributorCommitSignal(a) || a.login.localeCompare(b.login))
.map((entry, index) => ({ ...entry, rank: index + 1 }));
}

Expand Down
11 changes: 11 additions & 0 deletions src/lib/types.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
export type FreshnessState = 'fresh' | 'stale' | 'failed' | 'demo';

/**
 * Audit record attached to a contributor whose commit total was reduced for
 * ranking because some days exceeded their adaptive daily cap.
 */
export interface ContributorBurstAdjustment {
/** Commit total before any adjustment. */
raw_public_commits: number;
/** Commit total after removing the excess; never negative and never above the raw total. */
adjusted_public_commits: number;
/** The contributor's typical daily contribution count; may be fractional. */
baseline_daily_contributions: number;
/** Highest daily count accepted without adjustment, derived from the baseline. */
daily_burst_cap: number;
/** Number of days whose count was above the cap. */
capped_days: number;
/** Sum, over the capped days, of the amount by which each day exceeded the cap. */
excess_contributions: number;
/** Human-readable explanation of why the adjustment was applied. */
reason: string;
}

export interface RankedContributor {
rank: number;
login: string;
Expand All @@ -9,6 +19,7 @@ export interface RankedContributor {
public_repos?: number;
public_gists?: number;
observed_public_commits?: number;
contribution_burst_adjustment?: ContributorBurstAdjustment;
observed_public_pull_requests?: number;
followers: number;
location?: string;
Expand Down
Loading