Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/deploy-pages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
# build-history.mjs walks the full git history of the public dump to
# rebuild the homepage growth timeline; a shallow clone hides old syncs.
fetch-depth: 0

- uses: actions/setup-node@v4
with:
Expand Down
5 changes: 2 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,8 @@ env/
# Note: data/_staging/ (raw collected candidate pool) is intentionally tracked —
# comprehensive data collection is a purpose of this repo.

# Verification layer caches: full Tier 0 scores + network caches are cheap to
# recompute. Only data/_verify/ledger.jsonl (the promotion audit trail) is tracked.
data/_verify/state/
# Build-only: regenerated from full git history on every Pages deploy (site/scripts/build-history.mjs)
site/public/v1/history.json

# Testing / coverage
.pytest_cache/
Expand Down
2 changes: 2 additions & 0 deletions site/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
"private": true,
"scripts": {
"dev": "astro dev",
"build:history": "node scripts/build-history.mjs",
"prebuild": "node scripts/build-history.mjs",
"build": "astro build",
"preview": "astro preview"
},
Expand Down
113 changes: 113 additions & 0 deletions site/scripts/build-history.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Precompute the dataset growth timeline from git history at build time.
//
// The homepage History section used to call the GitHub API live and could only
// show the last ~7 commits (per_page=8 + slice(0, 7)), so older syncs were invisible
// and the unauthenticated API rate limit (60/h) made it fragile. Instead we walk
// the full git history of `site/public/v1/index.json` once during the deploy build
// and emit `site/public/v1/history.json` — a single static file the page reads.
//
// Build-only artifact (gitignored): regenerated on every Pages deploy, so it is
// always complete and never churns data PRs. Requires a full-depth checkout
// (fetch-depth: 0) to see old commits; degrades to an empty timeline otherwise.

import { execFileSync } from "node:child_process";
import { mkdirSync, writeFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import { fileURLToPath } from "node:url";

// Absolute path of the site/ directory: one level above the folder holding this script.
const SITE_DIR = resolve(dirname(fileURLToPath(import.meta.url)), "..");
// Parent of site/; every git command is run against this directory via `git -C`.
const REPO_ROOT = resolve(SITE_DIR, "..");
// Repo-relative path of the manifest whose git history is replayed.
const TRACKED = "site/public/v1/index.json";
// Destination of the generated timeline file.
const OUT = resolve(SITE_DIR, "public/v1/history.json");
// Base URL used to build the per-commit link stored on each timeline point.
const REPO_URL = "https://github.com/GetTechAPI/TechAPI";

// Categories we sum into the record total (matches the public dump manifest).
const ORDER = ["smartphones", "tablets", "watches", "pdas", "socs", "gpus", "cpus", "brands"];
// Keep the chart legible if the history ever grows large: downsample to at most
// MAX points, always preserving the first (baseline) and last (latest) commits.
const MAX = 40;

// Run `git -C <REPO_ROOT> ...args` synchronously and return its stdout as a
// UTF-8 string. Any error raised by execFileSync propagates to the caller.
function git(args) {
  const argv = ["-C", REPO_ROOT, ...args];
  // 64 MiB stdout ceiling passed to execFileSync.
  const options = { encoding: "utf8", maxBuffer: 64 * 1024 * 1024 };
  return execFileSync("git", argv, options);
}

// Extract the per-category record counts from a dump manifest.
//
// For every key in ORDER, reads `manifest.collections[key].count` and keeps it
// only when it is a number. Returns `{ counts, total }`, where `counts` maps the
// kept keys to their values and `total` is their sum. A missing manifest or
// missing collections yields `{ counts: {}, total: 0 }`.
function countsOf(manifest) {
  const collections = manifest?.collections;
  return ORDER.reduce(
    (acc, key) => {
      const value = collections?.[key]?.count;
      if (typeof value === "number") {
        acc.counts[key] = value;
        acc.total += value;
      }
      return acc;
    },
    { counts: {}, total: 0 },
  );
}

// Thin a timeline down to at most MAX entries.
//
// Lists of MAX entries or fewer are returned as-is (same array). Longer lists
// are sampled at MAX evenly spaced, rounded positions running from index 0 to
// the last index; a position that repeats is only taken once.
function downsample(points) {
  const size = points.length;
  if (size <= MAX) return points;
  const stride = (size - 1) / (MAX - 1);
  // A Set keeps first-insertion order, so it dedupes without reordering.
  const positions = new Set(Array.from({ length: MAX }, (_, i) => Math.round(i * stride)));
  return [...positions].map((position) => points[position]);
}

// Replay the git history of TRACKED into an oldest-first list of timeline points.
//
// Each commit that touched TRACKED contributes one point holding its short sha,
// committer date, subject, commit URL, record total and per-category counts.
// Commits where the file cannot be read or parsed, or whose total is zero, are
// left out. The finished list is passed through downsample() before returning.
function buildPoints() {
  // One line per commit: full sha, committer ISO date, subject, joined by 0x1f.
  const log = git(["log", "--format=%H%x1f%cI%x1f%s", "--", TRACKED]).trim();
  if (!log) return [];

  const lines = log.split("\n");
  const timeline = [];
  // git log lists newest first, so walk the lines backwards to get oldest first.
  for (let i = lines.length - 1; i >= 0; i--) {
    const [sha, date, ...subjectParts] = lines[i].split("\x1f");
    // Re-join in case the subject itself contained the separator byte.
    const subject = subjectParts.join("\x1f");

    let manifest;
    try {
      manifest = JSON.parse(git(["show", `${sha}:${TRACKED}`]));
    } catch {
      // File absent or unparseable at this commit: leave it out of the timeline.
      continue;
    }

    const { counts, total } = countsOf(manifest);
    if (!total) continue;

    timeline.push({
      sha: sha.slice(0, 7),
      date,
      title: (subject || "Dataset sync").trim(),
      url: `${REPO_URL}/commit/${sha}`,
      total,
      counts,
    });
  }
  return downsample(timeline);
}

// Entry point: build the timeline and write it to OUT as pretty-printed JSON.
//
// If buildPoints() throws, a warning is logged and an empty timeline is written
// instead, so the output file is produced either way.
function main() {
  let points;
  try {
    points = buildPoints();
  } catch (err) {
    console.warn(`[build-history] git history unavailable: ${err.message}`);
    points = [];
  }

  // Make sure the output directory exists before writing into it.
  const outDir = dirname(OUT);
  mkdirSync(outDir, { recursive: true });

  const payload = { generated_at: new Date().toISOString(), schema: 1, points };
  writeFileSync(OUT, JSON.stringify(payload, null, 2));
  console.log(`[build-history] wrote ${points.length} point(s) -> ${OUT}`);
}

main();
2 changes: 1 addition & 1 deletion site/src/pages/index.astro
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ const endpoints = [
<div>
<span class="kicker">00 - History</span>
<h2 style="margin-top:14px">Dataset growth over time</h2>
<p class="sec-sub">Recent dump commits are replayed into a small growth chart, showing how many records each sync added.</p>
<p class="sec-sub">Every dump commit is replayed into a growth chart — from the first snapshot to the latest sync — showing how many records each one added.</p>
</div>
<span class="num">v1/index.json</span>
</div>
Expand Down
43 changes: 38 additions & 5 deletions site/src/scripts/techapi.js
Original file line number Diff line number Diff line change
Expand Up @@ -288,12 +288,41 @@ function countUp(node, target) {
}).join("");
}

async function loadCommitHistory(currentManifest) {
const commitsUrl = `https://api.github.com/repos/GetTechAPI/TechAPI/commits?path=${encodeURIComponent(dumpPath)}&per_page=8`;
// Human-readable label for a point's date: "recent" when no date is given,
// otherwise the date rendered in the viewer's locale as short month, day, year.
const fmtWhen = (date) => {
  if (!date) return "recent";
  const format = { month: "short", day: "numeric", year: "numeric" };
  return date.toLocaleDateString(undefined, format);
};
// Turn a `{ key: count }` map into `[{ key, count }]` rows, following the
// sequence of `order` and dropping keys whose count is null or undefined.
const rowsFromCounts = (counts) => {
  const rows = [];
  for (const key of order) {
    const count = counts?.[key];
    if (count != null) rows.push({ key, count });
  }
  return rows;
};

// Preferred path: a single prebuilt manifest (build-history.mjs) holding the
// FULL dump timeline. No GitHub API, no per-commit fetches, no rate limit, and
// old commits stay visible (the live API path only ever showed the last 7).
async function pointsFromStaticHistory() {
  const history = await getJSON("v1/history.json");

  // Normalise one stored point into the shape the chart code consumes.
  const toPoint = (entry) => {
    const date = entry.date ? new Date(entry.date) : null;
    const rows = rowsFromCounts(entry.counts);
    // Trust the stored total when present; otherwise add up the row counts.
    const total = entry.total != null
      ? entry.total
      : rows.reduce((sum, row) => sum + row.count, 0);
    return {
      sha: String(entry.sha || "").slice(0, 7),
      dateValue: date ? date.getTime() : 0,
      when: fmtWhen(date),
      title: String(entry.title || "Dataset sync").split("\n")[0],
      url: entry.url || "https://github.com/GetTechAPI/TechAPI",
      rows,
      total,
    };
  };

  const usable = (history.points || []).map(toPoint).filter((point) => point.total > 0);
  // Oldest first, ordered by timestamp.
  usable.sort((a, b) => a.dateValue - b.dateValue);
  return usable;
}

// Fallback (e.g. local `astro dev` with no prebuilt history.json): the old live
// GitHub API replay, capped at the most recent commits.
async function pointsFromGitHubApi() {
const commitsUrl = `https://api.github.com/repos/GetTechAPI/TechAPI/commits?path=${encodeURIComponent(dumpPath)}&per_page=10`;
const response = await fetch(commitsUrl);
if (!response.ok) throw new Error(response.statusText);
const commits = await response.json();
const items = Array.isArray(commits) ? commits.slice(0, 7) : [];
const items = Array.isArray(commits) ? commits.slice(0, 10) : [];
const snapshots = await Promise.all(items.map(async (item) => {
const sha = String(item.sha || "");
const rawUrl = `https://raw.githubusercontent.com/GetTechAPI/TechAPI/${sha}/${dumpPath}`;
Expand All @@ -304,15 +333,19 @@ function countUp(node, target) {
return {
sha: sha.slice(0, 7),
dateValue: date ? date.getTime() : 0,
when: date ? date.toLocaleDateString(undefined, { month: "short", day: "numeric", year: "numeric" }) : "recent",
when: fmtWhen(date),
title: (item.commit?.message || "Dataset sync").split("\n")[0],
url: item.html_url || "https://github.com/GetTechAPI/TechAPI",
rows: countRows(manifest),
total: totalRecords(manifest),
};
}));
return snapshots.filter(Boolean).sort((a, b) => a.dateValue - b.dateValue);
}

const points = snapshots.filter(Boolean).sort((a, b) => a.dateValue - b.dateValue);
async function loadCommitHistory(currentManifest) {
let points = await pointsFromStaticHistory().catch(() => null);
if (!points || !points.length) points = await pointsFromGitHubApi();
if (!points.length) throw new Error("empty history");

const currentTotal = totalRecords(currentManifest);
Expand Down
Loading