Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"@braidhq/schema": "workspace:*",
"@braidhq/source-loader-gdrive": "workspace:*",
"@braidhq/source-loader-git": "workspace:*",
"@braidhq/source-loader-github": "workspace:*",
"@braidhq/storage-kuzu": "workspace:*",
"@hono/node-server": "^2.0.4",
"@hono/zod-openapi": "^0.19.10",
Expand Down
13 changes: 12 additions & 1 deletion packages/server/src/composeFs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import { dddOntology } from '@braidhq/ontology-ddd'
import { AgentId, AgentKind, StorageKind as StorageKindSchema } from '@braidhq/schema'
import { GoogleDriveLoader } from '@braidhq/source-loader-gdrive'
import { GitLoader } from '@braidhq/source-loader-git'
import { GithubLoader } from '@braidhq/source-loader-github'
import { kuzuStoragePlugin } from '@braidhq/storage-kuzu'
import { composeApp } from './composition.js'
import { SubprocessSkillRunner } from './infrastructure/agent/SubprocessSkillRunner.js'
Expand Down Expand Up @@ -71,7 +72,10 @@ export interface ComposeFsOptions {
* composition. The defaults bundle is:
* - storage: `kuzuStoragePlugin`
* - ontology: `dddOntology`
* - source-loader: `GitLoader` (+ `GoogleDriveLoader` if OAuth configured)
* - source-loader: `GitLoader`, `GoogleDriveLoader` (always; throws at
* ingest if OAuth env is missing), `GithubLoader` (only when
* `GH_TOKEN` is set, since anonymous GitHub access is rate-limited
* to 60 req/h)
* - agent: `claudeCodeAgentPlugin`
*
* `composeFsApp` is the opinionated entry that ships with batteries.
Expand Down Expand Up @@ -174,6 +178,13 @@ export async function composeFsApp(options: ComposeFsOptions = {}): Promise<AppD
pluginRegistry.register(plugin)

pluginRegistry.register(new GitLoader())
// GitHub Issues loader is gated on GH_TOKEN: anonymous access is
// rate-limited to 60 req/h which won't survive a realistic sync, so
// declaring a `kind: 'github'` source without a token is almost
// certainly a misconfiguration. Skipping registration here surfaces it
// as an unknown-plugin error at workspace load time.
if ((process.env.GH_TOKEN ?? '').length > 0)
pluginRegistry.register(new GithubLoader())
for (const plugin of options.extraSourceLoaderPlugins ?? [])
pluginRegistry.register(plugin)

Expand Down
44 changes: 44 additions & 0 deletions packages/source-loader-github/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"name": "@braidhq/source-loader-github",
"type": "module",
"version": "0.0.1",
"description": "GitHub Issues source-loader plugin for Braid. Ingest issues + comments into markdown.",
"license": "MIT",
"exports": {
".": "./src/index.ts"
},
"main": "./src/index.ts",
"types": "./src/index.ts",
"files": [
"README.md",
"dist"
],
"scripts": {
"build": "tsc -p tsconfig.json",
"typecheck": "tsc -p tsconfig.json --noEmit && tsc -p tsconfig.test.json",
"test": "vitest run",
"clean": "rm -rf dist .turbo *.tsbuildinfo coverage"
},
"dependencies": {
"@braidhq/core": "workspace:*",
"@braidhq/schema": "workspace:*",
"yaml": "^2.9.0",
"zod": "^3.24.0"
},
"devDependencies": {
"@braidhq/test-utils": "workspace:*",
"typescript": "^6.0.3",
"vitest": "^4.1.0"
},
"publishConfig": {
"access": "public",
"main": "./dist/index.js",
"types": "./dist/index.d.ts",
"exports": {
".": {
"types": "./dist/index.d.ts",
"default": "./dist/index.js"
}
}
}
}
296 changes: 296 additions & 0 deletions packages/source-loader-github/src/GithubLoader.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,296 @@
import type { IngestReport, SourceLoaderPlugin, SyncReport } from '@braidhq/core'
import type { AbsolutePath, LoaderKind, Timestamp } from '@braidhq/schema'
import { mkdir, readFile, writeFile } from 'node:fs/promises'
import { join } from 'node:path'
import process from 'node:process'
import { LoaderKind as LoaderKindSchema, PluginId as PluginIdSchema } from '@braidhq/schema'
import { stringify as stringifyYaml } from 'yaml'
import { z } from 'zod'

/**
 * Signature of the HTTP client used by `GithubLoader`. It is identical to
 * the global `fetch`, which is the constructor's default; tests inject a
 * stub with this shape instead of hitting the network.
 */
export type FetchFn = typeof globalThis.fetch

/**
 * Runtime schema for a `github` source's configuration. `GithubLoader`
 * parses the raw config with it at the start of both `ingest` and `sync`,
 * so every default below is applied before any request is made.
 */
export const GithubLoaderConfig = z.object({
/** Repository owner; URL-encoded into the request path. */
owner: z.string().min(1),
/** Repository name; URL-encoded into the request path. */
repo: z.string().min(1),
/** Sent as the `state` query parameter of the issues listing. */
state: z.enum(['open', 'closed', 'all']).default('all'),
/** When non-empty, joined with commas and sent as the `labels` query parameter. */
labels: z.array(z.string().min(1)).optional(),
/**
 * When `false`, comments are neither fetched nor rendered, so each issue
 * file holds only the frontmatter and the issue body.
 */
includeComments: z.boolean().default(true),
/**
 * GitHub's REST treats PRs as a subtype of issues. Default `false` so a
 * source declared as "issues" doesn't silently pick up PR threads too.
 */
includePullRequests: z.boolean().default(false),
/**
 * Auth token. Supports `${VAR}` interpolation against the server's process
 * env. Defaults to `${GH_TOKEN}`. An empty string after interpolation
 * means anonymous (60 req/h rate limit, public repos only).
 */
// eslint-disable-next-line no-template-curly-in-string -- literal `${VAR}` placeholder for env interpolation, NOT a template string
token: z.string().default('${GH_TOKEN}'),
/** REST base URL. Override for GitHub Enterprise. */
apiBaseUrl: z.string().default('https://api.github.com'),
})
/** Parsed configuration, i.e. the schema's output type with defaults applied. */
export type GithubLoaderConfig = z.infer<typeof GithubLoaderConfig>

/**
 * The fields of an issue object, as returned by the issues listing, that
 * this loader reads. The response JSON is cast to this shape without
 * runtime validation.
 */
interface RawIssue {
/** Issue number; also the output file name (`issues/<number>.md`). */
number: number
title: string
state: string
/** Author; rendered as `author: null` in the frontmatter when absent. */
user: { login: string } | null
/** Labels arrive either as objects or as bare names; both are accepted. */
labels: Array<{ name: string } | string>
body: string | null
html_url: string
created_at: string
/** Compared as a string to find the newest change, which becomes the sync cursor. */
updated_at: string
/** Any defined value marks the entry as a pull request rather than a plain issue. */
pull_request?: unknown
/** Comment count; when it is 0 the comments request is skipped entirely. */
comments: number
}

/**
 * The fields of an issue-comment object that this loader reads. The
 * response JSON is cast to this shape without runtime validation.
 */
interface RawComment {
/** Comment author; rendered as `unknown` in the heading when absent. */
user: { login: string } | null
body: string | null
/** Used both to order comments and in each comment's heading. */
created_at: string
updated_at: string
}

/**
 * On-disk incremental-sync state. `owner` and `repo` identify which
 * repository the cursor was recorded for, so `sync` can ignore it when the
 * source has been pointed at a different repository.
 */
interface CursorFile {
owner: string
repo: string
/** Newest `updated_at` seen so far; sent as the `since` query parameter on the next sync. */
since: string
}

/** Name of the cursor file, stored directly inside the loader's `destination` directory. */
const CURSOR_FILENAME = '.braid-github-cursor.json'

/**
 * Source loader for a GitHub repository's Issues. The `destination` is
 * owned by the loader: each issue is written as
 * `<destination>/issues/<number>.md` with a deterministic YAML
 * frontmatter + body + `## Comments` section. Untouched issues stay
 * byte-identical across `sync` so downstream sha-based fingerprints
 * don't churn.
 *
 * Incremental state lives in a small JSON cursor file inside
 * `destination`. It records the newest `updated_at` seen together with
 * the owner/repo it was recorded for; a cursor recorded for a different
 * repository is ignored entirely.
 *
 * Auth: pass the token via `${GH_TOKEN}` (or any other env var) in
 * `config.token`. Tokens are never persisted on disk; only the rendered
 * markdown and the cursor land in `destination`.
 */
export class GithubLoader implements SourceLoaderPlugin {
  readonly id = PluginIdSchema.parse('source-loader-github')
  readonly type = 'source-loader' as const
  readonly kind: LoaderKind = LoaderKindSchema.parse('github')
  readonly configSchema = GithubLoaderConfig

  /**
   * @param fetchFn HTTP client used for every request. Defaults to the
   *   global `fetch`; tests inject a stub.
   */
  constructor(private readonly fetchFn: FetchFn = globalThis.fetch) {}

  /**
   * Full fetch: downloads every issue matching the config, writes one
   * markdown file per issue and records the sync cursor.
   *
   * @param rawConfig Unvalidated source config; parsed with `GithubLoaderConfig`.
   * @param destination Directory owned by this loader.
   * @returns Report carrying the destination path and the number of issues fetched.
   * @throws If the config is invalid or any GitHub request fails.
   */
  async ingest(rawConfig: unknown, destination: AbsolutePath): Promise<IngestReport> {
    const config = GithubLoaderConfig.parse(rawConfig)
    const issuesDir = join(destination, 'issues')
    await mkdir(issuesDir, { recursive: true })
    const headers = this.buildHeaders(config)
    const issues = await this.fetchIssues(config, headers, undefined)
    const { mostRecent } = await this.writeIssues(config, headers, issues, issuesDir, '')
    if (mostRecent)
      await writeCursor(destination, { owner: config.owner, repo: config.repo, since: mostRecent })
    return {
      localPath: destination,
      metadata: { owner: config.owner, repo: config.repo, issueCount: issues.length },
      fetchedAt: new Date().toISOString() as Timestamp,
    }
  }

  /**
   * Incremental fetch: asks GitHub only for issues updated since the stored
   * cursor and rewrites the files whose rendered content changed. Falls
   * back to a full fetch when there is no cursor or when the cursor was
   * recorded for a different owner/repo.
   *
   * @param rawConfig Unvalidated source config; parsed with `GithubLoaderConfig`.
   * @param destination Directory owned by this loader.
   * @returns Counts of added and updated files; `removed` is always 0.
   * @throws If the config is invalid or any GitHub request fails.
   */
  async sync(rawConfig: unknown, destination: AbsolutePath): Promise<SyncReport> {
    const config = GithubLoaderConfig.parse(rawConfig)
    const issuesDir = join(destination, 'issues')
    await mkdir(issuesDir, { recursive: true })
    const headers = this.buildHeaders(config)
    const cursor = await readCursor(destination)
    const sinceParam = cursor?.owner === config.owner && cursor.repo === config.repo
      ? cursor.since
      : undefined
    const issues = await this.fetchIssues(config, headers, sinceParam)
    // Seed from the *validated* cursor only. Seeding from a cursor that
    // belongs to another repository would write that repository's timestamp
    // back under this one's name and make later syncs skip older issues.
    const { added, updated, mostRecent } = await this.writeIssues(
      config,
      headers,
      issues,
      issuesDir,
      sinceParam ?? '',
    )
    if (mostRecent)
      await writeCursor(destination, { owner: config.owner, repo: config.repo, since: mostRecent })
    return {
      changed: added + updated > 0,
      added,
      updated,
      removed: 0,
      metadata: { owner: config.owner, repo: config.repo, since: sinceParam ?? null },
      fetchedAt: new Date().toISOString() as Timestamp,
    }
  }

  /**
   * Renders and writes each issue in order, fetching its comments first
   * when configured.
   *
   * @param initialMostRecent Starting value for the newest-`updated_at` scan.
   * @returns File counts plus the newest `updated_at` seen (or
   *   `initialMostRecent` if nothing was newer).
   */
  private async writeIssues(
    config: GithubLoaderConfig,
    headers: Record<string, string>,
    issues: readonly RawIssue[],
    issuesDir: string,
    initialMostRecent: string,
  ): Promise<{ added: number, updated: number, mostRecent: string }> {
    let added = 0
    let updated = 0
    let mostRecent = initialMostRecent
    for (const issue of issues) {
      // Timestamps are compared as strings, exactly as they arrive.
      if (issue.updated_at > mostRecent)
        mostRecent = issue.updated_at
      const comments = await this.fetchCommentsIfNeeded(config, headers, issue)
      const markdown = renderIssueMarkdown(config, issue, comments)
      const result = await writeIfChanged(join(issuesDir, `${issue.number}.md`), markdown)
      if (result === 'added')
        added++
      else if (result === 'updated')
        updated++
    }
    return { added, updated, mostRecent }
  }

  /**
   * Builds the request headers. The `Authorization` header is added only
   * when the interpolated token is non-empty; otherwise requests are
   * anonymous.
   */
  private buildHeaders(config: GithubLoaderConfig): Record<string, string> {
    const headers: Record<string, string> = {
      'Accept': 'application/vnd.github+json',
      'X-GitHub-Api-Version': '2022-11-28',
      'User-Agent': 'braid-source-loader-github',
    }
    const token = interpolateEnv(config.token).trim()
    if (token.length > 0)
      headers.Authorization = `Bearer ${token}`
    return headers
  }

  /**
   * Returns `<apiBaseUrl>/repos/<owner>/<repo>`. Trailing slashes on the
   * configured base URL are dropped so the result never contains `//repos`.
   */
  private repoUrl(config: GithubLoaderConfig): string {
    const base = config.apiBaseUrl.replace(/\/+$/, '')
    return `${base}/repos/${encodeURIComponent(config.owner)}/${encodeURIComponent(config.repo)}`
  }

  /**
   * Follows `Link: rel="next"` pagination starting at `firstUrl` and
   * returns the items of every page, in order.
   *
   * @throws If a response is not OK (the message includes the status and
   *   the first 200 characters of the body) or if a page is not a JSON array.
   */
  private async fetchAllPages<T>(firstUrl: string, headers: Record<string, string>): Promise<T[]> {
    const items: T[] = []
    let url: string | undefined = firstUrl
    while (url) {
      const response = await this.fetchFn(url, { headers })
      if (!response.ok) {
        const body = await response.text().catch(() => '')
        throw new Error(`GithubLoader: GET ${url} failed (${response.status}): ${body.slice(0, 200)}`)
      }
      const page: unknown = await response.json()
      if (!Array.isArray(page))
        throw new Error(`GithubLoader: GET ${url} returned a non-array body`)
      items.push(...(page as T[]))
      url = parseNextLink(response.headers.get('link'))
    }
    return items
  }

  /**
   * Lists the repository's issues, oldest update first, optionally limited
   * to those updated since `since`. Pull requests are dropped unless
   * `includePullRequests` is set.
   */
  private async fetchIssues(
    config: GithubLoaderConfig,
    headers: Record<string, string>,
    since: string | undefined,
  ): Promise<RawIssue[]> {
    const params = new URLSearchParams()
    params.set('state', config.state)
    params.set('per_page', '100')
    params.set('sort', 'updated')
    params.set('direction', 'asc')
    if (config.labels && config.labels.length > 0)
      params.set('labels', config.labels.join(','))
    if (since)
      params.set('since', since)
    const issues = await this.fetchAllPages<RawIssue>(
      `${this.repoUrl(config)}/issues?${params.toString()}`,
      headers,
    )
    if (config.includePullRequests)
      return issues
    return issues.filter(issue => issue.pull_request === undefined)
  }

  /**
   * Fetches all comments of `issue`, sorted by creation time. Returns an
   * empty list without any request when comments are disabled or the issue
   * reports a comment count of 0.
   */
  private async fetchCommentsIfNeeded(
    config: GithubLoaderConfig,
    headers: Record<string, string>,
    issue: RawIssue,
  ): Promise<RawComment[]> {
    if (!config.includeComments || issue.comments === 0)
      return []
    const params = new URLSearchParams()
    params.set('per_page', '100')
    const comments = await this.fetchAllPages<RawComment>(
      `${this.repoUrl(config)}/issues/${issue.number}/comments?${params.toString()}`,
      headers,
    )
    comments.sort((a, b) => a.created_at.localeCompare(b.created_at))
    return comments
  }
}

/**
 * Renders one issue as markdown: YAML frontmatter, the issue body, and,
 * when comments are enabled and present, a `## Comments` section with one
 * `###` heading per comment. Label names are sorted and trailing whitespace
 * is trimmed so the same input always yields the same bytes. The result
 * ends with exactly one newline.
 */
function renderIssueMarkdown(
  config: GithubLoaderConfig,
  issue: RawIssue,
  comments: readonly RawComment[],
): string {
  // Labels may be objects or bare names; keep only non-empty string names.
  const labelNames: string[] = []
  for (const label of issue.labels ?? []) {
    const name = typeof label === 'string' ? label : label.name
    if (typeof name === 'string' && name.length > 0)
      labelNames.push(name)
  }
  labelNames.sort()

  const header = stringifyYaml(
    {
      number: issue.number,
      title: issue.title,
      state: issue.state,
      author: issue.user?.login ?? null,
      labels: labelNames,
      createdAt: issue.created_at,
      updatedAt: issue.updated_at,
      url: issue.html_url,
    },
    { lineWidth: 0 },
  ).trimEnd()

  const sections: string[] = [`---\n${header}\n---`, '', (issue.body ?? '').trimEnd()]

  const showComments = config.includeComments && comments.length > 0
  if (showComments) {
    sections.push('', '## Comments')
    for (const { user, body, created_at: postedAt } of comments) {
      const heading = `### ${user?.login ?? 'unknown'} — ${postedAt}`
      sections.push('', heading, '', (body ?? '').trimEnd())
    }
  }

  const document = sections.join('\n').trimEnd()
  return `${document}\n`
}

/**
 * Writes `content` to `path` only when it differs from what is already on
 * disk, so unchanged files are left untouched.
 *
 * @returns `'unchanged'` when the file already holds `content`, `'added'`
 *   when the file could not be read beforehand, `'updated'` otherwise.
 */
async function writeIfChanged(path: string, content: string): Promise<'added' | 'updated' | 'unchanged'> {
  // Any read failure is treated the same as "no previous file".
  const previous: string | undefined = await readFile(path, 'utf-8').catch(() => undefined)
  if (previous === content)
    return 'unchanged'
  await writeFile(path, content, 'utf-8')
  if (previous === undefined)
    return 'added'
  return 'updated'
}

/**
 * Loads the sync cursor stored in `destination`.
 *
 * @returns The cursor, or `undefined` when the file is missing, unreadable,
 *   not valid JSON, or does not have string `owner`, `repo` and `since`
 *   fields. Callers treat `undefined` as "do a full fetch", so a damaged
 *   cursor degrades to a full fetch instead of feeding bad values into the
 *   next request.
 */
async function readCursor(destination: string): Promise<CursorFile | undefined> {
  let parsed: unknown
  try {
    parsed = JSON.parse(await readFile(join(destination, CURSOR_FILENAME), 'utf-8'))
  }
  catch {
    return undefined
  }
  // JSON.parse can yield null, primitives or arrays; only a plain object
  // with the three expected string fields counts as a cursor.
  if (typeof parsed !== 'object' || parsed === null)
    return undefined
  const { owner, repo, since } = parsed as Record<string, unknown>
  if (typeof owner !== 'string' || typeof repo !== 'string' || typeof since !== 'string')
    return undefined
  return { owner, repo, since }
}

/**
 * Persists the sync cursor into `destination` as pretty-printed JSON
 * (two-space indent) followed by a newline, replacing any previous cursor.
 */
async function writeCursor(destination: string, cursor: CursorFile): Promise<void> {
  const target = join(destination, CURSOR_FILENAME)
  const serialized = JSON.stringify(cursor, null, 2)
  await writeFile(target, `${serialized}\n`, 'utf-8')
}

/**
 * Extracts the `rel="next"` target from an HTTP `Link` header.
 *
 * Entries are matched as whole `<uri>; params` units rather than by
 * splitting on commas, so a URI that itself contains a comma (for example
 * a `labels=a,b` query) is still recognised. Additional parameters next to
 * `rel`, an unquoted `rel` value, and a `rel` listing several
 * space-separated relation types are accepted as well.
 *
 * @param header Raw header value, or `null` when the response had none.
 * @returns The next page's URL, or `undefined` when there is no next page.
 */
function parseNextLink(header: string | null): string | undefined {
  if (!header)
    return undefined
  // Group 1: the URI between angle brackets. Group 2: everything up to the
  // next entry's opening bracket, i.e. this entry's parameters.
  const entryPattern = /<([^>]+)>([^<]*)/g
  const relPattern = /;\s*rel\s*=\s*(?:"([^"]*)"|([^\s;,"]+))/i
  for (const [, target, params = ''] of header.matchAll(entryPattern)) {
    const rel = relPattern.exec(params)
    const relations = (rel?.[1] ?? rel?.[2] ?? '').toLowerCase().split(/\s+/)
    if (relations.includes('next'))
      return target
  }
  return undefined
}

/**
 * Expands every `${NAME}` placeholder in `input` with the value of the
 * matching process environment variable. Only names made of upper-case
 * letters, digits and underscores (not starting with a digit) are
 * recognised; anything else is left as-is. An unset variable expands to
 * the empty string.
 */
function interpolateEnv(input: string): string {
  const placeholder = /\$\{([A-Z_][A-Z0-9_]*)\}/g
  const lookup = (_whole: string, varName: string): string => {
    const value = process.env[varName]
    return value === undefined ? '' : value
  }
  return input.replace(placeholder, lookup)
}
1 change: 1 addition & 0 deletions packages/source-loader-github/src/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './GithubLoader.js'
Loading
Loading