diff --git a/packages/das/src/api/miners/miners.controller.ts b/packages/das/src/api/miners/miners.controller.ts index 61288eb..6591015 100644 --- a/packages/das/src/api/miners/miners.controller.ts +++ b/packages/das/src/api/miners/miners.controller.ts @@ -1,7 +1,106 @@ -import { Controller, Get, Param, Query } from "@nestjs/common"; -import { ApiOperation, ApiParam, ApiQuery, ApiTags } from "@nestjs/swagger"; +import { + BadRequestException, + Body, + Controller, + Get, + Param, + Post, + Query, +} from "@nestjs/common"; +import { + ApiBody, + ApiOperation, + ApiParam, + ApiQuery, + ApiTags, +} from "@nestjs/swagger"; import { MinersService } from "./miners.service"; +// GitHub owner/repo pattern: two segments of word characters, `.` or `-` (1-100 chars each), separated by a single `/`. +const REPO_FULL_NAME_PATTERN = /^[\w.-]{1,100}\/[\w.-]{1,100}$/; +const MAX_REPO_ENTRIES = 200; + +interface SinceByRepoBody { + since_by_repo?: Record<string, unknown>; +} + +/** + * Validate a `{ since_by_repo: { "owner/repo": "ISO timestamp" } }` body (1 to MAX_REPO_ENTRIES entries) + * into parallel `repoNames` / `sinceValues` arrays. Repo names are lowercased + * (for the case-insensitive JOIN) and timestamps normalized via Date#toISOString. Throws + * BadRequestException on any malformed input, including keys that collide after lowercasing. 
+ */ +function parseSinceByRepo(body: SinceByRepoBody): { + repoNames: string[]; + sinceValues: string[]; +} { + const map = body?.since_by_repo; + if (typeof map !== "object" || map === null || Array.isArray(map)) { + throw new BadRequestException( + "since_by_repo must be an object of { 'owner/repo': ISO timestamp }", + ); + } + const entries = Object.entries(map); + if (entries.length === 0) { + throw new BadRequestException("since_by_repo must have at least one entry"); + } + if (entries.length > MAX_REPO_ENTRIES) { + throw new BadRequestException( + `since_by_repo must have at most ${MAX_REPO_ENTRIES} entries`, + ); + } + + const repoNames: string[] = []; + const sinceValues: string[] = []; + const seen = new Set<string>(); + + for (const [rawRepo, rawSince] of entries) { + if (!REPO_FULL_NAME_PATTERN.test(rawRepo)) { + throw new BadRequestException( + `since_by_repo key "${rawRepo}" must match "owner/repo"`, + ); + } + const repo = rawRepo.toLowerCase(); + if (seen.has(repo)) { + throw new BadRequestException( + `since_by_repo has duplicate repo "${repo}" (keys collide after lowercasing)`, + ); + } + seen.add(repo); + + if (typeof rawSince !== "string") { + throw new BadRequestException( + `since_by_repo["${rawRepo}"] must be an ISO timestamp string`, + ); + } + const parsed = new Date(rawSince); + if (Number.isNaN(parsed.getTime())) { + throw new BadRequestException( + `since_by_repo["${rawRepo}"] is not a valid date: "${rawSince}"`, + ); + } + + repoNames.push(repo); + sinceValues.push(parsed.toISOString()); + } + + return { repoNames, sinceValues }; +} + +const SINCE_BY_REPO_API_BODY = { + schema: { + type: "object" as const, + required: ["since_by_repo"], + properties: { + since_by_repo: { + type: "object" as const, + additionalProperties: { type: "string", format: "date-time" }, + example: { "entrius/gittensor": "2026-04-17T00:00:00Z" }, + }, + }, + }, +}; + @ApiTags("Miners") @Controller("api/v1/miners") export class MinersController { @@ -33,6 +132,24 @@ 
export class MinersController { ); } + @Post(":githubId/pulls") + @ApiOperation({ + summary: "Pull requests authored by a miner, windowed per repository", + description: + "Same response shape as GET /pulls, but each repository is filtered to " + + "its own `since` from the request body instead of one shared window. " + + "Only repositories named in the map are returned.", + }) + @ApiParam({ name: "githubId", description: "GitHub user ID (numeric)" }) + @ApiBody(SINCE_BY_REPO_API_BODY) + async postPullRequests( + @Param("githubId") githubId: string, + @Body() body: SinceByRepoBody, + ): Promise<unknown> { + const { repoNames, sinceValues } = parseSinceByRepo(body); + return this.miners.getPullRequestsByRepo(githubId, repoNames, sinceValues); + } + @Get(":githubId/issues") @ApiOperation({ summary: "Issues authored by a miner", @@ -58,4 +175,22 @@ export class MinersController { ): Promise { return this.miners.getIssues(githubId, since ?? null); } + + @Post(":githubId/issues") + @ApiOperation({ + summary: "Issues authored by a miner, windowed per repository", + description: + "Same response shape as GET /issues with a `since`, but each " + + "repository is filtered to its own `since` from the request body. 
" + "Only repositories named in the map are returned.", + }) + @ApiParam({ name: "githubId", description: "GitHub user ID (numeric)" }) + @ApiBody(SINCE_BY_REPO_API_BODY) + async postIssues( + @Param("githubId") githubId: string, + @Body() body: SinceByRepoBody, + ): Promise<unknown> { + const { repoNames, sinceValues } = parseSinceByRepo(body); + return this.miners.getIssuesByRepo(githubId, repoNames, sinceValues); + } } diff --git a/packages/das/src/api/miners/miners.service.ts b/packages/das/src/api/miners/miners.service.ts index 180953f..113a502 100644 --- a/packages/das/src/api/miners/miners.service.ts +++ b/packages/das/src/api/miners/miners.service.ts @@ -4,22 +4,10 @@ import { DataSource } from "typeorm"; const DEFAULT_SINCE_DAYS = 35; -@Injectable() -export class MinersService { - constructor(private readonly dataSource: DataSource) {} - - async getPullRequests( - githubId: string, - since: string, - ): Promise<{ - github_id: string; - since: string; - generated_at: string; - pull_requests: unknown[]; - }> { - const rows = await this.dataSource.query( - ` - SELECT +// Column list (everything between SELECT and FROM) for the PR query. Shared by +// the scalar-`since` GET path and the per-repo `since` POST path so the two +// stay identical. 
+const PR_SELECT_COLUMNS = ` LOWER(p.repo_full_name) AS repo_full_name, p.pr_number, COALESCE(p.title, '') AS title, @@ -96,44 +84,10 @@ export class MinersService { FROM pr_linked_issues li WHERE li.repo_full_name = p.repo_full_name AND li.pr_number = p.pr_number - ), '[]'::json) AS linked_issues - FROM pull_requests p - LEFT JOIN pr_review_summary rs - ON rs.repo_full_name = p.repo_full_name - AND rs.pr_number = p.pr_number - LEFT JOIN repos r - ON r.repo_full_name = p.repo_full_name - WHERE p.author_github_id = $1 - AND ( - (p.state = 'OPEN' AND p.created_at >= $2) - OR (p.state = 'MERGED' AND p.merged_at >= $2) - OR (p.state = 'CLOSED' AND p.created_at >= $2) - ) - ORDER BY p.created_at DESC - `, - [githubId, since], - ); - - return { - github_id: githubId, - since, - generated_at: new Date().toISOString(), - pull_requests: rows, - }; - } + ), '[]'::json) AS linked_issues`; - async getIssues( - githubId: string, - since: string | null, - ): Promise<{ - github_id: string; - since: string | null; - generated_at: string; - issues: unknown[]; - }> { - const rows = await this.dataSource.query( - ` - SELECT +// Column list (everything between SELECT and FROM) for the issue query. Shared by the scalar-`since` GET path (getIssues) and the per-repo `since` POST path (getIssuesByRepo), which both interpolate it right after SELECT. 
+const ISSUE_SELECT_COLUMNS = ` LOWER(i.repo_full_name) AS repo_full_name, i.issue_number, COALESCE(i.title, '') AS title, @@ -204,7 +158,110 @@ export class MinersService { AND sp.author_github_id IS NOT NULL -- Skip corrupted MERGED-without-merged_at shape AND NOT (sp.state = 'MERGED' AND sp.merged_at IS NULL) - ) AS solving_pr + ) AS solving_pr`; + +@Injectable() +export class MinersService { + constructor(private readonly dataSource: DataSource) {} + + async getPullRequests( + githubId: string, + since: string, + ): Promise<{ + github_id: string; + since: string; + generated_at: string; + pull_requests: unknown[]; + }> { + const rows = await this.dataSource.query( + ` + SELECT${PR_SELECT_COLUMNS} + FROM pull_requests p + LEFT JOIN pr_review_summary rs + ON rs.repo_full_name = p.repo_full_name + AND rs.pr_number = p.pr_number + LEFT JOIN repos r + ON r.repo_full_name = p.repo_full_name + WHERE p.author_github_id = $1 + AND ( + (p.state = 'OPEN' AND p.created_at >= $2) + OR (p.state = 'MERGED' AND p.merged_at >= $2) + OR (p.state = 'CLOSED' AND p.created_at >= $2) + ) + ORDER BY p.created_at DESC + `, + [githubId, since], + ); + + return { + github_id: githubId, + since, + generated_at: new Date().toISOString(), + pull_requests: rows, + }; + } + + /** + * Per-repo variant of getPullRequests: each repo is windowed by its own + * `since`. `repoNames` and `sinceValues` are parallel arrays (same length and + * order); repo names are already lowercased and timestamps already ISO. The + * INNER JOIN to the unnested windows restricts results to the named repos. The response's top-level `since` is always null. 
+ */ + async getPullRequestsByRepo( + githubId: string, + repoNames: string[], + sinceValues: string[], + ): Promise<{ + github_id: string; + since: null; + generated_at: string; + pull_requests: unknown[]; + }> { + const rows = await this.dataSource.query( + ` + WITH windows AS ( + SELECT * FROM unnest($2::text[], $3::timestamptz[]) AS t(repo_full_name, since) + ) + SELECT${PR_SELECT_COLUMNS} + FROM pull_requests p + JOIN windows w + ON w.repo_full_name = LOWER(p.repo_full_name) + LEFT JOIN pr_review_summary rs + ON rs.repo_full_name = p.repo_full_name + AND rs.pr_number = p.pr_number + LEFT JOIN repos r + ON r.repo_full_name = p.repo_full_name + WHERE p.author_github_id = $1 + AND ( + (p.state = 'OPEN' AND p.created_at >= w.since) + OR (p.state = 'MERGED' AND p.merged_at >= w.since) + OR (p.state = 'CLOSED' AND p.created_at >= w.since) + ) + ORDER BY p.created_at DESC + `, + [githubId, repoNames, sinceValues], + ); + + return { + github_id: githubId, + since: null, + generated_at: new Date().toISOString(), + pull_requests: rows, + }; + } + + async getIssues( + githubId: string, + since: string | null, + ): Promise<{ + github_id: string; + since: string | null; + generated_at: string; + issues: unknown[]; + }> { + const rows = await this.dataSource.query( + ` + SELECT${ISSUE_SELECT_COLUMNS} FROM issues i WHERE i.author_github_id = $1 AND ( @@ -224,6 +281,49 @@ export class MinersService { }; } + /** + * Per-repo variant of getIssues: each repo is windowed by its own `since`. + * `repoNames` / `sinceValues` are parallel arrays as in getPullRequestsByRepo. + * Every window `since` is a concrete timestamp (the controller rejects nulls), + * so the OPEN branch has no NULL fallback. The response's top-level `since` is always null. 
+ */ + async getIssuesByRepo( + githubId: string, + repoNames: string[], + sinceValues: string[], + ): Promise<{ + github_id: string; + since: null; + generated_at: string; + issues: unknown[]; + }> { + const rows = await this.dataSource.query( + ` + WITH windows AS ( + SELECT * FROM unnest($2::text[], $3::timestamptz[]) AS t(repo_full_name, since) + ) + SELECT${ISSUE_SELECT_COLUMNS} + FROM issues i + JOIN windows w + ON w.repo_full_name = LOWER(i.repo_full_name) + WHERE i.author_github_id = $1 + AND ( + (i.state = 'OPEN' AND i.created_at >= w.since) + OR (i.state = 'CLOSED' AND i.closed_at >= w.since) + ) + ORDER BY i.created_at DESC + `, + [githubId, repoNames, sinceValues], + ); + + return { + github_id: githubId, + since: null, + generated_at: new Date().toISOString(), + issues: rows, + }; + } + /** * Parse a `since` query param into an ISO timestamp. If not provided, defaults * to DEFAULT_SINCE_DAYS days ago (midnight UTC of that day).