diff --git a/.github/workflows/releaser.yml b/.github/workflows/releaser.yml index d0e7015560..64c4913798 100644 --- a/.github/workflows/releaser.yml +++ b/.github/workflows/releaser.yml @@ -145,6 +145,26 @@ jobs: mkdir -p build tar -czf build/thv-cli-docs.tar.gz -C docs/cli . + - name: Setup Node for CRD schema extraction + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6 + with: + node-version: '22' + + - name: Extract per-CRD JSON schemas + examples + # Produces JSON Schemas + minimal YAML examples + an index with an + # inter-CRD reference graph for every CRD in the operator Helm + # chart. Output is bundled into thv-crd-schemas.tar.gz so + # downstream consumers (notably stacklok/docs-website) can skip + # running the extractor themselves. + run: | + cd hack + npm ci --omit=optional --silent + node extract-crd-schemas.mjs \ + --src ../deploy/charts/operator-crds/files/crds \ + --out ../build/crd-schemas + cd .. + tar -czf build/thv-crd-schemas.tar.gz -C build/crd-schemas . + - name: Remove existing release assets (allows re-runs) env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.goreleaser.yaml b/.goreleaser.yaml index 6229397734..6436e9f548 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -101,6 +101,7 @@ release: name: toolhive extra_files: - glob: build/thv-cli-docs.tar.gz + - glob: build/thv-crd-schemas.tar.gz - glob: docs/server/swagger.yaml - glob: docs/server/swagger.json - glob: docs/operator/crd-api.md diff --git a/hack/extract-crd-schemas.mjs b/hack/extract-crd-schemas.mjs new file mode 100644 index 0000000000..45bb6ccc5d --- /dev/null +++ b/hack/extract-crd-schemas.mjs @@ -0,0 +1,290 @@ +#!/usr/bin/env node +// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +// Extracts each CRD's openAPIV3Schema from deploy/charts/operator-crds/files/crds. +// For each CRD this writes: +// .schema.json - JSON Schema (apiVersion/kind/metadata stripped) +// .example.yaml - Minimal YAML skeleton covering required fields +// Plus a shared index.json with metadata and an inter-CRD reference graph. +// +// Usage: +// node hack/extract-crd-schemas.mjs --src --out +// +// Intended to run during release CI to produce a pre-generated schema +// bundle consumable by docs sites and other downstream tooling. + +import fs from 'node:fs'; +import path from 'node:path'; +import yaml from 'yaml'; + +function parseArgs(argv) { + const args = { src: null, out: null }; + for (let i = 0; i < argv.length; i++) { + if (argv[i] === '--src') { + args.src = argv[i + 1]; + i++; + } else if (argv[i] === '--out') { + args.out = argv[i + 1]; + i++; + } else if (argv[i] === '--help' || argv[i] === '-h') { + console.log( + 'Usage: node extract-crd-schemas.mjs --src --out ' + ); + process.exit(0); + } + } + return args; +} + +const args = parseArgs(process.argv.slice(2)); +if (!args.src || !args.out) { + console.error( + 'Both --src and --out are required. See --help for usage.' + ); + process.exit(1); +} + +if (!fs.existsSync(args.src)) { + console.error(`CRD source directory not found: ${args.src}`); + process.exit(1); +} + +fs.mkdirSync(args.out, { recursive: true }); + +// Placeholder values for leaf types with no default/enum. +function placeholder(schema) { + const t = schema.type; + if (schema.default !== undefined) return schema.default; + if (Array.isArray(schema.enum) && schema.enum.length) return schema.enum[0]; + if (t === 'string') return ''; + if (t === 'integer' || t === 'number') return 0; + if (t === 'boolean') return false; + if (t === 'array') return []; + if (t === 'object') return {}; + return ''; +} + +function buildRequiredExample(schema) { + if (schema.type !== 'object' || !schema.properties) + return placeholder(schema); + const required = Array.isArray(schema.required) ? schema.required : []; + const out = {}; + for (const key of required) { + const child = schema.properties[key]; + if (!child) continue; + if (child.type === 'object' && child.properties) { + out[key] = buildRequiredExample(child); + } else if (child.type === 'array') { + out[key] = []; + } else { + out[key] = placeholder(child); + } + } + return out; +} + +function buildYamlSkeleton({ group, version, kind, scope, schema }) { + const example = { + apiVersion: `${group}/${version}`, + kind, + metadata: { + name: `my-${kind.toLowerCase()}`, + ...(scope === 'Namespaced' ? { namespace: 'default' } : {}), + }, + }; + if (schema.properties?.spec) { + example.spec = buildRequiredExample(schema.properties.spec); + if (example.spec === undefined) example.spec = {}; + } + return yaml.stringify(example, { indent: 2, lineWidth: 0 }); +} + +// Walk a schema and collect outgoing references to other CRDs. +// A field is a reference when its name ends in "Ref" or "Refs" AND its +// description (or its nested `name` subfield's description) mentions a +// known CRD Kind. Returns [{ path, targetKind }] — multiple paths per +// target are preserved so consumers can list every field that points +// at a given Kind. +function findReferences(schema, knownKinds, ownKind) { + const results = []; + const seen = new Set(); + + function check(name, node) { + if (!/Refs?$/.test(name)) return null; + const textParts = []; + if (node?.description) textParts.push(node.description); + if (node?.items?.description) textParts.push(node.items.description); + if (node?.properties?.name?.description) { + textParts.push(node.properties.name.description); + } + if (node?.items?.properties?.name?.description) { + textParts.push(node.items.properties.name.description); + } + const text = textParts.join(' '); + for (const kind of knownKinds) { + if (kind === ownKind) continue; + if (new RegExp(`\\b${kind}\\b`).test(text)) return kind; + } + return null; + } + + function walk(node, jsonPtr) { + if (!node || typeof node !== 'object') return; + if (node.properties) { + for (const [key, child] of Object.entries(node.properties)) { + const target = check(key, child); + if (target) { + const dedupeKey = `${target}@${jsonPtr}.${key}`; + if (!seen.has(dedupeKey)) { + seen.add(dedupeKey); + results.push({ path: `${jsonPtr}.${key}`, targetKind: target }); + } + } + walk(child, `${jsonPtr}.${key}`); + } + } + if (node.items) walk(node.items, `${jsonPtr}[]`); + } + + walk(schema, ''); + return results; +} + +// Pass 1: parse all CRDs and collect metadata. +const files = fs.readdirSync(args.src).filter((f) => f.endsWith('.yaml')); +const crds = []; + +for (const file of files) { + const full = path.join(args.src, file); + const doc = yaml.parse(fs.readFileSync(full, 'utf8')); + + if (doc?.kind !== 'CustomResourceDefinition') { + console.warn(`Skipping ${file}: not a CRD`); + continue; + } + + const kind = doc.spec?.names?.kind; + const plural = doc.spec?.names?.plural; + const group = doc.spec?.group; + const shortNames = doc.spec?.names?.shortNames || []; + const scope = doc.spec?.scope; + + const versions = doc.spec?.versions || []; + const served = versions.find((v) => v.storage) || versions[0]; + if (!served?.schema?.openAPIV3Schema) { + console.warn(`Skipping ${file}: no openAPIV3Schema`); + continue; + } + + const schema = { ...served.schema.openAPIV3Schema }; + + // Strip Kubernetes boilerplate; these fields are identical on every CRD. + if (schema.properties) { + const stripped = { ...schema.properties }; + for (const key of ['apiVersion', 'kind', 'metadata']) delete stripped[key]; + schema.properties = stripped; + } + + crds.push({ + file, + kind, + plural, + group, + version: served.name, + shortNames, + scope, + schema, + }); +} + +const knownKinds = crds.map((c) => c.kind); +const outgoingByKind = new Map(); +for (const crd of crds) { + outgoingByKind.set( + crd.kind, + findReferences(crd.schema, knownKinds, crd.kind) + ); +} + +// Build inverse: for each kind, which other kinds reference it. +const incomingByKind = new Map(); +for (const kind of knownKinds) incomingByKind.set(kind, []); +for (const [src, refs] of outgoingByKind) { + for (const ref of refs) { + incomingByKind + .get(ref.targetKind) + .push({ sourceKind: src, path: ref.path }); + } +} + +// Pass 2: write output files. +const index = []; +for (const crd of crds) { + const { kind, plural, group, version, shortNames, scope, schema } = crd; + + const wrapped = { + $schema: 'http://json-schema.org/draft-07/schema#', + title: kind, + description: schema.description || `${kind} custom resource`, + 'x-kubernetes-group': group, + 'x-kubernetes-kind': kind, + 'x-kubernetes-version': version, + 'x-kubernetes-plural': plural, + 'x-kubernetes-short-names': shortNames, + 'x-kubernetes-scope': scope, + ...schema, + }; + + const schemaFile = path.join(args.out, `${plural}.schema.json`); + fs.writeFileSync(schemaFile, JSON.stringify(wrapped, null, 2) + '\n'); + + const exampleFile = path.join(args.out, `${plural}.example.yaml`); + fs.writeFileSync( + exampleFile, + buildYamlSkeleton({ group, version, kind, scope, schema }) + ); + + const outgoing = outgoingByKind.get(kind) || []; + const incoming = incomingByKind.get(kind) || []; + + const refByTarget = new Map(); + for (const r of outgoing) { + if (!refByTarget.has(r.targetKind)) refByTarget.set(r.targetKind, []); + refByTarget.get(r.targetKind).push(r.path.replace(/^\./, '')); + } + const incomingByKindSrc = new Map(); + for (const r of incoming) { + if (!incomingByKindSrc.has(r.sourceKind)) + incomingByKindSrc.set(r.sourceKind, []); + incomingByKindSrc.get(r.sourceKind).push(r.path.replace(/^\./, '')); + } + + index.push({ + kind, + plural, + group, + version, + shortNames, + scope, + description: schema.description || '', + references: [...refByTarget.entries()] + .map(([targetKind, paths]) => ({ + targetKind, + paths: [...new Set(paths)].sort(), + })) + .sort((a, b) => a.targetKind.localeCompare(b.targetKind)), + referencedBy: [...incomingByKindSrc.entries()] + .map(([sourceKind, paths]) => ({ + sourceKind, + paths: [...new Set(paths)].sort(), + })) + .sort((a, b) => a.sourceKind.localeCompare(b.sourceKind)), + }); +} + +fs.writeFileSync( + path.join(args.out, 'index.json'), + JSON.stringify(index, null, 2) + '\n' +); +console.log(`Extracted ${index.length} CRD schema(s) to ${args.out}.`); diff --git a/hack/package-lock.json b/hack/package-lock.json new file mode 100644 index 0000000000..f621b9d127 --- /dev/null +++ b/hack/package-lock.json @@ -0,0 +1,30 @@ +{ + "name": "toolhive-hack", + "version": "0.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "toolhive-hack", + "version": "0.0.0", + "dependencies": { + "yaml": "^2.6.1" + } + }, + "node_modules/yaml": { + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.3.tgz", + "integrity": "sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==", + "license": "ISC", + "bin": { + "yaml": "bin.mjs" + }, + "engines": { + "node": ">= 14.6" + }, + "funding": { + "url": "https://github.com/sponsors/eemeli" + } + } + } +} diff --git a/hack/package.json b/hack/package.json new file mode 100644 index 0000000000..2bdd669967 --- /dev/null +++ b/hack/package.json @@ -0,0 +1,13 @@ +{ + "name": "toolhive-hack", + "version": "0.0.0", + "private": true, + "description": "Release-time scripts for generating release assets from toolhive source", + "type": "module", + "scripts": { + "extract-crd-schemas": "node extract-crd-schemas.mjs" + }, + "dependencies": { + "yaml": "^2.6.1" + } +}