From dc8ab733a6878eadc894e61b4c5e61b5cffd5031 Mon Sep 17 00:00:00 2001 From: Matt Kane Date: Thu, 14 May 2026 11:24:19 +0100 Subject: [PATCH 1/6] feat(registry-cli): emdash-plugin.jsonc manifest, validate command, publisher pinning Foundation for the registry roadmap. Adds the file format, loader, and schema; no change to discovery or publish behaviour beyond now reading the manifest by default. - Zod schema for `emdash-plugin.jsonc` with v1 profile fields (license, author/authors, security/securityContacts, name, description, keywords, repo). Strict mode rejects unknown keys to catch typos. - JSONC loader with `tsc`-style file:line:column diagnostics. - `emdash-registry validate` for offline schema checking, suitable for pre-commit and CI. - `publish` reads the manifest from `./emdash-plugin.jsonc` by default; explicit flags still win for CI overrides. `--no-manifest` opts out. - New `publisher` field pins the publishing identity. On first publish, the CLI writes the active session's DID back as `publisher`, with a `// ` line comment for readability. Subsequent publishes verify the active session matches the pinned publisher. - JSON Schema generated from the Zod source via z.toJSONSchema; ships in the package at `schemas/emdash-plugin.schema.json` for IDE completion. Drift caught by a snapshot test. Part of #1028 (the init command lands separately in a follow-up). 
--- .changeset/busy-rivers-drive.md | 11 + packages/registry-cli/README.md | 52 ++- packages/registry-cli/package.json | 13 +- .../schemas/emdash-plugin.schema.json | 191 ++++++++++ packages/registry-cli/scripts/gen-schema.ts | 60 +++ packages/registry-cli/src/commands/publish.ts | 145 ++++++++ .../registry-cli/src/commands/validate.ts | 92 +++++ packages/registry-cli/src/index.ts | 3 + packages/registry-cli/src/manifest/load.ts | 312 ++++++++++++++++ .../registry-cli/src/manifest/publisher.ts | 290 +++++++++++++++ packages/registry-cli/src/manifest/schema.ts | 350 ++++++++++++++++++ .../registry-cli/src/manifest/translate.ts | 126 +++++++ .../registry-cli/tests/manifest-load.test.ts | 184 +++++++++ .../tests/manifest-publisher.test.ts | 284 ++++++++++++++ .../tests/manifest-schema.test.ts | 256 +++++++++++++ .../registry-cli/tests/schema-drift.test.ts | 60 +++ pnpm-lock.yaml | 19 +- pnpm-workspace.yaml | 9 +- 18 files changed, 2444 insertions(+), 13 deletions(-) create mode 100644 .changeset/busy-rivers-drive.md create mode 100644 packages/registry-cli/schemas/emdash-plugin.schema.json create mode 100644 packages/registry-cli/scripts/gen-schema.ts create mode 100644 packages/registry-cli/src/commands/validate.ts create mode 100644 packages/registry-cli/src/manifest/load.ts create mode 100644 packages/registry-cli/src/manifest/publisher.ts create mode 100644 packages/registry-cli/src/manifest/schema.ts create mode 100644 packages/registry-cli/src/manifest/translate.ts create mode 100644 packages/registry-cli/tests/manifest-load.test.ts create mode 100644 packages/registry-cli/tests/manifest-publisher.test.ts create mode 100644 packages/registry-cli/tests/manifest-schema.test.ts create mode 100644 packages/registry-cli/tests/schema-drift.test.ts diff --git a/.changeset/busy-rivers-drive.md b/.changeset/busy-rivers-drive.md new file mode 100644 index 000000000..b26dc3c51 --- /dev/null +++ b/.changeset/busy-rivers-drive.md @@ -0,0 +1,11 @@ +--- 
+"@emdash-cms/registry-cli": minor +--- + +Adds `emdash-plugin.jsonc` manifest support. Plugin authors can now declare profile fields (license, author, security contact, name, description, keywords, repo) once in a hand-edited JSONC file instead of passing them as flags on every publish. The CLI loads `./emdash-plugin.jsonc` automatically; explicit flags still win for CI use. + +New `emdash-registry validate` command checks a manifest against the schema offline with `tsc`-style file:line:column diagnostics. + +The manifest's optional `publisher` field pins the publishing identity. On first successful publish, the CLI writes the active session's DID back to the manifest. Subsequent publishes verify the active session matches the pinned publisher and refuse on mismatch to prevent accidental cross-account publishes. + +JSON Schema for IDE completion ships in the package at `schemas/emdash-plugin.schema.json`; reference it via `"$schema": "./node_modules/@emdash-cms/registry-cli/schemas/emdash-plugin.schema.json"`. diff --git a/packages/registry-cli/README.md b/packages/registry-cli/README.md index f5d546980..3202c9e36 100644 --- a/packages/registry-cli/README.md +++ b/packages/registry-cli/README.md @@ -28,6 +28,7 @@ emdash-registry search Free-text search emdash-registry info Show package details emdash-registry bundle Bundle a plugin source dir into a tarball emdash-registry publish --url Publish a release that points at a hosted tarball +emdash-registry validate [path] Validate emdash-plugin.jsonc against the v1 schema ``` All commands accept `--json`. Discovery commands accept `--aggregator ` (or `EMDASH_REGISTRY_URL`). @@ -42,7 +43,56 @@ emdash-registry bundle emdash-registry publish --url https://example.com/foo-1.0.0.tar.gz ``` -On first publish, pass `--license` and `--security-email` (or `--security-url`) to bootstrap the package profile. 
+On first publish, pass `--license` and `--security-email` (or `--security-url`) to bootstrap the package profile — or keep them in `emdash-plugin.jsonc` (see below). + +## `emdash-plugin.jsonc` + +Drop an `emdash-plugin.jsonc` file next to your plugin's `package.json` to declare profile fields once instead of passing them on every publish. The CLI reads it automatically from the current directory. Schema-driven IDE completion works via the bundled JSON Schema: + +```jsonc +{ + "$schema": "./node_modules/@emdash-cms/registry-cli/schemas/emdash-plugin.schema.json", + + "license": "MIT", + "author": { "name": "Jane Doe", "url": "https://example.com" }, + "security": { "email": "security@example.com" }, + + // Optional + "name": "Gallery", + "description": "Image gallery block for EmDash.", + "keywords": ["gallery", "images"], + "repo": "https://github.com/example/plugin-gallery", +} +``` + +The file is JSONC: comments and trailing commas are allowed. Use `authors: [...]` and `securityContacts: [...]` for multi-author or multi-contact plugins. + +### Publisher pinning + +After your first successful publish, the CLI writes the active session's DID back into the manifest as `publisher`: + +```jsonc +{ + "license": "MIT", + "publisher": "did:plc:abc123def456", + ... +} +``` + +On every subsequent publish, the CLI verifies the active session matches the pinned `publisher`. If they don't match, publish refuses with `MANIFEST_PUBLISHER_MISMATCH` so you can't accidentally publish under the wrong account. To resolve a mismatch, either: + +- switch sessions: `emdash-registry switch ` +- update the manifest if you're transferring the plugin to a new publisher + +**DIDs are the identity, not handles.** Internally the CLI always compares the active session's DID against the pinned publisher's DID. 
If you pin a handle (`"publisher": "example.com"`), the CLI resolves it to a DID at publish time and compares against that — so a handle pin is just a friendlier alias for the underlying DID. Handles are mutable: if the publisher's domain changes ownership and the resolver later points at a different DID, the publish will refuse. DIDs are durable and the recommended pin for long-lived plugins. + +Validate without publishing: + +```sh +emdash-registry validate +``` + +CLI flags (`--license`, `--author-name`, …) still win over manifest values when both are set, which is useful in CI. Pass `--no-manifest` to skip the manifest entirely. ## Programmatic API diff --git a/packages/registry-cli/package.json b/packages/registry-cli/package.json index b0c0f9ab1..ef5c7c49b 100644 --- a/packages/registry-cli/package.json +++ b/packages/registry-cli/package.json @@ -14,11 +14,14 @@ "emdash-registry": "./dist/index.mjs" }, "files": [ - "dist" + "dist", + "schemas", + "templates" ], "scripts": { - "build": "tsdown", + "build": "node --run gen-schema && tsdown", "dev": "tsdown --watch", + "gen-schema": "node --no-warnings --experimental-strip-types scripts/gen-schema.ts", "prepublishOnly": "node --run build", "typecheck": "tsgo --noEmit", "test": "vitest run", @@ -30,16 +33,18 @@ "@atcute/lexicons": "catalog:", "@atcute/multibase": "catalog:", "@atcute/oauth-node-client": "catalog:", - "@oslojs/crypto": "catalog:", "@emdash-cms/plugin-types": "workspace:*", "@emdash-cms/registry-client": "workspace:*", "@emdash-cms/registry-lexicons": "workspace:*", + "@oslojs/crypto": "catalog:", "citty": "^0.1.6", "consola": "^3.4.2", "image-size": "^2.0.2", + "jsonc-parser": "catalog:", "modern-tar": "^0.7.5", "picocolors": "^1.1.1", - "tsdown": "catalog:" + "tsdown": "catalog:", + "zod": "catalog:" }, "devDependencies": { "@arethetypeswrong/cli": "catalog:", diff --git a/packages/registry-cli/schemas/emdash-plugin.schema.json b/packages/registry-cli/schemas/emdash-plugin.schema.json new file 
mode 100644 index 000000000..5de992e21 --- /dev/null +++ b/packages/registry-cli/schemas/emdash-plugin.schema.json @@ -0,0 +1,191 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://emdashcms.com/schemas/emdash-plugin.schema.json", + "title": "EmDash plugin manifest", + "description": "Hand-authored manifest for publishing a plugin to the EmDash plugin registry. Lives next to the plugin's `package.json` as `emdash-plugin.jsonc`.", + "type": "object", + "properties": { + "$schema": { + "$ref": "#/$defs/__schema0" + }, + "license": { + "$ref": "#/$defs/__schema1" + }, + "publisher": { + "$ref": "#/$defs/__schema2" + }, + "author": { + "$ref": "#/$defs/__schema3" + }, + "authors": { + "$ref": "#/$defs/__schema8" + }, + "security": { + "$ref": "#/$defs/__schema9" + }, + "securityContacts": { + "$ref": "#/$defs/__schema13" + }, + "name": { + "$ref": "#/$defs/__schema14" + }, + "description": { + "$ref": "#/$defs/__schema15" + }, + "keywords": { + "$ref": "#/$defs/__schema16" + }, + "repo": { + "$ref": "#/$defs/__schema18" + } + }, + "required": ["license"], + "additionalProperties": false, + "$defs": { + "__schema0": { + "type": "string", + "description": "Path or URL to the JSON Schema describing this file. Editors use this for completion and validation." + }, + "__schema1": { + "type": "string", + "minLength": 1, + "maxLength": 256, + "title": "License", + "description": "SPDX license expression (e.g. \"MIT\", \"Apache-2.0\", \"MIT OR Apache-2.0\"). Required on first publish; ignored on subsequent publishes (the existing profile wins).", + "examples": ["MIT", "Apache-2.0", "MIT OR Apache-2.0"] + }, + "__schema2": { + "type": "string", + "title": "Publisher", + "description": "Atproto DID or handle of the publishing identity. Pinned on first publish to prevent accidental publishes from a different account. 
DIDs are recommended (durable); handles work but are mutable.", + "examples": ["did:plc:abc123def456", "example.com"] + }, + "__schema3": { + "$ref": "#/$defs/__schema4" + }, + "__schema4": { + "type": "object", + "properties": { + "name": { + "$ref": "#/$defs/__schema5" + }, + "url": { + "$ref": "#/$defs/__schema6" + }, + "email": { + "$ref": "#/$defs/__schema7" + } + }, + "required": ["name"], + "additionalProperties": false, + "title": "Author", + "description": "A single author entry. Mirrors the lexicon's author shape." + }, + "__schema5": { + "type": "string", + "minLength": 1, + "maxLength": 256, + "description": "Display name." + }, + "__schema6": { + "type": "string", + "maxLength": 1024, + "format": "uri", + "description": "Author's homepage or profile URL. Either this or `email` is recommended." + }, + "__schema7": { + "type": "string", + "maxLength": 256, + "format": "email", + "pattern": "^(?!\\.)(?!.*\\.\\.)([A-Za-z0-9_'+\\-\\.]*)[A-Za-z0-9_+-]@([A-Za-z0-9][A-Za-z0-9\\-]*\\.)+[A-Za-z]{2,}$", + "description": "Author's contact email. Either this or `url` is recommended." + }, + "__schema8": { + "minItems": 1, + "maxItems": 32, + "type": "array", + "items": { + "$ref": "#/$defs/__schema4" + }, + "title": "Authors (multiple)", + "description": "Multi-author form. Mutually exclusive with `author`. Use the singular `author` if there is only one." + }, + "__schema9": { + "$ref": "#/$defs/__schema10" + }, + "__schema10": { + "type": "object", + "properties": { + "url": { + "$ref": "#/$defs/__schema11" + }, + "email": { + "$ref": "#/$defs/__schema12" + } + }, + "additionalProperties": false, + "title": "Security contact", + "description": "A single security contact. At least one of `url` or `email` must be present." + }, + "__schema11": { + "type": "string", + "maxLength": 1024, + "format": "uri", + "description": "Security disclosure URL (e.g. a security.txt or vulnerability-reporting page). Either this or `email` is required." 
+ }, + "__schema12": { + "type": "string", + "maxLength": 256, + "format": "email", + "pattern": "^(?!\\.)(?!.*\\.\\.)([A-Za-z0-9_'+\\-\\.]*)[A-Za-z0-9_+-]@([A-Za-z0-9][A-Za-z0-9\\-]*\\.)+[A-Za-z]{2,}$", + "description": "Security contact email. Either this or `url` is required." + }, + "__schema13": { + "minItems": 1, + "maxItems": 8, + "type": "array", + "items": { + "$ref": "#/$defs/__schema10" + }, + "title": "Security contacts (multiple)", + "description": "Multi-contact form. Mutually exclusive with `security`. Use the singular `security` if there is only one." + }, + "__schema14": { + "type": "string", + "minLength": 1, + "maxLength": 1024, + "title": "Display name", + "description": "Human-readable name shown in directory listings. Defaults to the plugin's `id` when omitted." + }, + "__schema15": { + "type": "string", + "minLength": 1, + "maxLength": 1024, + "title": "Description", + "description": "Short description (<= 140 graphemes by FAIR convention). Aggregators may truncate longer values when displaying in compact lists." + }, + "__schema16": { + "maxItems": 5, + "type": "array", + "items": { + "$ref": "#/$defs/__schema17" + }, + "title": "Keywords", + "description": "Search keywords (<= 5 entries, FAIR convention)." + }, + "__schema17": { + "type": "string", + "minLength": 1, + "maxLength": 128 + }, + "__schema18": { + "type": "string", + "maxLength": 1024, + "format": "uri", + "pattern": "^https:\\/\\/", + "title": "Source repository", + "description": "HTTPS URL of the plugin's source repository. Surfaced in registry listings.", + "examples": ["https://github.com/emdash-cms/plugin-gallery"] + } + } +} diff --git a/packages/registry-cli/scripts/gen-schema.ts b/packages/registry-cli/scripts/gen-schema.ts new file mode 100644 index 000000000..6583b2784 --- /dev/null +++ b/packages/registry-cli/scripts/gen-schema.ts @@ -0,0 +1,60 @@ +/** + * Generate the JSON Schema for `emdash-plugin.jsonc` from the Zod source + * of truth in `src/manifest/schema.ts`. 
+ * + * Run via `pnpm gen-schema` (wired into `build`). The output is committed + * to `schemas/emdash-plugin.schema.json` and shipped in the package's + * `files` array so users can reference it via: + * + * "$schema": "./node_modules/@emdash-cms/registry-cli/schemas/emdash-plugin.schema.json" + * + * Drift between the Zod schema and the committed JSON Schema is caught + * by the snapshot test in `tests/schema-drift.test.ts`. + * + * Why a separate script rather than emitting on build: + * + * - The schema is part of the package's user-facing surface; checking + * it into git makes diffs visible in PR review (a field rename in + * Zod produces a tracked diff in the JSON Schema too). + * - Tests can run without first building. The schema file exists + * at-rest; the test compares Zod's current output to it. + * + * Runs under Node's native TypeScript stripping (Node 22+). No `tsx` or + * `ts-node` dependency. + */ + +import { mkdir, writeFile } from "node:fs/promises"; +import { dirname, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { z } from "zod"; + +import { ManifestSchema } from "../src/manifest/schema.ts"; + +const HERE = dirname(fileURLToPath(import.meta.url)); +const OUT_PATH = resolve(HERE, "..", "schemas", "emdash-plugin.schema.json"); + +// zod 4's native JSON Schema emitter. `target: "draft-2020-12"` is what +// every modern JSON Schema editor (VS Code's built-in schema store, +// IntelliJ's JSON LSP) supports out of the box. +const jsonSchema = z.toJSONSchema(ManifestSchema, { + target: "draft-2020-12", + // Use full reuse rather than inline-everything: smaller file, easier + // diffs when a single subschema changes. 
+ reused: "ref", +}); + +const document = { + $schema: "https://json-schema.org/draft/2020-12/schema", + $id: "https://emdashcms.com/schemas/emdash-plugin.schema.json", + title: "EmDash plugin manifest (emdash-plugin.jsonc)", + description: + "Authoring format for publishing plugins to the EmDash plugin registry. Translated to the on-wire atproto record format at publish time. See https://github.com/emdash-cms/emdash/issues/1028.", + ...jsonSchema, +}; + +const serialised = `${JSON.stringify(document, null, "\t")}\n`; + +await mkdir(dirname(OUT_PATH), { recursive: true }); +await writeFile(OUT_PATH, serialised, "utf8"); +process.stdout.write(`Wrote ${OUT_PATH}\n`); diff --git a/packages/registry-cli/src/commands/publish.ts b/packages/registry-cli/src/commands/publish.ts index 88d819490..3a9e22c58 100644 --- a/packages/registry-cli/src/commands/publish.ts +++ b/packages/registry-cli/src/commands/publish.ts @@ -29,6 +29,14 @@ import consola from "consola"; import pc from "picocolors"; import { formatBytes, MAX_BUNDLE_SIZE, validateBundleSize } from "../bundle/utils.js"; +import { loadManifest, MANIFEST_FILENAME, ManifestError } from "../manifest/load.js"; +import { checkPublisher, PublisherCheckError, writePublisherBack } from "../manifest/publisher.js"; +import { + findUnwiredManifestFields, + manifestToProfileBootstrap, + normaliseManifest, + type NormalisedManifest, +} from "../manifest/translate.js"; import { sha256Multihash } from "../multihash.js"; import { resumeSession } from "../oauth.js"; import { @@ -89,6 +97,16 @@ export const publishCommand = defineCommand({ type: "string", description: "Security contact URL (first publish only)", }, + manifest: { + type: "string", + description: `Path to emdash-plugin.jsonc, or the directory containing it. Defaults to ./${MANIFEST_FILENAME}. 
Pass --no-manifest (or set to "false") to disable manifest loading and rely entirely on flags.`, + }, + "no-manifest": { + type: "boolean", + description: + "Disable manifest loading and rely entirely on flags. Useful in CI where the manifest lives elsewhere or shouldn't be implicitly consumed.", + default: false, + }, "allow-overwrite": { type: "boolean", description: @@ -149,6 +167,17 @@ async function runPublish(args: PublishArgs): Promise { }); if (stringFlagError) throw new CliError(stringFlagError, 2, "INVALID_FLAG"); + // Load the manifest if present. Precedence: explicit flags win over + // manifest values, manifest values fill in any gaps. With + // --no-manifest, we skip loading entirely. + // + // The default path is `./emdash-plugin.jsonc`. If the user didn't pass + // --manifest and there's no file at the default path, that's NOT an + // error: legacy flag-only invocations keep working. Only `--manifest` + // explicit-not-found is an error. + const manifestLoad = await loadManifestBootstrap(args, consola); + const manifestBase = manifestLoad?.bootstrap ?? null; + // Fetch + checksum the tarball, then extract the manifest BEFORE we // print any reassuring "tarball looks fine" lines. A 200 from a CDN // can serve an HTML 404 page; we want the failure to land before the @@ -189,6 +218,32 @@ async function runPublish(args: PublishArgs): Promise { } consola.info(`Publishing as ${pc.bold(session.handle ?? session.did)} (${pc.dim(session.did)})`); + // Verify the manifest's pinned publisher matches the active session + // before we open any network connections to the PDS. The check runs + // here (rather than at manifest-load time) because it depends on + // session.did, which we only know now. 
+ if (manifestLoad?.manifest.publisher !== undefined) { + try { + const check = await checkPublisher({ + manifestPublisher: manifestLoad.manifest.publisher, + sessionDid: session.did, + }); + if (check.kind === "mismatch") { + throw new CliError( + `Manifest pins publisher to ${pc.bold(check.pinnedDisplay)} (${check.pinnedDid}), but the active session is ${session.did}. ` + + `Either switch sessions (\`emdash-registry switch ${check.pinnedDid}\`), or edit the manifest if you are transferring the plugin to a new publisher.`, + 1, + "MANIFEST_PUBLISHER_MISMATCH", + ); + } + } catch (error) { + if (error instanceof PublisherCheckError) { + throw new CliError(error.message, 1, error.code); + } + throw error; + } + } + const oauthSession = await resumeSession(session.did); const publisher = PublishingClient.fromHandler({ handler: oauthSession, @@ -196,7 +251,15 @@ async function runPublish(args: PublishArgs): Promise { pds: session.pds, }); + // Build the final ProfileBootstrap. Layer ordering: + // 1. manifest values (if any) at the bottom + // 2. flag values on top (explicit caller intent wins) + // Each layer only writes a key when the caller provided it; missing + // keys remain missing so the API's "required on first publish" checks + // fire at the right time. Spreading `null` is a no-op, so the + // no-manifest path doesn't need a fallback object. const profile: ProfileBootstrap = { + ...manifestBase, ...(args.license !== undefined ? { license: args.license } : {}), ...(args["author-name"] !== undefined ? { authorName: args["author-name"] } : {}), ...(args["author-url"] !== undefined ? { authorUrl: args["author-url"] } : {}), @@ -222,6 +285,26 @@ async function runPublish(args: PublishArgs): Promise { logger, }); + // Post-publish: pin the active session's DID back to the manifest if + // the user didn't pin one themselves. 
This is a convenience, not a + // publish requirement — failures are logged but don't fail the + // command (the publish already committed to the PDS). + // + // The handle is passed for the line-comment annotation; the CLI + // itself only ever uses the DID for the equality check. + if (manifestLoad && manifestLoad.manifest.publisher === undefined) { + await writePublisherBack({ + manifestPath: manifestLoad.path, + sessionDid: session.did, + // session.handle is nullable; normalise to undefined for the + // optional argument so the absence-vs-empty distinction stays + // clean at the writePublisherBack boundary. + sessionHandle: session.handle ?? undefined, + onInfo: (m) => consola.info(m), + onWarn: (m) => consola.warn(m), + }); + } + // Subsequent-publish: warn about ignored first-publish-only flags. if (!result.profileCreated && result.ignoredProfileFields.length > 0) { const flags = result.ignoredProfileFields.map(profileFieldToFlag).join(", "); @@ -319,10 +402,72 @@ type PublishArgs = { "author-email"?: string; "security-email"?: string; "security-url"?: string; + manifest?: string; + "no-manifest"?: boolean; "allow-overwrite"?: boolean; json?: boolean; }; +/** + * Result of resolving the manifest for `runPublish`. Surfaces both the + * derived ProfileBootstrap (the publish API's input) and the normalised + * manifest itself, so downstream code can run the publisher-pin check + * and the post-publish write-back without re-parsing the file. + */ +interface ManifestLoadOutcome { + /** Resolved absolute path to the manifest file. */ + path: string; + /** Normalised manifest (single/multi-author forms collapsed). */ + manifest: NormalisedManifest; + /** Bridged ProfileBootstrap for the legacy publish-API input. */ + bootstrap: ProfileBootstrap; +} + +/** + * Resolve the manifest layer for `runPublish`. Returns `null` when no + * manifest was loaded (either suppressed by --no-manifest or the + * default-path file is missing). 
Throws a CliError when the user + * explicitly named a manifest path that couldn't be loaded. + * + * Surfaces a warning for any field the manifest carries that publish + * doesn't yet read end-to-end (issues #1029-#1033). Those fields are + * not silently dropped at publish — they're accepted in the manifest + * today so plugin authors can start writing them; the rest of the + * pipeline catches up issue-by-issue. + */ +async function loadManifestBootstrap( + args: PublishArgs, + log: { info(m: string): void; warn(m: string): void }, +): Promise { + if (args["no-manifest"]) return null; + const explicit = args.manifest !== undefined && args.manifest.length > 0; + const path = args.manifest ?? `./${MANIFEST_FILENAME}`; + try { + const { manifest, path: resolvedPath } = await loadManifest(path); + const normalised = normaliseManifest(manifest); + log.info(`Loaded manifest: ${pc.dim(resolvedPath)}`); + const unwired = findUnwiredManifestFields(normalised); + for (const u of unwired) { + log.warn( + `Manifest field ${pc.bold(u.field)} is accepted but not yet published end-to-end (tracking in ${u.issue}). It will be ignored until that issue lands.`, + ); + } + return { + path: resolvedPath, + manifest: normalised, + bootstrap: manifestToProfileBootstrap(normalised), + }; + } catch (error) { + if (error instanceof ManifestError) { + // Default-path miss: not an error. Legacy flag-only callers + // keep working when they have no manifest file. 
+ if (!explicit && error.code === "MANIFEST_NOT_FOUND") return null; + throw new CliError(error.message, 1, error.code); + } + throw error; + } +} + // ── helpers ────────────────────────────────────────────────────────────────── /** diff --git a/packages/registry-cli/src/commands/validate.ts b/packages/registry-cli/src/commands/validate.ts new file mode 100644 index 000000000..e373af9b0 --- /dev/null +++ b/packages/registry-cli/src/commands/validate.ts @@ -0,0 +1,92 @@ +/** + * `emdash-registry validate [path]` + * + * Validate an `emdash-plugin.jsonc` manifest against the v1 schema. + * + * Exit codes: + * + * - 0: manifest is schema-valid. + * - 1: validation failed; details on stderr (human mode) or stdout (JSON mode). + * - 2: usage error (e.g. invalid `--json` combination). + * + * The CLI does not check publish-time invariants here (e.g. license required + * on first publish vs ignored on subsequent). Those checks live in + * `publishRelease` and require network access. `validate` is a fast, offline + * sanity check that's safe to wire into `pre-commit` / CI. + */ + +import { defineCommand } from "citty"; +import consola from "consola"; +import pc from "picocolors"; + +import { ManifestError, loadManifest, MANIFEST_FILENAME } from "../manifest/load.js"; +import { findUnwiredManifestFields, normaliseManifest } from "../manifest/translate.js"; + +export const validateCommand = defineCommand({ + meta: { + name: "validate", + description: + "Validate an emdash-plugin.jsonc manifest against the v1 schema (offline; no network access).", + }, + args: { + path: { + type: "positional", + required: false, + description: `Path to the manifest, or the directory containing it. Defaults to ./${MANIFEST_FILENAME}.`, + }, + json: { + type: "boolean", + description: + "Emit machine-readable JSON instead of human-readable output. Stdout is { ok: true, path, unwired } or { ok: false, error: { code, message, issues } }. 
Exit code mirrors human mode.", + }, + }, + async run({ args }) { + const path = args.path ?? "."; + try { + const { manifest, path: resolved } = await loadManifest(path); + const normalised = normaliseManifest(manifest); + const unwired = findUnwiredManifestFields(normalised); + + if (args.json) { + process.stdout.write( + `${JSON.stringify({ + ok: true, + path: resolved, + unwired: unwired.map((u) => ({ field: u.field, issue: u.issue })), + })}\n`, + ); + return; + } + + consola.success(`Manifest is valid: ${pc.dim(resolved)}`); + if (unwired.length > 0) { + consola.warn( + "Some fields are accepted by the manifest schema but aren't yet read by `publish`. They'll be wired through in the issues listed.", + ); + for (const u of unwired) { + consola.warn(` ${pc.bold(u.field)} -> ${u.issue}`); + } + } + } catch (error) { + if (error instanceof ManifestError) { + if (args.json) { + process.stdout.write( + `${JSON.stringify({ + ok: false, + error: { + code: error.code, + message: error.message, + path: error.path, + issues: error.issues, + }, + })}\n`, + ); + } else { + consola.error(error.message); + } + process.exit(1); + } + throw error; + } + }, +}); diff --git a/packages/registry-cli/src/index.ts b/packages/registry-cli/src/index.ts index e832b140e..e3c62c3f1 100644 --- a/packages/registry-cli/src/index.ts +++ b/packages/registry-cli/src/index.ts @@ -13,6 +13,7 @@ * - info — show details about a package * - bundle — bundle a plugin source directory into a tarball * - publish — publish a release that points at a hosted tarball + * - validate — validate an emdash-plugin.jsonc manifest against the v1 schema * * EXPERIMENTAL: this CLI targets `com.emdashcms.experimental.*` and the * experimental aggregator. Pin to an exact version while RFC 0001 is in flight. 
@@ -27,6 +28,7 @@ import { logoutCommand } from "./commands/logout.js"; import { publishCommand } from "./commands/publish.js"; import { searchCommand } from "./commands/search.js"; import { switchCommand } from "./commands/switch.js"; +import { validateCommand } from "./commands/validate.js"; import { whoamiCommand } from "./commands/whoami.js"; const main = defineCommand({ @@ -43,6 +45,7 @@ const main = defineCommand({ info: infoCommand, bundle: bundleCommand, publish: publishCommand, + validate: validateCommand, }, }); diff --git a/packages/registry-cli/src/manifest/load.ts b/packages/registry-cli/src/manifest/load.ts new file mode 100644 index 000000000..5b06e0f76 --- /dev/null +++ b/packages/registry-cli/src/manifest/load.ts @@ -0,0 +1,312 @@ +/** + * Read and validate an `emdash-plugin.jsonc` manifest from disk. + * + * Three failure modes, each with a distinct error code for scriptable + * consumers (`validate --json`, programmatic API users): + * + * - `MANIFEST_NOT_FOUND` — file doesn't exist at the resolved path. + * - `MANIFEST_PARSE_ERROR` — JSONC parse failure (trailing comma, missing + * bracket, control char in string). Includes line + column from + * `jsonc-parser`'s offset. + * - `MANIFEST_VALIDATION_ERROR` — JSONC parsed cleanly but the value + * failed the Zod schema. Includes the field path and the offending + * value's location in the source where possible. + * + * The line/column mapping is critical for editor-side workflows: a user + * running `emdash-registry validate` from a CI step wants the same kind of + * pointer they'd get from `tsc` or `eslint`, not a Zod issue tree. + */ + +import { readFile } from "node:fs/promises"; +import { resolve } from "node:path"; + +import { parseTree, type Node, type ParseError, printParseErrorCode } from "jsonc-parser"; +import type { ZodIssue } from "zod"; + +import { ManifestSchema, type Manifest } from "./schema.js"; + +/** + * Conventional manifest filename. Lives next to the plugin's `package.json`. 
+ */ +export const MANIFEST_FILENAME = "emdash-plugin.jsonc"; + +export type ManifestErrorCode = + | "MANIFEST_NOT_FOUND" + | "MANIFEST_PARSE_ERROR" + | "MANIFEST_VALIDATION_ERROR"; + +export class ManifestError extends Error { + override readonly name = "ManifestError"; + readonly code: ManifestErrorCode; + /** Resolved absolute path of the manifest file. */ + readonly path: string; + /** + * Issues for `MANIFEST_VALIDATION_ERROR`. One per failed rule, each + * carrying a JSON pointer-style path and an optional source location. + * Empty for the other error codes. + */ + readonly issues: ManifestIssue[]; + + constructor( + code: ManifestErrorCode, + message: string, + path: string, + issues: ManifestIssue[] = [], + ) { + super(message); + this.code = code; + this.path = path; + this.issues = issues; + } +} + +export interface ManifestIssue { + /** Dotted/bracketed JSON path, e.g. `authors[0].email`. */ + path: string; + message: string; + /** 1-indexed line and column in the manifest source, when known. */ + location?: { line: number; column: number }; +} + +export interface LoadManifestResult { + manifest: Manifest; + /** Resolved absolute path. */ + path: string; +} + +/** + * Load and validate a manifest at `path`. `path` may be a directory (in + * which case `emdash-plugin.jsonc` is appended) or a file. + * + * Throws `ManifestError` on every failure path. Successful return guarantees + * the manifest is schema-valid (but normalisation to the publish-input + * shape still needs `./translate.ts`). + */ +export async function loadManifest(path: string): Promise { + const resolved = resolve(path); + // Heuristic: paths that end in `.jsonc` or `.json` are treated as + // files; everything else is treated as a directory. We don't `stat` + // to disambiguate because the error path "missing file" should be the + // same regardless of which form the caller passed. + const filePath = + resolved.endsWith(".jsonc") || resolved.endsWith(".json") + ? 
resolved + : resolve(resolved, MANIFEST_FILENAME); + + let source: string; + try { + source = await readFile(filePath, "utf8"); + } catch (error) { + if (isNodeNotFoundError(error)) { + throw new ManifestError( + "MANIFEST_NOT_FOUND", + `No manifest at ${filePath}. Create one with: emdash-registry init`, + filePath, + ); + } + throw error; + } + + return parseAndValidate(source, filePath); +} + +/** + * Variant for callers that already have the source text in hand (tests, + * editor integrations that read the buffer). The `path` argument is used + * for error messages only. + */ +export function parseAndValidateManifest(source: string, path: string): LoadManifestResult { + return parseAndValidate(source, path); +} + +// ────────────────────────────────────────────────────────────────────────── +// Internals +// ────────────────────────────────────────────────────────────────────────── + +function parseAndValidate(source: string, filePath: string): LoadManifestResult { + const parseErrors: ParseError[] = []; + // `parseTree` gives us both the parsed value AND the syntax tree, so we + // can map a Zod issue's path back to a source offset. `parse` alone + // loses that information. + const root = parseTree(source, parseErrors, { + // Comments are part of the JSONC contract. Trailing commas are + // allowed because they reduce diff noise when the user adds a new + // field at the end. + disallowComments: false, + allowTrailingComma: true, + allowEmptyContent: false, + }); + + if (parseErrors.length > 0) { + const first = parseErrors[0]!; + const { line, column } = offsetToLineCol(source, first.offset); + throw new ManifestError( + "MANIFEST_PARSE_ERROR", + `${filePath}:${line}:${column}: ${printParseErrorCode(first.error)}`, + filePath, + ); + } + + if (!root) { + // Shouldn't be reachable when `allowEmptyContent: false` is set and + // `parseErrors` is empty, but `parseTree`'s return type is nullable. 
+ throw new ManifestError("MANIFEST_PARSE_ERROR", `${filePath}: file is empty`, filePath); + } + + const value = nodeToValue(root); + + const result = ManifestSchema.safeParse(value); + if (!result.success) { + const issues = result.error.issues.map((issue) => zodIssueToManifestIssue(issue, source, root)); + const summary = issues + .map((i) => { + const loc = i.location ? `:${i.location.line}:${i.location.column}` : ""; + return `${filePath}${loc}: ${i.path ? `${i.path}: ` : ""}${i.message}`; + }) + .join("\n"); + throw new ManifestError( + "MANIFEST_VALIDATION_ERROR", + `Manifest validation failed:\n${summary}`, + filePath, + issues, + ); + } + + return { manifest: result.data, path: filePath }; +} + +/** + * Map a Zod issue to a manifest issue. The path translation strips the + * leading `$` that some Zod versions prepend and produces the JSONC-style + * `authors[0].email` syntax users will recognise. + * + * Zod 4 types path segments as `PropertyKey` (string | number | symbol). + * Symbols cannot appear in a JSON-parsed value's path (JSON has no symbol + * keys), so we narrow defensively and treat any stray symbol as an + * opaque "" string in the displayed path. + */ +function zodIssueToManifestIssue(issue: ZodIssue, source: string, root: Node): ManifestIssue { + const path = narrowZodPath(issue.path); + const pathStr = formatZodPath(path); + const node = findNodeAtPath(root, path); + const offset = node?.offset; + const location = offset !== undefined ? offsetToLineCol(source, offset) : undefined; + return location + ? { path: pathStr, message: issue.message, location } + : { path: pathStr, message: issue.message }; +} + +/** + * Coerce a Zod 4 issue path (`PropertyKey[]`) to the string|number form + * the rest of the loader uses. A symbol segment is impossible for JSONC + * input, but we render it defensively rather than crashing. 
+ */ +function narrowZodPath(path: ReadonlyArray): Array { + return path.map((segment) => { + if (typeof segment === "string" || typeof segment === "number") return segment; + return segment.toString(); + }); +} + +/** + * Format a Zod path array as `authors[0].email`. Numbers become bracketed + * indices; strings become dot-prefixed (except the first). + */ +function formatZodPath(path: ReadonlyArray): string { + let out = ""; + for (const segment of path) { + if (typeof segment === "number") { + out += `[${segment}]`; + } else { + out += out.length === 0 ? segment : `.${segment}`; + } + } + return out; +} + +/** + * Walk the JSONC syntax tree to find the node at a given path. Returns + * `undefined` if the path traverses through a missing key (which is + * common for "required" issues: the field doesn't exist in the source, + * so we point at the parent object instead). + */ +function findNodeAtPath(root: Node, path: ReadonlyArray): Node | undefined { + let current: Node | undefined = root; + for (const segment of path) { + if (!current) return undefined; + if (typeof segment === "number") { + if (current.type !== "array" || !current.children) return current; + current = current.children[segment]; + } else { + if (current.type !== "object" || !current.children) return current; + const prop = current.children.find( + (c) => + c.type === "property" && + c.children?.[0]?.type === "string" && + c.children[0].value === segment, + ); + // `property` node's children are [keyNode, valueNode]. We want + // the value for further traversal. + current = prop?.children?.[1]; + } + } + return current; +} + +/** + * Convert a JSONC syntax-tree node to its plain JavaScript value. The + * `parseTree` API doesn't return values directly; this walks the tree. + * + * We can't use `jsonc-parser`'s `parse()` (which would give us the value + * directly) because we need the tree anyway for error-location mapping, + * and parsing twice doubles the work for a file we're about to validate. 
+ */ +function nodeToValue(node: Node): unknown { + switch (node.type) { + case "object": { + const obj: Record = {}; + for (const prop of node.children ?? []) { + if (prop.type !== "property") continue; + const [keyNode, valueNode] = prop.children ?? []; + if (!keyNode || keyNode.type !== "string" || !valueNode) continue; + if (typeof keyNode.value !== "string") continue; + obj[keyNode.value] = nodeToValue(valueNode); + } + return obj; + } + case "array": + return (node.children ?? []).map((child) => nodeToValue(child)); + case "string": + case "number": + case "boolean": + case "null": + return node.value; + default: + return undefined; + } +} + +/** + * Convert a byte offset in `source` into 1-indexed line + column. Matches + * the convention `tsc` and `eslint` use for error pointers. + */ +function offsetToLineCol(source: string, offset: number): { line: number; column: number } { + let line = 1; + let column = 1; + const max = Math.min(offset, source.length); + for (let i = 0; i < max; i++) { + if (source.charCodeAt(i) === 10 /* \n */) { + line++; + column = 1; + } else { + column++; + } + } + return { line, column }; +} + +function isNodeNotFoundError(error: unknown): boolean { + return ( + error instanceof Error && "code" in error && (error as { code: unknown }).code === "ENOENT" + ); +} diff --git a/packages/registry-cli/src/manifest/publisher.ts b/packages/registry-cli/src/manifest/publisher.ts new file mode 100644 index 000000000..dccd15fda --- /dev/null +++ b/packages/registry-cli/src/manifest/publisher.ts @@ -0,0 +1,290 @@ +/** + * Verify that the active session matches the manifest's pinned `publisher`, + * and write the publisher back to the manifest on first publish. + * + * Two paths, depending on the manifest state at publish time: + * + * 1. Manifest pins a publisher (DID or handle). + * - DID: compare verbatim against the session DID. Mismatch is an + * immediate, no-override error. 
The user must `emdash-registry switch` + * to the right session, or edit the manifest if they're transferring + * the plugin. + * - Handle: resolve to a DID via `@atcute/identity-resolver`, then + * compare. Resolution failures surface as a distinct error code so + * the user can tell "wrong handle" from "wrong account". + * 2. Manifest omits `publisher`. + * - Publish proceeds with the active session. + * - On success, the CLI writes `"publisher": ""` back + * to the manifest file using `jsonc-parser`'s `modify` + `applyEdits` + * so comments and formatting are preserved. + * + * The write-back is a post-publish convenience: failures here MUST NOT + * roll back or fail the publish. The publish has already committed to the + * publisher's PDS by this point. + * + * The DID-only write-back rule (we never write a handle) is documented + * in #1028. Hand-written handles are respected verbatim; the user can + * still pin a handle if they prefer the readability. + */ + +import { randomUUID } from "node:crypto"; +import { readFile, rename, writeFile } from "node:fs/promises"; +import { dirname, join } from "node:path"; + +import { isDid, isHandle, type Did, type Handle } from "@atcute/lexicons/syntax"; +import { applyEdits, modify, parseTree, printParseErrorCode, type ParseError } from "jsonc-parser"; + +import { createActorResolver } from "../oauth.js"; + +/** + * Result of comparing a manifest's pinned publisher against the active + * session DID. The shape encodes the three downstream cases: + * + * - `match`: publisher pinned, resolved to the session DID. Publish + * proceeds; no write-back. + * - `unpinned`: publisher omitted. Publish proceeds; write-back + * scheduled for after the successful publish. + * - `mismatch`: publisher pinned but doesn't resolve to the session DID. + * Publish refuses; the caller throws. 
+ */ +export type PublisherCheck = + | { kind: "match"; pinnedDid: Did } + | { kind: "unpinned" } + | { kind: "mismatch"; pinnedDid: Did; pinnedDisplay: string }; + +export type PublisherCheckErrorCode = "MANIFEST_PUBLISHER_UNRESOLVED"; + +export class PublisherCheckError extends Error { + override readonly name = "PublisherCheckError"; + readonly code: PublisherCheckErrorCode; + constructor(code: PublisherCheckErrorCode, message: string) { + super(message); + this.code = code; + } +} + +/** + * Compare a manifest's `publisher` value (if any) against the active + * session's DID. Returns a structured outcome rather than throwing on + * mismatch — the caller decides how to render the error so the CLI's + * human + JSON output paths can format consistently. + * + * Throws `PublisherCheckError` only for *failures of the check itself* + * (e.g. the handle couldn't be resolved to a DID). Logical mismatch is + * a successful check result with `kind: "mismatch"`. + */ +export async function checkPublisher(input: { + manifestPublisher: string | undefined; + sessionDid: Did; +}): Promise { + if (input.manifestPublisher === undefined) { + return { kind: "unpinned" }; + } + + const pinned = input.manifestPublisher; + + if (isDid(pinned)) { + if (pinned === input.sessionDid) { + return { kind: "match", pinnedDid: pinned }; + } + return { kind: "mismatch", pinnedDid: pinned, pinnedDisplay: pinned }; + } + + if (isHandle(pinned)) { + const resolved = await resolveHandleToDid(pinned); + if (resolved === input.sessionDid) { + return { kind: "match", pinnedDid: resolved }; + } + return { kind: "mismatch", pinnedDid: resolved, pinnedDisplay: pinned }; + } + + // Should be unreachable: the schema validates the syntax, so an + // invalid value can only reach here when the caller bypassed + // validation. We surface a generic resolver error rather than + // crashing, so the failure path stays consistent. 
+ throw new PublisherCheckError( + "MANIFEST_PUBLISHER_UNRESOLVED", + `publisher value "${pinned}" is neither a DID nor a handle. Edit the manifest to use a valid DID or handle.`, + ); +} + +/** + * Resolve an atproto handle to a DID via the same actor-resolver the + * OAuth flow uses (DoH + .well-known). Surfaces resolution failures + * with a clear hint pointing the user at the DID-pin escape hatch. + */ +async function resolveHandleToDid(handle: Handle): Promise { + const resolver = createActorResolver(); + try { + const resolved = await resolver.resolve(handle); + return resolved.did; + } catch (error) { + const reason = error instanceof Error ? error.message : String(error); + throw new PublisherCheckError( + "MANIFEST_PUBLISHER_UNRESOLVED", + `could not resolve handle "${handle}" to a DID: ${reason}. ` + + `To avoid the lookup, replace the handle with the DID directly in the manifest (publisher: "did:plc:...").`, + ); + } +} + +/** + * Write the session DID back to the manifest as the `publisher` field, + * inserting it right after `license` to give a stable canonical order. + * + * The DID is the value the CLI compares against on subsequent publishes; + * the handle (when provided) is appended as a JSONC line comment for + * human readability of `git diff` output. The CLI ignores the comment + * — handle changes don't break the pin, only DID changes do. + * + * Re-reads the source from disk first and re-parses to detect concurrent + * edits. If the file changed (publisher already set, parse errors, or + * the file is gone), the write-back is skipped with a warning rather + * than overwriting the user's edits. + * + * Errors are caught and surfaced as warnings to `onWarn`. The publish + * has already succeeded by the time this runs; a failed write-back must + * not fail the publish. 
+ */ +export async function writePublisherBack(input: { + manifestPath: string; + sessionDid: Did; + /** + * Optional handle of the active session, rendered as a line comment + * next to the inserted DID. The comment is purely informational; the + * CLI never reads it back. Omit for sessions that have no handle + * (e.g. did-only logins). + */ + sessionHandle?: string; + onInfo?: (message: string) => void; + onWarn?: (message: string) => void; +}): Promise { + const { manifestPath, sessionDid, sessionHandle, onInfo, onWarn } = input; + try { + const source = await readFile(manifestPath, "utf8"); + + // Defensive re-parse: confirm `publisher` is still absent. If + // the user added one while we were publishing, leave their value + // alone. Same if the file no longer parses cleanly. `parseTree` + // is lenient and returns a partial tree on malformed input, so + // we have to inspect the errors array — checking the root's + // type alone misses things like "missing closing brace". + const parseErrors: ParseError[] = []; + const root = parseTree(source, parseErrors, { + disallowComments: false, + allowTrailingComma: true, + allowEmptyContent: false, + }); + if (parseErrors.length > 0) { + const first = parseErrors[0]!; + onWarn?.( + `Skipped writing publisher to ${manifestPath} (file no longer parses: ${printParseErrorCode(first.error)}).`, + ); + return; + } + if (!root || root.type !== "object") { + onWarn?.( + `Skipped writing publisher to ${manifestPath} (file no longer parses as a JSONC object).`, + ); + return; + } + const hasPublisher = root.children?.some( + (prop) => + prop.type === "property" && + prop.children?.[0]?.type === "string" && + prop.children[0].value === "publisher", + ); + if (hasPublisher) { + onInfo?.(`Skipped writing publisher to ${manifestPath} (already set by user).`); + return; + } + + // `modify` returns a list of text edits; `applyEdits` resolves + // them against the source. 
This is the JSONC-aware path that + // preserves comments and existing whitespace. + // + // `formattingOptions.insertSpaces: false` matches the repo's + // tab-indented JSONC convention. The `getInsertionIndex` callback + // places `publisher` immediately after `license` (or at the end + // of the object if `license` isn't present, which shouldn't + // happen for a schema-valid manifest but is handled defensively). + const edits = modify(source, ["publisher"], sessionDid, { + formattingOptions: { insertSpaces: false, tabSize: 1 }, + getInsertionIndex: (existingProps) => { + const licenseIdx = existingProps.indexOf("license"); + if (licenseIdx >= 0) return licenseIdx + 1; + return existingProps.length; + }, + }); + if (edits.length === 0) { + onWarn?.( + `Skipped writing publisher to ${manifestPath} (no edits produced; file may be malformed).`, + ); + return; + } + const applied = applyEdits(source, edits); + + // Append a `// ` line comment to the inserted publisher + // line, if we have a handle. The comment is for human readers of + // `git diff`; the CLI itself never parses it back out. We locate + // the inserted line by matching on the DID string (opaque enough + // to be unique within a single-publisher manifest) and append + // the comment before the line terminator. + // + // The substitution runs ONCE: if the DID happens to appear + // elsewhere (it shouldn't for a fresh insertion, but defensively + // anyway), only the first match is annotated. + const updated = sessionHandle + ? annotatePublisherLine(applied, sessionDid, sessionHandle) + : applied; + + // Atomic write: tmpfile + rename. POSIX rename is atomic, so a + // crash mid-write leaves the previous file intact rather than + // truncating the publisher's manifest. 
+ const tmp = join(dirname(manifestPath), `.${randomUUID()}.tmp`); + await writeFile(tmp, updated, "utf8"); + await rename(tmp, manifestPath); + onInfo?.(`Pinned publisher to ${sessionDid} in ${manifestPath}.`); + } catch (error) { + const reason = error instanceof Error ? error.message : String(error); + onWarn?.( + `Could not pin publisher to ${manifestPath}: ${reason}. ` + + `The publish succeeded; you can add publisher manually on your next edit.`, + ); + } +} + +/** + * Append `// ` to the line containing the freshly-inserted DID. + * + * The match is anchored to the line containing `""` (the DID is + * always quoted in JSON) so a substring collision in a different value + * is impossible. The line may end in `",\n"` (interior key) or `"\n"` + * (trailing key); we insert the comment BEFORE the line terminator so + * the comma stays adjacent to the value. + * + * If the DID isn't found, returns the input unchanged — the publish-back + * already succeeded; an unannotated line is a degraded outcome but not + * a failure. + * + * No sanitisation of the handle is needed: `session.handle` is + * populated by atproto's identity resolver at login time, which only + * accepts values that (a) match the handle syntax (no control chars, + * no `/`, no `*`, no whitespace) and (b) round-trip via DoH or + * `.well-known` to the session DID. An attacker who can put arbitrary + * bytes into `session.handle` already controls the user's identity. + */ +function annotatePublisherLine(source: string, did: Did, handle: string): string { + if (handle.length === 0) return source; + // Match the DID inside its quotes on one line. The DID was emitted + // by `JSON.stringify` via `jsonc-parser`, so it's safely escaped. 
+ const needle = `"${did}"`; + const lineEnd = source.indexOf("\n", source.indexOf(needle)); + if (lineEnd < 0) return source; + // Walk back past any trailing CR so the comment lands at the end + // of the *content*, not after a literal "\r" on Windows-authored + // files. + let insertAt = lineEnd; + if (insertAt > 0 && source[insertAt - 1] === "\r") insertAt -= 1; + return `${source.slice(0, insertAt)} // ${handle}${source.slice(insertAt)}`; +} diff --git a/packages/registry-cli/src/manifest/schema.ts b/packages/registry-cli/src/manifest/schema.ts new file mode 100644 index 000000000..3a5b5de1d --- /dev/null +++ b/packages/registry-cli/src/manifest/schema.ts @@ -0,0 +1,350 @@ +/** + * Zod schema for `emdash-plugin.jsonc` — the publisher-authored manifest that + * sits next to a plugin's source and feeds the registry CLI's `publish`, + * `validate`, and `init` commands. + * + * Relationship to the lexicon + * --------------------------- + * + * This schema is NOT the lexicon. The lexicon + * (`com.emdashcms.experimental.package.profile`) is the on-wire atproto + * record format, optimised for content-addressed storage and aggregator + * indexing. This schema is the authoring format, optimised for a human + * editing a file in VS Code with `$schema`-powered IDE completion. + * + * Fields that exist in BOTH places use the lexicon's field names verbatim + * (`license`, `keywords`, `repo`, `name`, `description`). Fields that the + * publisher cannot reasonably write by hand are derived at publish time and + * do not appear here: `id` (full AT URI requires the publisher's DID), + * `type` (always `"emdash-plugin"` from this CLI), `slug` (derived from the + * bundled `manifest.json`'s `id`), `lastUpdated` (set at publish time), + * `artifacts.package` (filled in from the fetched tarball), `extensions` + * (computed from the bundled manifest's capabilities + allowedHosts). + * + * The translation step lives in `./translate.ts`. 
+ * + * Single-vs-multi-author convenience + * ---------------------------------- + * + * The lexicon stores `authors` and `security` as arrays. The overwhelmingly + * common case is one author and one security contact, so the manifest + * accepts both shapes: + * + * // single-author + * { "author": { "name": "Jane Doe" }, "security": { "email": "..." } } + * + * // multi-author + * { "authors": [{ "name": "..." }, { "name": "..." }], + * "securityContacts": [{ "email": "..." }] } + * + * `loadManifest` normalises both forms to the array shape before passing to + * publish. You can't mix forms for the same field (e.g. `author` AND + * `authors`); the schema rejects that. + * + * Strict mode + * ----------- + * + * Unknown keys are rejected with `.strict()`. This catches typos like + * `"licens": "MIT"` rather than letting them silently fall through. The + * tradeoff is that adding a field requires a CLI release; we accept that + * cost for v1 and may revisit after one cycle of field-add (issue #1029). + */ + +import { isDid, isHandle } from "@atcute/lexicons/syntax"; +import { z } from "zod"; + +// ────────────────────────────────────────────────────────────────────────── +// Field-level schemas — exported so tests can target individual rules. +// +// Each field uses `.meta({ description })` so the descriptions flow into +// the generated JSON Schema and surface as inline hover hints in editors +// that support `$schema`-driven completion (VS Code, IntelliJ). +// ────────────────────────────────────────────────────────────────────────── + +/** + * SPDX license expression. The lexicon caps this at 256 chars. We don't + * validate the SPDX grammar here — the registry aggregator does that and + * gives clearer errors. We DO refuse the empty string and obvious garbage + * (whitespace-only) so the publish command can surface a useful message + * before any network round-trip. 
+ */ +export const LicenseSchema = z + .string() + .min(1, 'license must be a non-empty SPDX expression (e.g. "MIT")') + .max(256, "license must be <= 256 characters (SPDX expressions are short)") + .refine((v) => v.trim().length > 0, "license cannot be whitespace-only") + .meta({ + title: "License", + description: + 'SPDX license expression (e.g. "MIT", "Apache-2.0", "MIT OR Apache-2.0"). Required on first publish; ignored on subsequent publishes (the existing profile wins).', + examples: ["MIT", "Apache-2.0", "MIT OR Apache-2.0"], + }); + +/** + * One author. Mirrors `profile.json#author`. The lexicon says authors + * "SHOULD specify at least one of url or email"; we don't enforce that + * here because anonymous-but-named authors are a legitimate (if + * discouraged) shape. The publish command surfaces it as a warning. + */ +export const AuthorSchema = z + .object({ + name: z + .string() + .min(1, "author.name cannot be empty") + .max(256, "author.name must be <= 256 characters") + .meta({ description: "Display name." }), + url: z + .string() + .url("author.url must be a valid URL") + .max(1024, "author.url must be <= 1024 characters") + .meta({ + description: "Author's homepage or profile URL. Either this or `email` is recommended.", + }) + .optional(), + email: z + .string() + .email("author.email must be a valid email") + .max(256, "author.email must be <= 256 characters") + .meta({ description: "Author's contact email. Either this or `url` is recommended." }) + .optional(), + }) + .strict() + .meta({ + title: "Author", + description: "A single author entry. Mirrors the lexicon's author shape.", + }); + +/** + * One security contact. Mirrors `profile.json#contact`. The lexicon + * mandates "at least one of url or email MUST be present"; Lexicon JSON + * can't express "required one-of", so we enforce it here. 
Without this + * check a publisher could write `{ "security": {} }` and the publish + * record would carry an empty contact (which aggregators reject anyway, + * but failing here is a better user experience). + */ +export const SecurityContactSchema = z + .object({ + url: z + .string() + .url("security.url must be a valid URL") + .max(1024, "security.url must be <= 1024 characters") + .meta({ + description: + "Security disclosure URL (e.g. a security.txt or vulnerability-reporting page). Either this or `email` is required.", + }) + .optional(), + email: z + .string() + .email("security.email must be a valid email") + .max(256, "security.email must be <= 256 characters") + .meta({ + description: "Security contact email. Either this or `url` is required.", + }) + .optional(), + }) + .strict() + .refine( + (v) => v.url !== undefined || v.email !== undefined, + "security contact must have at least one of `url` or `email`", + ) + .meta({ + title: "Security contact", + description: "A single security contact. At least one of `url` or `email` must be present.", + }); + +/** + * Publisher identity, used to verify the active session matches the + * manifest's pinned publisher at publish time. Accepts a DID or a handle. + * + * Recommended form: DID (`did:plc:...`). DIDs are durable — they survive + * handle changes. Handles are friendlier to read but mutable: if the + * publisher's handle changes, the manifest needs an update. + * + * Omitted on first publish, the CLI writes the active session's DID + * back into the manifest automatically. Subsequent publishes verify + * against the pinned value. + * + * Validation is structural only here: DID syntax (`did:method:id`) or + * handle syntax (`name.tld`). The actual resolve-to-DID step happens at + * publish time via `@atcute/identity-resolver`. + */ +export const PublisherSchema = z + .string() + .refine( + (v) => isDid(v) || isHandle(v), + 'publisher must be an atproto DID (e.g. "did:plc:abc123") or handle (e.g. 
"example.com")', + ) + .meta({ + title: "Publisher", + description: + "Atproto DID or handle of the publishing identity. Pinned on first publish to prevent accidental publishes from a different account. DIDs are recommended (durable); handles work but are mutable.", + examples: ["did:plc:abc123def456", "example.com"], + }); + +/** Optional human-readable display name. Mirrors `profile.json#name`. */ +export const NameSchema = z + .string() + .min(1, "name cannot be empty when set") + .max(1024, "name must be <= 1024 characters") + .meta({ + title: "Display name", + description: + "Human-readable name shown in directory listings. Defaults to the plugin's `id` when omitted.", + }); + +/** Short description. Mirrors `profile.json#description`. */ +export const DescriptionSchema = z + .string() + .min(1, "description cannot be empty when set") + .max(1024, "description must be <= 1024 characters") + .meta({ + title: "Description", + description: + "Short description (<= 140 graphemes by FAIR convention). Aggregators may truncate longer values when displaying in compact lists.", + }); + +/** Search keywords. Mirrors `profile.json#keywords`. */ +export const KeywordsSchema = z + .array( + z.string().min(1, "keyword cannot be empty").max(128, "each keyword must be <= 128 characters"), + ) + .max(5, "keywords array must have <= 5 entries (FAIR convention)") + .meta({ + title: "Keywords", + description: "Search keywords (<= 5 entries, FAIR convention).", + }); + +/** + * Source repository URL. Mirrors `release.json#repo`. The lexicon accepts + * either an HTTPS URL or an AT URI; v1 of the CLI accepts HTTPS only. + * AT-URI source repos can be added in a later issue without changing the + * field name. + * + * We use a regex `pattern` rather than `.refine` for the https-only rule + * so the constraint flows through to the generated JSON Schema. Editors + * doing client-side validation against the schema then surface the same + * error the CLI does. 
+ */ +export const RepoSchema = z + .string() + .regex(/^https:\/\//, "repo must be an https:// URL (AT-URI source repos aren't supported yet)") + .url("repo must be a valid URL") + .max(1024, "repo must be <= 1024 characters") + .meta({ + title: "Source repository", + description: "HTTPS URL of the plugin's source repository. Surfaced in registry listings.", + examples: ["https://github.com/emdash-cms/plugin-gallery"], + }); + +// ────────────────────────────────────────────────────────────────────────── +// Top-level manifest +// ────────────────────────────────────────────────────────────────────────── + +/** + * The full v1 manifest. Unknown keys are rejected by `.strict()` so a + * typo'd field name produces an immediate error rather than passing + * through silently. The cost is that every later issue (#1029, #1030, ...) + * has to extend this schema, which is intentional: the manifest is a + * contract with users and we want changes to be deliberate. + * + * `$schema` is allowed because editors write it automatically for IDE + * completion. It is stripped before validation passes the value to the + * publish translation. + */ +export const ManifestSchema = z + .object({ + // `$schema` is for editor IDE support and the JSON Schema tooling + // chain. It carries no semantic meaning to publish; the loader + // strips it before handing the value off. + $schema: z + .string() + .meta({ + description: + "Path or URL to the JSON Schema describing this file. Editors use this for completion and validation.", + }) + .optional(), + + // Required on first publish, ignored on subsequent publishes (the + // existing profile wins). Same precedence rules as today's + // --license flag. + license: LicenseSchema, + + // Optional publisher pin. Omitted on first publish, the CLI + // writes the active session's DID back here automatically. + publisher: PublisherSchema.optional(), + + // Single-author form. Mutually exclusive with `authors`. 
+ author: AuthorSchema.optional(), + // Multi-author form. Mutually exclusive with `author`. At least one + // entry is required when this field is used. + authors: z + .array(AuthorSchema) + .min(1, "authors[] must have at least one entry") + .max(32, "authors[] must have <= 32 entries (lexicon constraint)") + .meta({ + title: "Authors (multiple)", + description: + "Multi-author form. Mutually exclusive with `author`. Use the singular `author` if there is only one.", + }) + .optional(), + + // Single-contact form. Mutually exclusive with `securityContacts`. + security: SecurityContactSchema.optional(), + // Multi-contact form. Mutually exclusive with `security`. + securityContacts: z + .array(SecurityContactSchema) + .min(1, "securityContacts[] must have at least one entry") + .max(8, "securityContacts[] must have <= 8 entries (lexicon constraint)") + .meta({ + title: "Security contacts (multiple)", + description: + "Multi-contact form. Mutually exclusive with `security`. Use the singular `security` if there is only one.", + }) + .optional(), + + // Optional profile fields. + name: NameSchema.optional(), + description: DescriptionSchema.optional(), + keywords: KeywordsSchema.optional(), + + // Optional release fields. + repo: RepoSchema.optional(), + }) + .strict() + .refine((v) => !(v.author !== undefined && v.authors !== undefined), { + message: + "manifest has both `author` and `authors`. Use one form: `author: { ... }` for a single author, or `authors: [...]` for multiple.", + path: ["authors"], + }) + .refine((v) => !(v.security !== undefined && v.securityContacts !== undefined), { + message: + "manifest has both `security` and `securityContacts`. Use one form: `security: { ... }` for a single contact, or `securityContacts: [...]` for multiple.", + path: ["securityContacts"], + }) + .refine((v) => v.author !== undefined || v.authors !== undefined, { + message: "manifest must specify either `author: { ... 
}` or `authors: [...]`",
+    path: ["author"],
+  })
+  .refine((v) => v.security !== undefined || v.securityContacts !== undefined, {
+    message: "manifest must specify either `security: { ... }` or `securityContacts: [...]`",
+    path: ["security"],
+  })
+  .meta({
+    title: "EmDash plugin manifest",
+    description:
+      "Hand-authored manifest for publishing a plugin to the EmDash plugin registry. Lives next to the plugin's `package.json` as `emdash-plugin.jsonc`.",
+  });
+
+/**
+ * Validated manifest shape. Note: this is the SHAPE AFTER the schema's
+ * `.refine()` rules have run, not the on-disk shape. The single-form
+ * convenience fields (`author`, `security`) are still present at this
+ * stage; normalisation to the array forms happens in `./translate.ts`.
+ */
+export type Manifest = z.infer<typeof ManifestSchema>;
+
+/** A single author entry, normalised. */
+export type ManifestAuthor = z.infer<typeof AuthorSchema>;
+
+/** A single security contact entry, normalised. */
+export type ManifestSecurityContact = z.infer<typeof SecurityContactSchema>;
diff --git a/packages/registry-cli/src/manifest/translate.ts b/packages/registry-cli/src/manifest/translate.ts
new file mode 100644
index 000000000..a58e3fad8
--- /dev/null
+++ b/packages/registry-cli/src/manifest/translate.ts
@@ -0,0 +1,126 @@
+/**
+ * Translate a validated manifest into the existing publish-input shape.
+ *
+ * For v1 (issue #1028), publish consumes only the six `ProfileBootstrap`
+ * fields the original flag-based UX exposed. The manifest carries those
+ * plus a small handful of additional fields (`name`, `description`,
+ * `keywords`, `repo`) that aren't wired through publish yet — those land
+ * in issues #1029-#1033.
+ *
+ * The single-author / single-security-contact convenience forms are
+ * normalised here: by the time this returns, the caller sees only the
+ * array shapes the lexicon uses.
+ */ + +import type { ProfileBootstrap } from "../publish/api.js"; +import type { Manifest, ManifestAuthor, ManifestSecurityContact } from "./schema.js"; + +/** + * Normalised "after the schema's single/multi convenience has been + * collapsed" view of a manifest. The CLI passes this to the publish + * pipeline rather than the raw `Manifest` so the rest of the code + * never has to think about `author` vs `authors`. + * + * Fields not yet consumed by publish (name, description, keywords, repo) + * are passed through unchanged so issues #1029-#1033 can wire them up + * without revisiting this translation step. + */ +export interface NormalisedManifest { + license: string; + /** + * Pinned publisher (DID or handle). Undefined when the manifest + * doesn't pin a publisher; the CLI writes the active session's DID + * back after first publish so this is undefined only on first + * publish or in CI flows where the user opted out via `--no-manifest`. + */ + publisher: string | undefined; + authors: ManifestAuthor[]; + securityContacts: ManifestSecurityContact[]; + name: string | undefined; + description: string | undefined; + keywords: string[] | undefined; + repo: string | undefined; +} + +/** + * Collapse the convenience forms (`author`, `security`) into the array + * forms (`authors`, `securityContacts`). + * + * The manifest schema's `.refine()` rules already guarantee that exactly + * one of each pair is set, so the runtime checks here are defensive — a + * caller that bypassed validation would still produce a coherent result. + */ +export function normaliseManifest(manifest: Manifest): NormalisedManifest { + const authors = manifest.authors ?? (manifest.author ? [manifest.author] : []); + const securityContacts = + manifest.securityContacts ?? (manifest.security ? 
[manifest.security] : []);
+  return {
+    license: manifest.license,
+    publisher: manifest.publisher,
+    authors,
+    securityContacts,
+    name: manifest.name,
+    description: manifest.description,
+    keywords: manifest.keywords,
+    repo: manifest.repo,
+  };
+}
+
+/**
+ * Convert a normalised manifest into the legacy `ProfileBootstrap` shape
+ * that `publishRelease` already understands. This is a TEMPORARY bridge
+ * for v1 — it discards everything but the six fields publish reads today.
+ *
+ * Once issue #1029 lands and `publishRelease` accepts the richer profile
+ * shape directly, this function goes away and we pass the
+ * `NormalisedManifest` straight through.
+ *
+ * Behaviour for multi-author plugins: the first author wins. The publish
+ * lexicon supports an array, but `ProfileBootstrap` doesn't model that
+ * today. Issue #1029 fixes this; until then, multi-author manifests
+ * publish their first author and we emit a warning at the CLI layer.
+ */
+export function manifestToProfileBootstrap(manifest: NormalisedManifest): ProfileBootstrap {
+  const author = manifest.authors[0];
+  const security = manifest.securityContacts[0];
+
+  const profile: ProfileBootstrap = { license: manifest.license };
+  if (author?.name !== undefined) profile.authorName = author.name;
+  if (author?.url !== undefined) profile.authorUrl = author.url;
+  if (author?.email !== undefined) profile.authorEmail = author.email;
+  if (security?.email !== undefined) profile.securityEmail = security.email;
+  if (security?.url !== undefined) profile.securityUrl = security.url;
+  return profile;
+}
+
+/**
+ * Lists the manifest fields that v1 publish can't yet consume.
+ * The CLI uses this to warn the user that those fields are accepted (so
+ * `validate` passes) but won't appear in the published record until the
+ * relevant follow-up issue lands.
+ *
+ * Centralised here so each follow-up issue has one place to remove its
+ * field from the warning list when it wires through.
+ */ +export function findUnwiredManifestFields( + manifest: NormalisedManifest, +): Array<{ field: string; issue: string }> { + const unwired: Array<{ field: string; issue: string }> = []; + if (manifest.name !== undefined) unwired.push({ field: "name", issue: "#1029" }); + if (manifest.description !== undefined) { + unwired.push({ field: "description", issue: "#1029" }); + } + if (manifest.keywords !== undefined) { + unwired.push({ field: "keywords", issue: "#1029" }); + } + if (manifest.repo !== undefined) unwired.push({ field: "repo", issue: "#1029" }); + // Multi-author warning is separate: publish accepts the field but only + // publishes the first entry until #1029 lands. + if (manifest.authors.length > 1) { + unwired.push({ field: "authors[1..]", issue: "#1029" }); + } + if (manifest.securityContacts.length > 1) { + unwired.push({ field: "securityContacts[1..]", issue: "#1029" }); + } + return unwired; +} diff --git a/packages/registry-cli/tests/manifest-load.test.ts b/packages/registry-cli/tests/manifest-load.test.ts new file mode 100644 index 000000000..1ae54a89b --- /dev/null +++ b/packages/registry-cli/tests/manifest-load.test.ts @@ -0,0 +1,184 @@ +/** + * Coverage for the manifest loader. The loader's value-add over plain + * `JSON.parse + Zod.parse` is two-fold: + * + * 1. JSONC tolerance: trailing commas + comments are accepted (matches + * the wrangler.jsonc / tsconfig.json convention). + * 2. Source locations on validation errors: the error path is mapped + * back to a 1-indexed line:column so `emdash-registry validate` + * points editors at the offending field. + * + * The tests below use `parseAndValidateManifest` (the in-memory variant) + * to avoid touching disk for the happy paths and Zod-fail paths. The + * filesystem entry point `loadManifest` is exercised separately for its + * directory-vs-file resolution and ENOENT shape. 
+ */ + +import { mkdtemp, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { afterAll, beforeAll, describe, expect, it } from "vitest"; + +import { + ManifestError, + loadManifest, + MANIFEST_FILENAME, + parseAndValidateManifest, +} from "../src/manifest/load.js"; + +const MINIMAL = `{ + "license": "MIT", + "author": { "name": "Jane Doe" }, + "security": { "email": "security@example.com" } +}`; + +describe("parseAndValidateManifest (in-memory)", () => { + it("parses + validates a minimal manifest", () => { + const { manifest, path } = parseAndValidateManifest(MINIMAL, "/virtual/manifest.jsonc"); + expect(path).toBe("/virtual/manifest.jsonc"); + expect(manifest.license).toBe("MIT"); + expect(manifest.author?.name).toBe("Jane Doe"); + expect(manifest.security?.email).toBe("security@example.com"); + }); + + it("accepts JSONC features (comments + trailing commas)", () => { + const source = `{ + // top-level comment + "license": "MIT", /* block comment */ + "author": { "name": "Jane Doe", }, + "security": { "email": "security@example.com", }, + }`; + const { manifest } = parseAndValidateManifest(source, "/virtual/manifest.jsonc"); + expect(manifest.license).toBe("MIT"); + }); + + describe("MANIFEST_PARSE_ERROR", () => { + it("reports a missing closing brace with line:column", () => { + const source = `{ + "license": "MIT" + "author": { "name": "x" } +`; + expect(() => parseAndValidateManifest(source, "/v/m.jsonc")).toThrow(ManifestError); + try { + parseAndValidateManifest(source, "/v/m.jsonc"); + } catch (error) { + expect(error).toBeInstanceOf(ManifestError); + const err = error as ManifestError; + expect(err.code).toBe("MANIFEST_PARSE_ERROR"); + // Error message includes the file:line:col pointer. 
+ expect(err.message).toMatch(/\/v\/m\.jsonc:\d+:\d+/); + } + }); + + it("rejects an empty file", () => { + expect(() => parseAndValidateManifest("", "/v/m.jsonc")).toThrow(ManifestError); + }); + }); + + describe("MANIFEST_VALIDATION_ERROR", () => { + it("reports the field path", () => { + const source = `{ + "license": "", + "author": { "name": "Jane" }, + "security": { "email": "a@b.com" } +}`; + try { + parseAndValidateManifest(source, "/v/m.jsonc"); + expect.fail("expected ManifestError"); + } catch (error) { + const err = error as ManifestError; + expect(err.code).toBe("MANIFEST_VALIDATION_ERROR"); + const license = err.issues.find((i) => i.path === "license"); + expect(license).toBeDefined(); + // Source location points at the offending VALUE (the empty + // string on line 2), not the key. We don't pin the exact + // column because Zod can emit slightly different paths + // across versions; the line number is enough to confirm + // the mapping works. + expect(license?.location?.line).toBe(2); + } + }); + + it("collects multiple issues in one error", () => { + const source = `{ + "license": "", + "author": { "name": "" }, + "security": {} +}`; + try { + parseAndValidateManifest(source, "/v/m.jsonc"); + expect.fail("expected ManifestError"); + } catch (error) { + const err = error as ManifestError; + expect(err.issues.length).toBeGreaterThanOrEqual(3); + // Every issue must have a path and a message. 
+ for (const issue of err.issues) { + expect(typeof issue.path).toBe("string"); + expect(typeof issue.message).toBe("string"); + } + } + }); + + it("rejects unknown top-level keys with strict mode", () => { + const source = `{ + "license": "MIT", + "licens": "MIT", + "author": { "name": "Jane" }, + "security": { "email": "a@b.com" } +}`; + try { + parseAndValidateManifest(source, "/v/m.jsonc"); + expect.fail("expected ManifestError"); + } catch (error) { + const err = error as ManifestError; + expect(err.code).toBe("MANIFEST_VALIDATION_ERROR"); + } + }); + }); +}); + +describe("loadManifest (filesystem)", () => { + let dir: string; + + beforeAll(async () => { + dir = await mkdtemp(join(tmpdir(), "emdash-manifest-test-")); + }); + + afterAll(async () => { + await rm(dir, { recursive: true, force: true }); + }); + + it("loads from a directory by appending the conventional filename", async () => { + await writeFile(join(dir, MANIFEST_FILENAME), MINIMAL, "utf8"); + const { manifest, path } = await loadManifest(dir); + expect(path.endsWith(MANIFEST_FILENAME)).toBe(true); + expect(manifest.license).toBe("MIT"); + }); + + it("loads from an explicit .jsonc path", async () => { + const filePath = join(dir, "explicit.jsonc"); + await writeFile(filePath, MINIMAL, "utf8"); + const { manifest, path } = await loadManifest(filePath); + expect(path).toBe(filePath); + expect(manifest.license).toBe("MIT"); + }); + + it("loads from an explicit .json path", async () => { + const filePath = join(dir, "explicit.json"); + await writeFile(filePath, MINIMAL, "utf8"); + const { manifest } = await loadManifest(filePath); + expect(manifest.license).toBe("MIT"); + }); + + it("returns MANIFEST_NOT_FOUND when the file is missing", async () => { + const missing = join(dir, "no-such-manifest.jsonc"); + try { + await loadManifest(missing); + expect.fail("expected ManifestError"); + } catch (error) { + expect(error).toBeInstanceOf(ManifestError); + expect((error as 
ManifestError).code).toBe("MANIFEST_NOT_FOUND"); + } + }); +}); diff --git a/packages/registry-cli/tests/manifest-publisher.test.ts b/packages/registry-cli/tests/manifest-publisher.test.ts new file mode 100644 index 000000000..cad10d4ed --- /dev/null +++ b/packages/registry-cli/tests/manifest-publisher.test.ts @@ -0,0 +1,284 @@ +/** + * Coverage for the manifest publisher field and its check/write-back. + * + * Two subjects: + * - The Zod schema's PublisherSchema (accepts DID or handle; rejects + * anything else). + * - `checkPublisher` and `writePublisherBack` in `manifest/publisher.ts`. + * + * The handle-resolution path is covered indirectly: we stub the resolver + * by passing values that look like a DID directly, which bypasses + * `@atcute/identity-resolver`. Real handle resolution requires DNS, which + * we don't exercise in unit tests. + */ + +import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import type { Did } from "@atcute/lexicons/syntax"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +import { checkPublisher, writePublisherBack } from "../src/manifest/publisher.js"; +import { ManifestSchema, PublisherSchema } from "../src/manifest/schema.js"; + +const SESSION_DID = "did:plc:abc123def456" as Did; +const OTHER_DID = "did:plc:xyz789otherrr" as Did; + +describe("PublisherSchema", () => { + it("accepts a did:plc identifier", () => { + expect(PublisherSchema.parse("did:plc:abc123")).toBe("did:plc:abc123"); + }); + + it("accepts a did:web identifier", () => { + expect(PublisherSchema.parse("did:web:example.com")).toBe("did:web:example.com"); + }); + + it("accepts a handle", () => { + expect(PublisherSchema.parse("example.com")).toBe("example.com"); + expect(PublisherSchema.parse("jane.bsky.social")).toBe("jane.bsky.social"); + }); + + it("rejects a bare slug", () => { + const result = PublisherSchema.safeParse("not-a-handle-or-did"); + 
expect(result.success).toBe(false); + }); + + it("rejects an empty string", () => { + const result = PublisherSchema.safeParse(""); + expect(result.success).toBe(false); + }); + + it("rejects a malformed did", () => { + // `did:` without method+id is not a valid DID. + const result = PublisherSchema.safeParse("did:"); + expect(result.success).toBe(false); + }); +}); + +describe("ManifestSchema with publisher", () => { + const minimal = { + license: "MIT", + author: { name: "Jane Doe" }, + security: { email: "security@example.com" }, + }; + + it("accepts a manifest with a DID publisher", () => { + const result = ManifestSchema.safeParse({ + ...minimal, + publisher: "did:plc:abc123", + }); + expect(result.success).toBe(true); + }); + + it("accepts a manifest with a handle publisher", () => { + const result = ManifestSchema.safeParse({ + ...minimal, + publisher: "example.com", + }); + expect(result.success).toBe(true); + }); + + it("accepts a manifest without a publisher (first-publish state)", () => { + const result = ManifestSchema.safeParse(minimal); + expect(result.success).toBe(true); + }); + + it("rejects a manifest with an invalid publisher", () => { + const result = ManifestSchema.safeParse({ + ...minimal, + publisher: "not-valid", + }); + expect(result.success).toBe(false); + }); +}); + +describe("checkPublisher", () => { + it("returns 'unpinned' when the manifest has no publisher", async () => { + const result = await checkPublisher({ + manifestPublisher: undefined, + sessionDid: SESSION_DID, + }); + expect(result.kind).toBe("unpinned"); + }); + + it("returns 'match' when a pinned DID equals the session DID", async () => { + const result = await checkPublisher({ + manifestPublisher: SESSION_DID, + sessionDid: SESSION_DID, + }); + expect(result.kind).toBe("match"); + if (result.kind === "match") { + expect(result.pinnedDid).toBe(SESSION_DID); + } + }); + + it("returns 'mismatch' when a pinned DID differs from the session DID", async () => { + const result = 
await checkPublisher({ + manifestPublisher: OTHER_DID, + sessionDid: SESSION_DID, + }); + expect(result.kind).toBe("mismatch"); + if (result.kind === "mismatch") { + expect(result.pinnedDid).toBe(OTHER_DID); + expect(result.pinnedDisplay).toBe(OTHER_DID); + } + }); + + // Handle resolution requires DNS / .well-known reachability. We test + // the DID code path directly; the handle path is exercised in + // integration tests separately (and against a mock resolver in + // publisher-handle.test.ts when we add one later). +}); + +describe("writePublisherBack", () => { + let dir: string; + + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), "emdash-publisher-test-")); + }); + + afterEach(async () => { + await rm(dir, { recursive: true, force: true }); + }); + + it("inserts publisher after license, preserving comments and order", async () => { + const path = join(dir, "emdash-plugin.jsonc"); + const source = `{ + // Top-level comment + "license": "MIT", + "author": { "name": "Jane Doe" }, + "security": { "email": "security@example.com" } +} +`; + await writeFile(path, source, "utf8"); + await writePublisherBack({ manifestPath: path, sessionDid: SESSION_DID }); + const updated = await readFile(path, "utf8"); + // The new key is present, the comment survives, and the DID is + // written verbatim. + expect(updated).toContain(`"publisher": "${SESSION_DID}"`); + expect(updated).toContain("// Top-level comment"); + expect(updated).toContain('"license": "MIT"'); + // The publisher must land AFTER license. We test ordering by + // finding the byte offset of each key. 
+ const licenseIdx = updated.indexOf('"license"'); + const publisherIdx = updated.indexOf('"publisher"'); + const authorIdx = updated.indexOf('"author"'); + expect(licenseIdx).toBeLessThan(publisherIdx); + expect(publisherIdx).toBeLessThan(authorIdx); + }); + + it("appends a // comment when a session handle is provided", async () => { + const path = join(dir, "emdash-plugin.jsonc"); + const source = `{ + "license": "MIT", + "author": { "name": "Jane Doe" }, + "security": { "email": "security@example.com" } +} +`; + await writeFile(path, source, "utf8"); + await writePublisherBack({ + manifestPath: path, + sessionDid: SESSION_DID, + sessionHandle: "jane.bsky.social", + }); + const updated = await readFile(path, "utf8"); + // The comment lives on the same line as the DID; the comma (if + // any) is between the DID and the comment per JSONC convention. + expect(updated).toMatch(/"publisher": "did:plc:abc123def456",? \/\/ jane\.bsky\.social/); + // And the round-trip still parses cleanly (the comment doesn't + // break JSONC). + const { loadManifest } = await import("../src/manifest/load.js"); + const { manifest } = await loadManifest(path); + expect(manifest.publisher).toBe(SESSION_DID); + }); + + it("omits the comment when no handle is provided", async () => { + const path = join(dir, "emdash-plugin.jsonc"); + const source = `{ + "license": "MIT", + "author": { "name": "Jane Doe" }, + "security": { "email": "security@example.com" } +} +`; + await writeFile(path, source, "utf8"); + await writePublisherBack({ manifestPath: path, sessionDid: SESSION_DID }); + const updated = await readFile(path, "utf8"); + // The DID line has no trailing `//` comment. 
+ const publisherLine = updated.split("\n").find((l) => l.includes('"publisher"')); + expect(publisherLine).toBeDefined(); + expect(publisherLine).not.toContain("//"); + }); + + it("does not overwrite an existing publisher (defensive re-parse)", async () => { + const path = join(dir, "emdash-plugin.jsonc"); + const source = `{ + "license": "MIT", + "publisher": "did:plc:user-pinned-already", + "author": { "name": "Jane Doe" }, + "security": { "email": "security@example.com" } +} +`; + await writeFile(path, source, "utf8"); + let warnings = 0; + let infos = 0; + await writePublisherBack({ + manifestPath: path, + sessionDid: SESSION_DID, + onInfo: () => infos++, + onWarn: () => warnings++, + }); + const updated = await readFile(path, "utf8"); + // File unchanged: the existing publisher wins. + expect(updated).toContain('"publisher": "did:plc:user-pinned-already"'); + expect(updated).not.toContain(SESSION_DID); + expect(infos).toBe(1); + expect(warnings).toBe(0); + }); + + it("does not fail when the file is missing (warns only)", async () => { + const path = join(dir, "no-such-file.jsonc"); + let warnings = 0; + await writePublisherBack({ + manifestPath: path, + sessionDid: SESSION_DID, + onWarn: () => warnings++, + }); + // The publish has already succeeded by the time write-back runs; + // a missing file at this point is surprising but never fatal. + expect(warnings).toBe(1); + }); + + it("does not fail when the file no longer parses (warns only)", async () => { + const path = join(dir, "broken.jsonc"); + // User broke the file while we were publishing. 
+ await writeFile(path, '{ "license": "MIT", broken syntax', "utf8"); + let warnings = 0; + await writePublisherBack({ + manifestPath: path, + sessionDid: SESSION_DID, + onWarn: () => warnings++, + }); + expect(warnings).toBe(1); + }); + + it("produces a JSONC document that round-trips through the loader", async () => { + const path = join(dir, "emdash-plugin.jsonc"); + const source = `{ + "license": "MIT", + "author": { "name": "Jane Doe" }, + "security": { "email": "security@example.com" } +} +`; + await writeFile(path, source, "utf8"); + await writePublisherBack({ manifestPath: path, sessionDid: SESSION_DID }); + + // Re-load through the actual loader. If write-back produced + // malformed JSONC or a value the schema rejects, this throws. + const { loadManifest } = await import("../src/manifest/load.js"); + const { manifest } = await loadManifest(path); + expect(manifest.publisher).toBe(SESSION_DID); + expect(manifest.license).toBe("MIT"); + expect(manifest.author?.name).toBe("Jane Doe"); + }); +}); diff --git a/packages/registry-cli/tests/manifest-schema.test.ts b/packages/registry-cli/tests/manifest-schema.test.ts new file mode 100644 index 000000000..a3a28a636 --- /dev/null +++ b/packages/registry-cli/tests/manifest-schema.test.ts @@ -0,0 +1,256 @@ +/** + * Coverage for the `emdash-plugin.jsonc` Zod schema. + * + * The schema is the authoring contract with plugin publishers. A regression + * here means user-visible behaviour change in the field names, validation + * rules, or error messages that publishers rely on. Tests are organised by + * field so a future field add lands cleanly alongside its own test block. + * + * Where applicable, tests assert on the EXACT Zod issue path / message + * because those strings surface in `emdash-registry validate` output -- + * users see them, and silently changing them breaks anyone who built + * tooling around the strings. 
+ */ + +import { describe, expect, it } from "vitest"; + +import { + AuthorSchema, + LicenseSchema, + ManifestSchema, + RepoSchema, + SecurityContactSchema, +} from "../src/manifest/schema.js"; + +describe("LicenseSchema", () => { + it("accepts a typical SPDX expression", () => { + expect(LicenseSchema.parse("MIT")).toBe("MIT"); + expect(LicenseSchema.parse("Apache-2.0")).toBe("Apache-2.0"); + expect(LicenseSchema.parse("MIT OR Apache-2.0")).toBe("MIT OR Apache-2.0"); + }); + + it("rejects the empty string", () => { + const result = LicenseSchema.safeParse(""); + expect(result.success).toBe(false); + }); + + it("rejects a whitespace-only license", () => { + const result = LicenseSchema.safeParse(" "); + expect(result.success).toBe(false); + }); + + it("rejects values over 256 characters", () => { + const result = LicenseSchema.safeParse("A".repeat(257)); + expect(result.success).toBe(false); + }); +}); + +describe("AuthorSchema", () => { + it("accepts the minimal name-only form", () => { + expect(AuthorSchema.parse({ name: "Jane Doe" })).toEqual({ name: "Jane Doe" }); + }); + + it("accepts name + url + email", () => { + const author = { + name: "Jane Doe", + url: "https://example.com", + email: "jane@example.com", + }; + expect(AuthorSchema.parse(author)).toEqual(author); + }); + + it("rejects an empty name", () => { + const result = AuthorSchema.safeParse({ name: "" }); + expect(result.success).toBe(false); + }); + + it("rejects unknown keys (strict mode)", () => { + const result = AuthorSchema.safeParse({ name: "Jane", website: "https://example.com" }); + expect(result.success).toBe(false); + }); + + it("rejects a malformed URL", () => { + const result = AuthorSchema.safeParse({ name: "Jane", url: "not-a-url" }); + expect(result.success).toBe(false); + }); + + it("rejects a malformed email", () => { + const result = AuthorSchema.safeParse({ name: "Jane", email: "not-an-email" }); + expect(result.success).toBe(false); + }); +}); + 
+describe("SecurityContactSchema", () => { + it("accepts an email-only contact", () => { + expect(SecurityContactSchema.parse({ email: "security@example.com" })).toEqual({ + email: "security@example.com", + }); + }); + + it("accepts a url-only contact", () => { + expect(SecurityContactSchema.parse({ url: "https://example.com/security" })).toEqual({ + url: "https://example.com/security", + }); + }); + + it("rejects an empty contact (no email or url)", () => { + const result = SecurityContactSchema.safeParse({}); + expect(result.success).toBe(false); + if (!result.success) { + // The exact message users see at validate-time; pin it. + expect(result.error.issues[0]?.message).toContain("at least one of `url` or `email`"); + } + }); +}); + +describe("RepoSchema", () => { + it("accepts an https:// URL", () => { + expect(RepoSchema.parse("https://github.com/example/plugin")).toBe( + "https://github.com/example/plugin", + ); + }); + + it("rejects http:// URLs", () => { + const result = RepoSchema.safeParse("http://github.com/example/plugin"); + expect(result.success).toBe(false); + }); + + it("rejects non-URL strings", () => { + const result = RepoSchema.safeParse("not a url"); + expect(result.success).toBe(false); + }); +}); + +describe("ManifestSchema (full document)", () => { + const minimal = { + license: "MIT", + author: { name: "Jane Doe" }, + security: { email: "security@example.com" }, + }; + + it("accepts the minimal required shape", () => { + const result = ManifestSchema.safeParse(minimal); + expect(result.success).toBe(true); + }); + + it("accepts a manifest with $schema for IDE completion", () => { + const result = ManifestSchema.safeParse({ + ...minimal, + $schema: "./node_modules/@emdash-cms/registry-cli/schemas/emdash-plugin.schema.json", + }); + expect(result.success).toBe(true); + }); + + it("accepts the multi-author/multi-contact form", () => { + const result = ManifestSchema.safeParse({ + license: "MIT", + authors: [{ name: "Alice" }, { name: "Bob" }], 
+ securityContacts: [{ email: "alice@example.com" }, { url: "https://example.com/security" }], + }); + expect(result.success).toBe(true); + }); + + it("rejects mixing `author` and `authors`", () => { + const result = ManifestSchema.safeParse({ + license: "MIT", + author: { name: "Alice" }, + authors: [{ name: "Bob" }], + security: { email: "security@example.com" }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues[0]?.message).toContain("both `author` and `authors`"); + } + }); + + it("rejects mixing `security` and `securityContacts`", () => { + const result = ManifestSchema.safeParse({ + license: "MIT", + author: { name: "Alice" }, + security: { email: "a@example.com" }, + securityContacts: [{ email: "b@example.com" }], + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues[0]?.message).toContain("both `security` and `securityContacts`"); + } + }); + + it("requires either `author` or `authors`", () => { + const result = ManifestSchema.safeParse({ + license: "MIT", + security: { email: "security@example.com" }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues[0]?.message).toContain("`author: { ... }`"); + } + }); + + it("requires either `security` or `securityContacts`", () => { + const result = ManifestSchema.safeParse({ + license: "MIT", + author: { name: "Alice" }, + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(result.error.issues[0]?.message).toContain("`security: { ... 
}`"); + } + }); + + it("rejects unknown top-level keys (strict mode catches typos)", () => { + const result = ManifestSchema.safeParse({ + ...minimal, + licens: "MIT", // typo + }); + expect(result.success).toBe(false); + }); + + it("rejects an empty authors array (lexicon requires >= 1)", () => { + const result = ManifestSchema.safeParse({ + license: "MIT", + authors: [], + security: { email: "security@example.com" }, + }); + expect(result.success).toBe(false); + }); + + it("rejects more than 32 authors (lexicon cap)", () => { + const authors = Array.from({ length: 33 }, (_, i) => ({ name: `Author ${i}` })); + const result = ManifestSchema.safeParse({ + license: "MIT", + authors, + security: { email: "security@example.com" }, + }); + expect(result.success).toBe(false); + }); + + it("rejects more than 5 keywords (FAIR convention)", () => { + const result = ManifestSchema.safeParse({ + ...minimal, + keywords: ["a", "b", "c", "d", "e", "f"], + }); + expect(result.success).toBe(false); + }); + + it("accepts a full populated manifest", () => { + const result = ManifestSchema.safeParse({ + $schema: "./node_modules/@emdash-cms/registry-cli/schemas/emdash-plugin.schema.json", + license: "MIT", + author: { + name: "Jane Doe", + url: "https://example.com", + email: "jane@example.com", + }, + security: { + email: "security@example.com", + url: "https://example.com/security", + }, + name: "Gallery", + description: "Image gallery block for EmDash.", + keywords: ["gallery", "images", "media"], + repo: "https://github.com/emdash-cms/plugin-gallery", + }); + expect(result.success).toBe(true); + }); +}); diff --git a/packages/registry-cli/tests/schema-drift.test.ts b/packages/registry-cli/tests/schema-drift.test.ts new file mode 100644 index 000000000..01882686b --- /dev/null +++ b/packages/registry-cli/tests/schema-drift.test.ts @@ -0,0 +1,60 @@ +/** + * Guard against drift between the Zod source of truth and the committed + * JSON Schema at `schemas/emdash-plugin.schema.json`. 
+ * + * The committed JSON Schema is shipped to users via + * `node_modules/@emdash-cms/registry-cli/schemas/emdash-plugin.schema.json` + * so editors can offer completion and validation without running our CLI. + * If a contributor changes the Zod schema and forgets to regenerate, this + * test fails with a clear "run pnpm gen-schema" instruction. + * + * We assert byte-for-byte equality after re-running the same `toJSONSchema` + * call the generator script uses. The generator's wrapping fields (`$id`, + * `title`, `description`) are added on top so we replicate them here. + */ + +import { readFile } from "node:fs/promises"; +import { resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { describe, expect, it } from "vitest"; +import { z } from "zod"; + +import { ManifestSchema } from "../src/manifest/schema.js"; + +const HERE = fileURLToPath(new URL(".", import.meta.url)); +const COMMITTED_SCHEMA_PATH = resolve(HERE, "..", "schemas", "emdash-plugin.schema.json"); + +describe("JSON Schema drift", () => { + it("matches the output of z.toJSONSchema(ManifestSchema)", async () => { + const committed = await readFile(COMMITTED_SCHEMA_PATH, "utf8"); + + // Reproduce the generator script's emit. If this diverges from + // `scripts/gen-schema.ts`, update both (the script is the + // canonical version users run; this is its mirror). + const jsonSchema = z.toJSONSchema(ManifestSchema, { + target: "draft-2020-12", + reused: "ref", + }); + const document = { + $schema: "https://json-schema.org/draft/2020-12/schema", + $id: "https://emdashcms.com/schemas/emdash-plugin.schema.json", + title: "EmDash plugin manifest (emdash-plugin.jsonc)", + description: + "Authoring format for publishing plugins to the EmDash plugin registry. Translated to the on-wire atproto record format at publish time. 
See https://github.com/emdash-cms/emdash/issues/1028.", + ...jsonSchema, + }; + const regenerated = `${JSON.stringify(document, null, "\t")}\n`; + + // On failure, the diff is enormous and unreadable. Surface a + // pointer to the fix command instead. + if (committed !== regenerated) { + throw new Error( + "schemas/emdash-plugin.schema.json is out of date with the Zod schema.\n" + + "Run: pnpm --filter @emdash-cms/registry-cli gen-schema\n" + + "Then commit the result.", + ); + } + expect(committed).toBe(regenerated); + }); +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1978a7350..a7a1950e4 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -210,6 +210,9 @@ catalogs: better-sqlite3: specifier: ^12.8.0 version: 12.8.0 + jsonc-parser: + specifier: ^3.3.1 + version: 3.3.1 publint: specifier: 0.3.17 version: 0.3.17 @@ -235,7 +238,7 @@ catalogs: specifier: ^4.83.0 version: 4.90.0 zod: - specifier: ^4.3.6 + specifier: ^4.4.1 version: 4.4.1 importers: @@ -838,6 +841,12 @@ importers: specifier: 'catalog:' version: 4.90.0(@cloudflare/workers-types@4.20260305.1) + infra/plugins-site: + devDependencies: + wrangler: + specifier: 'catalog:' + version: 4.90.0(@cloudflare/workers-types@4.20260305.1) + packages/admin: dependencies: '@atcute/identity-resolver': @@ -1855,6 +1864,9 @@ importers: image-size: specifier: ^2.0.2 version: 2.0.2 + jsonc-parser: + specifier: 'catalog:' + version: 3.3.1 modern-tar: specifier: ^0.7.5 version: 0.7.6 @@ -1864,6 +1876,9 @@ importers: tsdown: specifier: 'catalog:' version: 0.20.3(@arethetypeswrong/core@0.18.2)(@typescript/native-preview@7.0.0-dev.20260213.1)(oxc-resolver@11.16.4)(publint@0.3.17)(typescript@5.9.3) + zod: + specifier: 'catalog:' + version: 4.4.1 devDependencies: '@arethetypeswrong/cli': specifier: 'catalog:' @@ -14778,7 +14793,7 @@ snapshots: sirv: 3.0.2 tinyglobby: 0.2.16 tinyrainbow: 3.1.0 - vitest: 
4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.10.13)(@vitest/browser-playwright@4.1.5)(@vitest/ui@4.1.5)(jsdom@26.1.0)(vite@6.4.1(@types/node@24.10.13)(jiti@2.6.1)(lightningcss@1.32.0)(tsx@4.21.0)(yaml@2.8.2)) + vitest: 4.1.5(@opentelemetry/api@1.9.0)(@types/node@24.10.13)(@vitest/browser-playwright@4.1.5)(@vitest/ui@4.1.5)(jsdom@26.1.0)(vite@8.0.11(@types/node@24.10.13)(esbuild@0.27.3)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.2)) '@vitest/utils@4.1.5': dependencies: diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index afc762847..a997cef15 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -13,6 +13,7 @@ packages: - infra/cache-demo - infra/perf-monitor - infra/discord-bot + - infra/plugins-site catalog: "@arethetypeswrong/cli": ^0.18.2 @@ -81,6 +82,7 @@ catalog: "@types/node": 24.10.13 "@types/react": 19.2.14 "@types/react-dom": 19.2.3 + jsonc-parser: ^3.3.1 astro: ^6.0.1 astro-iconset: ^0.0.4 better-sqlite3: ^12.8.0 @@ -92,9 +94,4 @@ catalog: vite: ^8.0.11 vitest: ^4.1.5 wrangler: ^4.83.0 - # Catalog-pin Zod so the whole workspace dedupes on a single instance. - # Zod 4 embeds the version in the type, so even ^4.3.6 vs ^4.4.1 produce - # structurally incompatible ZodType across packages that mix astro/zod - # and emdash's zod (e.g. trusted plugins like @emdash-cms/plugin-forms - # importing 'z' from astro/zod and passing schemas to definePlugin). - zod: ^4.3.6 + zod: ^4.4.1 From 94171cee978e84cb8000a6d956c2ecce4b8f4ca4 Mon Sep 17 00:00:00 2001 From: Matt Kane Date: Thu, 14 May 2026 11:41:36 +0100 Subject: [PATCH 2/6] fix(registry-cli): manifest loader hardening + drop in-development warnings Adversarial review findings: - Annotation in writePublisherBack now anchors to the publisher property via the parse tree, not a raw DID string search. Previously, a description (or any other field) that contained the DID would have taken the // comment. 
- Loader rejects manifests over 1 MiB (MANIFEST_TOO_LARGE) so a mistyped --manifest at a huge file fails fast instead of OOMing. - Loader rejects duplicate JSONC keys with a source location, so a reviewer can't be fooled by an honest "publisher": "did:plc:X" at the top of the file that's silently overridden by a hostile value below. - writePublisherBack hashes the original source, re-reads the file immediately before rename, and aborts if the bytes changed. Narrows the TOCTOU window between concurrent publishes / editor saves. - publish accepts --manifest=false and --manifest= as opt-out shorthands, matching the help text. - publish warns when --no-manifest is set and a manifest exists at the default path, so the publisher pin's safety story stays visible. Cleanup: - Removed the findUnwiredManifestFields helper and the in-product warnings about fields the schema accepts but publish doesn't yet consume. The release ships when the work is done; leaking intermediate state into the CLI is noise. Tests: 170 passing. 
--- .../schemas/emdash-plugin.schema.json | 23 ++- packages/registry-cli/src/commands/publish.ts | 28 ++-- .../registry-cli/src/commands/validate.ts | 23 +-- packages/registry-cli/src/manifest/load.ts | 101 ++++++++++++- .../registry-cli/src/manifest/publisher.ts | 134 ++++++++++++++---- .../registry-cli/src/manifest/translate.ts | 58 +------- .../tests/manifest-publisher.test.ts | 33 +++++ .../tests/manifest-translate.test.ts | 78 ++++++++++ 8 files changed, 364 insertions(+), 114 deletions(-) create mode 100644 packages/registry-cli/tests/manifest-translate.test.ts diff --git a/packages/registry-cli/schemas/emdash-plugin.schema.json b/packages/registry-cli/schemas/emdash-plugin.schema.json index 5de992e21..2615dcbe0 100644 --- a/packages/registry-cli/schemas/emdash-plugin.schema.json +++ b/packages/registry-cli/schemas/emdash-plugin.schema.json @@ -39,7 +39,9 @@ "$ref": "#/$defs/__schema18" } }, - "required": ["license"], + "required": [ + "license" + ], "additionalProperties": false, "$defs": { "__schema0": { @@ -52,13 +54,20 @@ "maxLength": 256, "title": "License", "description": "SPDX license expression (e.g. \"MIT\", \"Apache-2.0\", \"MIT OR Apache-2.0\"). Required on first publish; ignored on subsequent publishes (the existing profile wins).", - "examples": ["MIT", "Apache-2.0", "MIT OR Apache-2.0"] + "examples": [ + "MIT", + "Apache-2.0", + "MIT OR Apache-2.0" + ] }, "__schema2": { "type": "string", "title": "Publisher", "description": "Atproto DID or handle of the publishing identity. Pinned on first publish to prevent accidental publishes from a different account. 
DIDs are recommended (durable); handles work but are mutable.", - "examples": ["did:plc:abc123def456", "example.com"] + "examples": [ + "did:plc:abc123def456", + "example.com" + ] }, "__schema3": { "$ref": "#/$defs/__schema4" @@ -76,7 +85,9 @@ "$ref": "#/$defs/__schema7" } }, - "required": ["name"], + "required": [ + "name" + ], "additionalProperties": false, "title": "Author", "description": "A single author entry. Mirrors the lexicon's author shape." @@ -185,7 +196,9 @@ "pattern": "^https:\\/\\/", "title": "Source repository", "description": "HTTPS URL of the plugin's source repository. Surfaced in registry listings.", - "examples": ["https://github.com/emdash-cms/plugin-gallery"] + "examples": [ + "https://github.com/emdash-cms/plugin-gallery" + ] } } } diff --git a/packages/registry-cli/src/commands/publish.ts b/packages/registry-cli/src/commands/publish.ts index 3a9e22c58..1785d8ad4 100644 --- a/packages/registry-cli/src/commands/publish.ts +++ b/packages/registry-cli/src/commands/publish.ts @@ -32,7 +32,6 @@ import { formatBytes, MAX_BUNDLE_SIZE, validateBundleSize } from "../bundle/util import { loadManifest, MANIFEST_FILENAME, ManifestError } from "../manifest/load.js"; import { checkPublisher, PublisherCheckError, writePublisherBack } from "../manifest/publisher.js"; import { - findUnwiredManifestFields, manifestToProfileBootstrap, normaliseManifest, type NormalisedManifest, @@ -439,19 +438,32 @@ async function loadManifestBootstrap( args: PublishArgs, log: { info(m: string): void; warn(m: string): void }, ): Promise { - if (args["no-manifest"]) return null; + const optedOut = + args["no-manifest"] === true || args.manifest === "false" || args.manifest === ""; + if (optedOut) { + // `--no-manifest` is a power-user escape hatch (CI, debugging), + // but silently skipping a manifest at the default path defeats + // the publisher-pin safety story. If the file exists, warn that + // the pin (if any) won't be checked. 
We probe via stat to keep + // the path cheap: no parse, no schema validation. + const defaultPath = `./${MANIFEST_FILENAME}`; + try { + const { stat } = await import("node:fs/promises"); + await stat(defaultPath); + log.warn( + `Skipping manifest at ${defaultPath} (--no-manifest is set). Publisher pin and license/security defaults are NOT being applied for this publish.`, + ); + } catch { + // No manifest at the default path; nothing to warn about. + } + return null; + } const explicit = args.manifest !== undefined && args.manifest.length > 0; const path = args.manifest ?? `./${MANIFEST_FILENAME}`; try { const { manifest, path: resolvedPath } = await loadManifest(path); const normalised = normaliseManifest(manifest); log.info(`Loaded manifest: ${pc.dim(resolvedPath)}`); - const unwired = findUnwiredManifestFields(normalised); - for (const u of unwired) { - log.warn( - `Manifest field ${pc.bold(u.field)} is accepted but not yet published end-to-end (tracking in ${u.issue}). It will be ignored until that issue lands.`, - ); - } return { path: resolvedPath, manifest: normalised, diff --git a/packages/registry-cli/src/commands/validate.ts b/packages/registry-cli/src/commands/validate.ts index e373af9b0..09ef43072 100644 --- a/packages/registry-cli/src/commands/validate.ts +++ b/packages/registry-cli/src/commands/validate.ts @@ -20,7 +20,6 @@ import consola from "consola"; import pc from "picocolors"; import { ManifestError, loadManifest, MANIFEST_FILENAME } from "../manifest/load.js"; -import { findUnwiredManifestFields, normaliseManifest } from "../manifest/translate.js"; export const validateCommand = defineCommand({ meta: { @@ -37,36 +36,20 @@ export const validateCommand = defineCommand({ json: { type: "boolean", description: - "Emit machine-readable JSON instead of human-readable output. Stdout is { ok: true, path, unwired } or { ok: false, error: { code, message, issues } }. 
Exit code mirrors human mode.", + "Emit machine-readable JSON instead of human-readable output. Stdout is { ok: true, path } or { ok: false, error: { code, message, issues } }. Exit code mirrors human mode.", }, }, async run({ args }) { const path = args.path ?? "."; try { - const { manifest, path: resolved } = await loadManifest(path); - const normalised = normaliseManifest(manifest); - const unwired = findUnwiredManifestFields(normalised); + const { path: resolved } = await loadManifest(path); if (args.json) { - process.stdout.write( - `${JSON.stringify({ - ok: true, - path: resolved, - unwired: unwired.map((u) => ({ field: u.field, issue: u.issue })), - })}\n`, - ); + process.stdout.write(`${JSON.stringify({ ok: true, path: resolved })}\n`); return; } consola.success(`Manifest is valid: ${pc.dim(resolved)}`); - if (unwired.length > 0) { - consola.warn( - "Some fields are accepted by the manifest schema but aren't yet read by `publish`. They'll be wired through in the issues listed.", - ); - for (const u of unwired) { - consola.warn(` ${pc.bold(u.field)} -> ${u.issue}`); - } - } } catch (error) { if (error instanceof ManifestError) { if (args.json) { diff --git a/packages/registry-cli/src/manifest/load.ts b/packages/registry-cli/src/manifest/load.ts index 5b06e0f76..3ba57494a 100644 --- a/packages/registry-cli/src/manifest/load.ts +++ b/packages/registry-cli/src/manifest/load.ts @@ -17,7 +17,7 @@ * pointer they'd get from `tsc` or `eslint`, not a Zod issue tree. */ -import { readFile } from "node:fs/promises"; +import { readFile, stat } from "node:fs/promises"; import { resolve } from "node:path"; import { parseTree, type Node, type ParseError, printParseErrorCode } from "jsonc-parser"; @@ -30,8 +30,19 @@ import { ManifestSchema, type Manifest } from "./schema.js"; */ export const MANIFEST_FILENAME = "emdash-plugin.jsonc"; +/** + * Hard cap on the bytes we'll buffer for a manifest. 
The largest real-world + * v1 manifest under the current schema is a few hundred bytes; even a heavily- + * populated future version with all the long-form sections from issue #1030 + * (5 sections × 20 KB cap each from the lexicon) tops out under 128 KB. We + * pick 1 MiB so accidental mis-targets (`--manifest ./large.tar`) fail fast + * with a clear error rather than OOMing the CLI. + */ +export const MANIFEST_MAX_BYTES = 1024 * 1024; + export type ManifestErrorCode = | "MANIFEST_NOT_FOUND" + | "MANIFEST_TOO_LARGE" | "MANIFEST_PARSE_ERROR" | "MANIFEST_VALIDATION_ERROR"; @@ -93,11 +104,37 @@ export async function loadManifest(path: string): Promise { ? resolved : resolve(resolved, MANIFEST_FILENAME); + // Stat first so a wildly-oversized file fails before we buffer it. + // `readFile` itself doesn't honour any cap, so we'd otherwise read + // the whole thing into memory before any size check could fire. + let size: number; + try { + const info = await stat(filePath); + size = info.size; + } catch (error) { + if (isNodeNotFoundError(error)) { + throw new ManifestError( + "MANIFEST_NOT_FOUND", + `No manifest at ${filePath}. Create one with: emdash-registry init`, + filePath, + ); + } + throw error; + } + if (size > MANIFEST_MAX_BYTES) { + throw new ManifestError( + "MANIFEST_TOO_LARGE", + `Manifest at ${filePath} is ${size} bytes; the maximum supported size is ${MANIFEST_MAX_BYTES} bytes. Check that you pointed --manifest at the right file.`, + filePath, + ); + } + let source: string; try { source = await readFile(filePath, "utf8"); } catch (error) { if (isNodeNotFoundError(error)) { + // Race: file was removed between stat and readFile. throw new ManifestError( "MANIFEST_NOT_FOUND", `No manifest at ${filePath}. 
Create one with: emdash-registry init`, @@ -150,7 +187,28 @@ function parseAndValidate(source: string, filePath: string): LoadManifestResult if (!root) { // Shouldn't be reachable when `allowEmptyContent: false` is set and // `parseErrors` is empty, but `parseTree`'s return type is nullable. - throw new ManifestError("MANIFEST_PARSE_ERROR", `${filePath}: file is empty`, filePath); + throw new ManifestError( + "MANIFEST_PARSE_ERROR", + `${filePath}: file is empty`, + filePath, + ); + } + + // Reject duplicate keys before validation. `nodeToValue` is last-wins + // (matches JSON.parse semantics) which silently shadows earlier keys + // — review-hostile for a security-sensitive document like this. We + // scan once for duplicates and surface them as a parse error with a + // source location, so a `git diff` reviewer can't be fooled by a + // "publisher": "" at the top of the file that gets overridden + // by "publisher": "" further down. + const duplicate = findDuplicateKey(root); + if (duplicate) { + const { line, column } = offsetToLineCol(source, duplicate.offset); + throw new ManifestError( + "MANIFEST_PARSE_ERROR", + `${filePath}:${line}:${column}: duplicate key "${duplicate.key}". Each property may only be declared once.`, + filePath, + ); } const value = nodeToValue(root); @@ -253,6 +311,45 @@ function findNodeAtPath(root: Node, path: ReadonlyArray): Node return current; } +/** + * Recursively scan an object node for duplicate property names. Returns + * the FIRST duplicate found (innermost-first within the recursion, but + * order across siblings is the order in the source) with its offset for + * line:column reporting. + * + * We scan the entire tree, not just the root: duplicate keys inside + * `author: { ... }` or `security: { ... }` are equally review-hostile. 
+ */ +function findDuplicateKey( + node: Node, +): { key: string; offset: number } | undefined { + if (node.type === "object" && node.children) { + const seen = new Set(); + for (const prop of node.children) { + if (prop.type !== "property") continue; + const keyNode = prop.children?.[0]; + if (!keyNode || keyNode.type !== "string" || typeof keyNode.value !== "string") { + continue; + } + if (seen.has(keyNode.value)) { + return { key: keyNode.value, offset: keyNode.offset }; + } + seen.add(keyNode.value); + const valueNode = prop.children?.[1]; + if (valueNode) { + const nested = findDuplicateKey(valueNode); + if (nested) return nested; + } + } + } else if (node.type === "array" && node.children) { + for (const child of node.children) { + const nested = findDuplicateKey(child); + if (nested) return nested; + } + } + return undefined; +} + /** * Convert a JSONC syntax-tree node to its plain JavaScript value. The * `parseTree` API doesn't return values directly; this walks the tree. diff --git a/packages/registry-cli/src/manifest/publisher.ts b/packages/registry-cli/src/manifest/publisher.ts index dccd15fda..3b37581f8 100644 --- a/packages/registry-cli/src/manifest/publisher.ts +++ b/packages/registry-cli/src/manifest/publisher.ts @@ -27,13 +27,15 @@ * still pin a handle if they prefer the readability. 
*/ -import { randomUUID } from "node:crypto"; -import { readFile, rename, writeFile } from "node:fs/promises"; +import { createHash, randomUUID } from "node:crypto"; +import { readFile, rename, stat, writeFile } from "node:fs/promises"; import { dirname, join } from "node:path"; import { isDid, isHandle, type Did, type Handle } from "@atcute/lexicons/syntax"; import { applyEdits, modify, parseTree, printParseErrorCode, type ParseError } from "jsonc-parser"; +import { MANIFEST_MAX_BYTES } from "./load.js"; + import { createActorResolver } from "../oauth.js"; /** @@ -161,6 +163,17 @@ export async function writePublisherBack(input: { }): Promise { const { manifestPath, sessionDid, sessionHandle, onInfo, onWarn } = input; try { + // Stat first: if the file ballooned between load and write-back + // (e.g. user pasted in a large changelog mid-publish), don't + // buffer the new contents. The post-publish path should never + // touch a file the loader wouldn't have accepted. + const info = await stat(manifestPath); + if (info.size > MANIFEST_MAX_BYTES) { + onWarn?.( + `Skipped writing publisher to ${manifestPath} (file is now ${info.size} bytes, exceeding the ${MANIFEST_MAX_BYTES}-byte cap; the publish succeeded).`, + ); + return; + } const source = await readFile(manifestPath, "utf8"); // Defensive re-parse: confirm `publisher` is still absent. If @@ -227,22 +240,40 @@ export async function writePublisherBack(input: { // Append a `// ` line comment to the inserted publisher // line, if we have a handle. The comment is for human readers of // `git diff`; the CLI itself never parses it back out. We locate - // the inserted line by matching on the DID string (opaque enough - // to be unique within a single-publisher manifest) and append - // the comment before the line terminator. - // - // The substitution runs ONCE: if the DID happens to appear - // elsewhere (it shouldn't for a fresh insertion, but defensively - // anyway), only the first match is annotated. 
+ // the inserted line by re-parsing the updated source and looking + // up the `publisher` property node's exact source offset, so a + // DID string that happens to appear elsewhere in the document + // (e.g. in `description`) can't deflect the comment to the + // wrong line. const updated = sessionHandle - ? annotatePublisherLine(applied, sessionDid, sessionHandle) + ? annotatePublisherLine(applied, sessionHandle) : applied; // Atomic write: tmpfile + rename. POSIX rename is atomic, so a // crash mid-write leaves the previous file intact rather than // truncating the publisher's manifest. + // + // TOCTOU narrowing: re-read the file IMMEDIATELY before rename + // and compare to the bytes we processed. If anything changed + // (editor save, concurrent publish, manual edit), abort the + // write-back. This doesn't eliminate the race — between the + // final read and the rename a writer could still land — but + // it shrinks the window to milliseconds. The publish has + // already succeeded; losing a convenience pin is preferable + // to overwriting a user's edit. + const expectedHash = sha256(source); const tmp = join(dirname(manifestPath), `.${randomUUID()}.tmp`); await writeFile(tmp, updated, "utf8"); + const currentSource = await readFile(manifestPath, "utf8").catch(() => null); + const currentHash = currentSource === null ? null : sha256(currentSource); + if (currentHash !== expectedHash) { + // File changed under us. Clean up the tmpfile and bail. + await unlinkIgnoreMissing(tmp); + onWarn?.( + `Skipped writing publisher to ${manifestPath} (file changed during publish; no edits made). The publish succeeded; you can pin manually on your next edit.`, + ); + return; + } await rename(tmp, manifestPath); onInfo?.(`Pinned publisher to ${sessionDid} in ${manifestPath}.`); } catch (error) { @@ -254,18 +285,52 @@ export async function writePublisherBack(input: { } } +/** Compute a stable hash of the source bytes, used for TOCTOU narrowing. 
*/ +function sha256(source: string): string { + return createHash("sha256").update(source, "utf8").digest("hex"); +} + /** - * Append `// ` to the line containing the freshly-inserted DID. + * Remove a file path, treating ENOENT as success. Used to clean up the + * tmpfile when the write-back is aborted post-write but pre-rename. + */ +async function unlinkIgnoreMissing(path: string): Promise { + const { unlink } = await import("node:fs/promises"); + try { + await unlink(path); + } catch (error) { + if ( + error instanceof Error && + "code" in error && + (error as { code: unknown }).code === "ENOENT" + ) { + return; + } + // Anything else is also non-fatal here (the tmpfile is best- + // effort cleanup); we swallow it so the outer warn message + // stays the dominant signal. + } +} + +/** + * Append `// ` to the line containing the inserted `publisher` + * property's value. + * + * The implementation re-parses the updated source, locates the + * `publisher` property's value node by name (NOT by string-matching the + * DID), and uses that node's source offset as the anchor. Two reasons + * for the re-parse rather than a raw string search: * - * The match is anchored to the line containing `""` (the DID is - * always quoted in JSON) so a substring collision in a different value - * is impossible. The line may end in `",\n"` (interior key) or `"\n"` - * (trailing key); we insert the comment BEFORE the line terminator so - * the comma stays adjacent to the value. + * 1. A DID string can legitimately appear elsewhere in the manifest + * (e.g. as the value of `description` or a custom comment-like + * field). String search would attach the comment to the first + * occurrence, not the inserted property. + * 2. The parse tree gives us byte-exact offsets, so the line-end + * lookup can't degrade silently when `indexOf` returns -1. 
* - * If the DID isn't found, returns the input unchanged — the publish-back - * already succeeded; an unannotated line is a degraded outcome but not - * a failure. + * If the `publisher` property can't be located (unexpected — we just + * inserted it), returns the input unchanged. Annotation is a nice-to- + * have; the publish has already succeeded. * * No sanitisation of the handle is needed: `session.handle` is * populated by atproto's identity resolver at login time, which only @@ -274,13 +339,32 @@ export async function writePublisherBack(input: { * `.well-known` to the session DID. An attacker who can put arbitrary * bytes into `session.handle` already controls the user's identity. */ -function annotatePublisherLine(source: string, did: Did, handle: string): string { +function annotatePublisherLine(source: string, handle: string): string { if (handle.length === 0) return source; - // Match the DID inside its quotes on one line. The DID was emitted - // by `JSON.stringify` via `jsonc-parser`, so it's safely escaped. - const needle = `"${did}"`; - const lineEnd = source.indexOf("\n", source.indexOf(needle)); - if (lineEnd < 0) return source; + + const tree = parseTree(source); + if (!tree || tree.type !== "object") return source; + const publisherProp = tree.children?.find( + (prop) => + prop.type === "property" && + prop.children?.[0]?.type === "string" && + prop.children[0].value === "publisher", + ); + const valueNode = publisherProp?.children?.[1]; + if (!valueNode) return source; + + // Find the end of the line containing the value's last byte. The + // value's offset+length lands right after the closing `"` of the + // DID string; `indexOf("\n", endOfValue)` walks forward to the + // newline that terminates the property's line. The intervening + // bytes can be `,`, whitespace, or already-existing comment text. 
+ const endOfValue = valueNode.offset + valueNode.length; + const lineEnd = source.indexOf("\n", endOfValue); + if (lineEnd < 0) { + // `publisher` is on the last line of the file with no trailing + // newline. Append the comment to the end-of-file content. + return `${source} // ${handle}`; + } // Walk back past any trailing CR so the comment lands at the end // of the *content*, not after a literal "\r" on Windows-authored // files. diff --git a/packages/registry-cli/src/manifest/translate.ts b/packages/registry-cli/src/manifest/translate.ts index a58e3fad8..3d5625f84 100644 --- a/packages/registry-cli/src/manifest/translate.ts +++ b/packages/registry-cli/src/manifest/translate.ts @@ -1,12 +1,6 @@ /** * Translate a validated manifest into the existing publish-input shape. * - * For v1 (issue #1028), publish consumes only the six `ProfileBootstrap` - * fields the original flag-based UX exposed. The manifest carries those - * plus a small handful of additional fields (`name`, `description`, - * `keywords`, `repo`) that aren't wired through publish yet — those land - * in issues #1029-#1033. - * * The single-author / single-security-contact convenience forms are * normalised here: by the time this returns, the caller sees only the * array shapes the lexicon uses. @@ -20,10 +14,6 @@ import type { Manifest, ManifestAuthor, ManifestSecurityContact } from "./schema * collapsed" view of a manifest. The CLI passes this to the publish * pipeline rather than the raw `Manifest` so the rest of the code * never has to think about `author` vs `authors`. - * - * Fields not yet consumed by publish (name, description, keywords, repo) - * are passed through unchanged so issues #1029-#1033 can wire them up - * without revisiting this translation step. 
*/ export interface NormalisedManifest { license: string; @@ -67,18 +57,10 @@ export function normaliseManifest(manifest: Manifest): NormalisedManifest { } /** - * Convert a normalised manifest into the legacy `ProfileBootstrap` shape - * that `publishRelease` already understands. This is a TEMPORARY bridge - * for v1 — it discards everything but the six fields publish reads today. - * - * Once issue #1029 lands and `publishRelease` accepts the richer profile - * shape directly, this function goes away and we pass the - * `NormalisedManifest` straight through. - * - * Behaviour for multi-author plugins: the first author wins. The publish - * lexicon supports an array, but `ProfileBootstrap` doesn't model that - * today. Issues #1029 fixes this; until then, multi-author manifests - * publish their first author and we emit a warning at the CLI layer. + * Convert a normalised manifest into the `ProfileBootstrap` shape that + * `publishRelease` consumes. For multi-author manifests, the first + * author wins (the publish lexicon supports an array, but + * `ProfileBootstrap` doesn't model that yet). */ export function manifestToProfileBootstrap(manifest: NormalisedManifest): ProfileBootstrap { const author = manifest.authors[0]; @@ -92,35 +74,3 @@ export function manifestToProfileBootstrap(manifest: NormalisedManifest): Profil if (security?.url !== undefined) profile.securityUrl = security.url; return profile; } - -/** - * True if the manifest carries fields that v1 publish can't yet consume. - * The CLI uses this to warn the user that those fields are accepted (so - * `validate` passes) but won't appear in the published record until the - * relevant follow-up issue lands. - * - * Centralised here so each follow-up issue has one place to remove its - * field from the warning list when it wires through. 
- */ -export function findUnwiredManifestFields( - manifest: NormalisedManifest, -): Array<{ field: string; issue: string }> { - const unwired: Array<{ field: string; issue: string }> = []; - if (manifest.name !== undefined) unwired.push({ field: "name", issue: "#1029" }); - if (manifest.description !== undefined) { - unwired.push({ field: "description", issue: "#1029" }); - } - if (manifest.keywords !== undefined) { - unwired.push({ field: "keywords", issue: "#1029" }); - } - if (manifest.repo !== undefined) unwired.push({ field: "repo", issue: "#1029" }); - // Multi-author warning is separate: publish accepts the field but only - // publishes the first entry until #1029 lands. - if (manifest.authors.length > 1) { - unwired.push({ field: "authors[1..]", issue: "#1029" }); - } - if (manifest.securityContacts.length > 1) { - unwired.push({ field: "securityContacts[1..]", issue: "#1029" }); - } - return unwired; -} diff --git a/packages/registry-cli/tests/manifest-publisher.test.ts b/packages/registry-cli/tests/manifest-publisher.test.ts index cad10d4ed..13d7e0a97 100644 --- a/packages/registry-cli/tests/manifest-publisher.test.ts +++ b/packages/registry-cli/tests/manifest-publisher.test.ts @@ -193,6 +193,39 @@ describe("writePublisherBack", () => { expect(manifest.publisher).toBe(SESSION_DID); }); + it("anchors the comment to the publisher property even when the DID appears in another field", async () => { + // Regression: a previous implementation searched for the DID + // string anywhere in the document, which would attach the + // `// ` comment to whichever field happened to contain + // the DID-shaped substring first. Real plugins that document a + // "previous publisher was did:plc:..." in the description (e.g. + // after a maintainer transfer) would have triggered this. + const path = join(dir, "emdash-plugin.jsonc"); + const source = `{ + "license": "MIT", + "description": "Originally published as ${SESSION_DID}. 
See changelog.", + "author": { "name": "Jane Doe" }, + "security": { "email": "security@example.com" } +} +`; + await writeFile(path, source, "utf8"); + await writePublisherBack({ + manifestPath: path, + sessionDid: SESSION_DID, + sessionHandle: "jane.bsky.social", + }); + const updated = await readFile(path, "utf8"); + // The handle comment lives on the publisher line, NOT on the + // description line — confirm by inspecting each. + const lines = updated.split("\n"); + const descriptionLine = lines.find((l) => l.includes('"description"'))!; + const publisherLine = lines.find((l) => l.includes('"publisher"'))!; + expect(descriptionLine).toBeDefined(); + expect(publisherLine).toBeDefined(); + expect(descriptionLine).not.toMatch(/\/\/ jane\.bsky\.social/); + expect(publisherLine).toMatch(/\/\/ jane\.bsky\.social/); + }); + it("omits the comment when no handle is provided", async () => { const path = join(dir, "emdash-plugin.jsonc"); const source = `{ diff --git a/packages/registry-cli/tests/manifest-translate.test.ts b/packages/registry-cli/tests/manifest-translate.test.ts new file mode 100644 index 000000000..7f8818f76 --- /dev/null +++ b/packages/registry-cli/tests/manifest-translate.test.ts @@ -0,0 +1,78 @@ +/** + * Coverage for the manifest -> publish translation layer. + */ + +import { describe, expect, it } from "vitest"; + +import { + manifestToProfileBootstrap, + normaliseManifest, + type NormalisedManifest, +} from "../src/manifest/translate.js"; + +describe("normaliseManifest", () => { + it("collapses single-author into authors[]", () => { + const normalised = normaliseManifest({ + license: "MIT", + author: { name: "Jane" }, + security: { email: "s@example.com" }, + }); + expect(normalised.authors).toEqual([{ name: "Jane" }]); + // Single security contact normalised to array. 
+ expect(normalised.securityContacts).toEqual([{ email: "s@example.com" }]); + }); + + it("passes the multi-author array through unchanged", () => { + const normalised = normaliseManifest({ + license: "MIT", + authors: [{ name: "A" }, { name: "B" }], + securityContacts: [{ email: "s@example.com" }], + }); + expect(normalised.authors).toEqual([{ name: "A" }, { name: "B" }]); + }); + + it("propagates publisher when set", () => { + const normalised = normaliseManifest({ + license: "MIT", + publisher: "did:plc:abc", + author: { name: "Jane" }, + security: { email: "s@example.com" }, + }); + expect(normalised.publisher).toBe("did:plc:abc"); + }); +}); + +describe("manifestToProfileBootstrap", () => { + it("maps the publish-relevant subset of fields", () => { + const normalised: NormalisedManifest = { + license: "MIT", + publisher: "did:plc:abc", + authors: [{ name: "Jane", url: "https://example.com" }], + securityContacts: [{ email: "s@example.com" }], + name: "Test", + description: "desc", + keywords: ["k"], + repo: "https://github.com/example/p", + }; + const bootstrap = manifestToProfileBootstrap(normalised); + expect(bootstrap.license).toBe("MIT"); + expect(bootstrap.authorName).toBe("Jane"); + expect(bootstrap.authorUrl).toBe("https://example.com"); + expect(bootstrap.securityEmail).toBe("s@example.com"); + }); + + it("uses the first author when multiple are provided", () => { + const normalised: NormalisedManifest = { + license: "MIT", + publisher: undefined, + authors: [{ name: "First" }, { name: "Second" }], + securityContacts: [{ email: "s@example.com" }], + name: undefined, + description: undefined, + keywords: undefined, + repo: undefined, + }; + const bootstrap = manifestToProfileBootstrap(normalised); + expect(bootstrap.authorName).toBe("First"); + }); +}); From 91db9873f6e16f633d97afe6d32d3eba7a7ab6d7 Mon Sep 17 00:00:00 2001 From: Matt Kane Date: Thu, 14 May 2026 11:53:51 +0100 Subject: [PATCH 3/6] fix(registry-cli): bounded manifest read + tmpfile 
cleanup on verify failure Adversarial review round 3 findings: - Replace stat+readFile with a bounded read primitive in both the loader and writePublisherBack. open() + a pre-allocated MANIFEST_MAX_BYTES+1 buffer means the file can't grow between syscalls to OOM the CLI. Returns a discriminated union so the type system enforces handling oversize/missing/ok cases. - writePublisherBack's drift-check read now routes unexpected fs errors (EISDIR after file replacement, EACCES, etc.) through the same tmpfile-cleanup path as legitimate drift. Previously an unexpected throw would leak the tmpfile next to the manifest. - Fix stale doc comments in load.ts (failure-modes list missing MANIFEST_TOO_LARGE and duplicate-key) and publish.ts (no longer warns about unwired fields). - Replace lazy `await import("node:fs/promises")` with a top-of-file import; the dynamic form was carry-over from an earlier iteration. - New test asserts MANIFEST_TOO_LARGE fires on an oversized file. --- packages/registry-cli/src/commands/publish.ts | 13 +-- packages/registry-cli/src/manifest/load.ts | 87 ++++++++++------ .../registry-cli/src/manifest/publisher.ts | 99 ++++++++++++++++--- .../registry-cli/tests/manifest-load.test.ts | 18 ++++ 4 files changed, 168 insertions(+), 49 deletions(-) diff --git a/packages/registry-cli/src/commands/publish.ts b/packages/registry-cli/src/commands/publish.ts index 1785d8ad4..0a95ec66c 100644 --- a/packages/registry-cli/src/commands/publish.ts +++ b/packages/registry-cli/src/commands/publish.ts @@ -19,7 +19,7 @@ */ import { lookup as dnsLookup } from "node:dns/promises"; -import { readFile } from "node:fs/promises"; +import { readFile, stat } from "node:fs/promises"; import { resolve } from "node:path"; import type { PluginManifest } from "@emdash-cms/plugin-types"; @@ -426,13 +426,9 @@ interface ManifestLoadOutcome { * Resolve the manifest layer for `runPublish`. 
Returns `null` when no * manifest was loaded (either suppressed by --no-manifest or the * default-path file is missing). Throws a CliError when the user - * explicitly named a manifest path that couldn't be loaded. - * - * Surfaces a warning for any field the manifest carries that publish - * doesn't yet read end-to-end (issues #1029-#1033). Those fields are - * not silently dropped at publish — they're accepted in the manifest - * today so plugin authors can start writing them; the rest of the - * pipeline catches up issue-by-issue. + * explicitly named a manifest path that couldn't be loaded, and warns + * when `--no-manifest` is used while a manifest exists at the default + * path so the publisher-pin safety story stays visible. */ async function loadManifestBootstrap( args: PublishArgs, @@ -448,7 +444,6 @@ async function loadManifestBootstrap( // the path cheap: no parse, no schema validation. const defaultPath = `./${MANIFEST_FILENAME}`; try { - const { stat } = await import("node:fs/promises"); await stat(defaultPath); log.warn( `Skipping manifest at ${defaultPath} (--no-manifest is set). Publisher pin and license/security defaults are NOT being applied for this publish.`, diff --git a/packages/registry-cli/src/manifest/load.ts b/packages/registry-cli/src/manifest/load.ts index 3ba57494a..324a98ab8 100644 --- a/packages/registry-cli/src/manifest/load.ts +++ b/packages/registry-cli/src/manifest/load.ts @@ -1,13 +1,15 @@ /** * Read and validate an `emdash-plugin.jsonc` manifest from disk. * - * Three failure modes, each with a distinct error code for scriptable - * consumers (`validate --json`, programmatic API users): + * Failure modes, each with a distinct error code for scriptable consumers + * (`validate --json`, programmatic API users): * * - `MANIFEST_NOT_FOUND` — file doesn't exist at the resolved path. + * - `MANIFEST_TOO_LARGE` — file exceeds `MANIFEST_MAX_BYTES`. Reading + * stops at the cap; the file is never fully buffered. 
* - `MANIFEST_PARSE_ERROR` — JSONC parse failure (trailing comma, missing - * bracket, control char in string). Includes line + column from - * `jsonc-parser`'s offset. + * bracket, control char in string, duplicate keys). Includes line + + * column from `jsonc-parser`'s offset. * - `MANIFEST_VALIDATION_ERROR` — JSONC parsed cleanly but the value * failed the Zod schema. Includes the field path and the offending * value's location in the source where possible. @@ -17,7 +19,7 @@ * pointer they'd get from `tsc` or `eslint`, not a Zod issue tree. */ -import { readFile, stat } from "node:fs/promises"; +import { open } from "node:fs/promises"; import { resolve } from "node:path"; import { parseTree, type Node, type ParseError, printParseErrorCode } from "jsonc-parser"; @@ -104,13 +106,35 @@ export async function loadManifest(path: string): Promise { ? resolved : resolve(resolved, MANIFEST_FILENAME); - // Stat first so a wildly-oversized file fails before we buffer it. - // `readFile` itself doesn't honour any cap, so we'd otherwise read - // the whole thing into memory before any size check could fire. - let size: number; + // Bounded read: open the file and read at most MANIFEST_MAX_BYTES+1 + // bytes. The extra byte is a sentinel — if we get it, the file is + // definitely over the cap regardless of what `stat` would say. + // This closes the stat-then-readFile race where a concurrent writer + // could grow the file between size check and buffer. + const source = await readBoundedUtf8(filePath); + return parseAndValidate(source, filePath); +} + +/** + * Read a UTF-8 file with a hard cap of `MANIFEST_MAX_BYTES` bytes. + * Throws `ManifestError(MANIFEST_TOO_LARGE)` if the file exceeds the cap, + * `ManifestError(MANIFEST_NOT_FOUND)` for ENOENT. + * + * We allocate a buffer of `MANIFEST_MAX_BYTES + 1` and read into it; if + * the read fills the whole buffer, the file is at least one byte over + * the limit and we reject. 
This avoids the TOCTOU window of a separate + * `stat` call: a concurrent writer can grow the file between syscalls, + * but it can never make our buffer larger than what we allocated up + * front. + * + * `read` returns a single chunk synchronously from kernel buffers when + * available; for files of our cap size (1 MiB) this is one syscall on + * Linux/macOS. We loop in case the kernel returns a short read. + */ +async function readBoundedUtf8(filePath: string): Promise { + let handle: Awaited>; try { - const info = await stat(filePath); - size = info.size; + handle = await open(filePath, "r"); } catch (error) { if (isNodeNotFoundError(error)) { throw new ManifestError( @@ -121,30 +145,35 @@ export async function loadManifest(path: string): Promise { } throw error; } - if (size > MANIFEST_MAX_BYTES) { - throw new ManifestError( - "MANIFEST_TOO_LARGE", - `Manifest at ${filePath} is ${size} bytes; the maximum supported size is ${MANIFEST_MAX_BYTES} bytes. Check that you pointed --manifest at the right file.`, - filePath, - ); - } - - let source: string; try { - source = await readFile(filePath, "utf8"); - } catch (error) { - if (isNodeNotFoundError(error)) { - // Race: file was removed between stat and readFile. + // One extra byte so we can detect oversize without reading + // arbitrarily much. + const buffer = Buffer.allocUnsafe(MANIFEST_MAX_BYTES + 1); + let totalRead = 0; + while (totalRead < buffer.length) { + const { bytesRead } = await handle.read( + buffer, + totalRead, + buffer.length - totalRead, + totalRead, + ); + if (bytesRead === 0) break; + totalRead += bytesRead; + } + if (totalRead > MANIFEST_MAX_BYTES) { throw new ManifestError( - "MANIFEST_NOT_FOUND", - `No manifest at ${filePath}. Create one with: emdash-registry init`, + "MANIFEST_TOO_LARGE", + `Manifest at ${filePath} is larger than the ${MANIFEST_MAX_BYTES}-byte cap. 
Check that you pointed --manifest at the right file.`, filePath, ); } - throw error; + return buffer.subarray(0, totalRead).toString("utf8"); + } finally { + await handle.close().catch(() => { + // Closing a handle should never fail in practice; if it + // does, swallow it — the read result is already in hand. + }); } - - return parseAndValidate(source, filePath); } /** diff --git a/packages/registry-cli/src/manifest/publisher.ts b/packages/registry-cli/src/manifest/publisher.ts index 3b37581f8..b41c67416 100644 --- a/packages/registry-cli/src/manifest/publisher.ts +++ b/packages/registry-cli/src/manifest/publisher.ts @@ -28,7 +28,7 @@ */ import { createHash, randomUUID } from "node:crypto"; -import { readFile, rename, stat, writeFile } from "node:fs/promises"; +import { open, rename, writeFile } from "node:fs/promises"; import { dirname, join } from "node:path"; import { isDid, isHandle, type Did, type Handle } from "@atcute/lexicons/syntax"; @@ -163,18 +163,26 @@ export async function writePublisherBack(input: { }): Promise { const { manifestPath, sessionDid, sessionHandle, onInfo, onWarn } = input; try { - // Stat first: if the file ballooned between load and write-back - // (e.g. user pasted in a large changelog mid-publish), don't - // buffer the new contents. The post-publish path should never - // touch a file the loader wouldn't have accepted. - const info = await stat(manifestPath); - if (info.size > MANIFEST_MAX_BYTES) { + // Bounded read with the same cap the loader uses. If the file + // ballooned between load and write-back (e.g. the user pasted + // in a huge changelog while we were publishing), abort the + // write-back rather than buffering the new contents — the + // post-publish path should never touch a file the loader + // wouldn't have accepted. 
+ const initial = await readManifestBounded(manifestPath); + if (initial.kind === "oversize") { onWarn?.( - `Skipped writing publisher to ${manifestPath} (file is now ${info.size} bytes, exceeding the ${MANIFEST_MAX_BYTES}-byte cap; the publish succeeded).`, + `Skipped writing publisher to ${manifestPath} (file is now larger than the ${MANIFEST_MAX_BYTES}-byte cap; the publish succeeded).`, ); return; } - const source = await readFile(manifestPath, "utf8"); + if (initial.kind === "missing") { + onWarn?.( + `Skipped writing publisher to ${manifestPath} (file was removed during publish; the publish succeeded).`, + ); + return; + } + const source = initial.source; // Defensive re-parse: confirm `publisher` is still absent. If // the user added one while we were publishing, leave their value @@ -264,8 +272,24 @@ export async function writePublisherBack(input: { const expectedHash = sha256(source); const tmp = join(dirname(manifestPath), `.${randomUUID()}.tmp`); await writeFile(tmp, updated, "utf8"); - const currentSource = await readFile(manifestPath, "utf8").catch(() => null); - const currentHash = currentSource === null ? null : sha256(currentSource); + // Re-read with the same bounded primitive so a file that grew + // past the cap during publish doesn't OOM the verification step. + // Oversize and missing both indicate the file is no longer the + // bytes we hashed; treat them as drift and bail. + // + // Wrap in try/catch so genuinely-unexpected fs failures here + // (EISDIR if the file was replaced with a directory, EACCES, + // etc.) ALSO route through the drift-cleanup path. Otherwise + // the tmpfile we just wrote would leak into the manifest's + // directory because the outer catch handles the failure but + // doesn't know there's a tmpfile to clean up. + let currentHash: string | null; + try { + const current = await readManifestBounded(manifestPath); + currentHash = current.kind === "ok" ? 
sha256(current.source) : null; + } catch { + currentHash = null; + } if (currentHash !== expectedHash) { // File changed under us. Clean up the tmpfile and bail. await unlinkIgnoreMissing(tmp); @@ -285,6 +309,59 @@ export async function writePublisherBack(input: { } } +/** + * Discriminated result of a bounded manifest read. + */ +type BoundedReadResult = + | { kind: "ok"; source: string } + | { kind: "oversize" } + | { kind: "missing" }; + +/** + * Read a manifest with a hard size cap. Returns a discriminated result: + * - `ok` carrying the UTF-8 contents, + * - `oversize` when the file exceeds `MANIFEST_MAX_BYTES`, + * - `missing` for ENOENT. + * + * Bounded variant of the loader's read; same TOCTOU-free pattern (one + * pre-allocated buffer, never grows). We use a discriminated union + * rather than sentinel strings so the type system catches a caller + * that forgets to handle the failure cases. + */ +async function readManifestBounded(filePath: string): Promise { + let handle: Awaited>; + try { + handle = await open(filePath, "r"); + } catch (error) { + if ( + error instanceof Error && + "code" in error && + (error as { code: unknown }).code === "ENOENT" + ) { + return { kind: "missing" }; + } + throw error; + } + try { + const buffer = Buffer.allocUnsafe(MANIFEST_MAX_BYTES + 1); + let totalRead = 0; + while (totalRead < buffer.length) { + const { bytesRead } = await handle.read( + buffer, + totalRead, + buffer.length - totalRead, + totalRead, + ); + if (bytesRead === 0) break; + totalRead += bytesRead; + } + if (totalRead > MANIFEST_MAX_BYTES) return { kind: "oversize" }; + return { kind: "ok", source: buffer.subarray(0, totalRead).toString("utf8") }; + } finally { + await handle.close().catch(() => {}); + } +} + /** Compute a stable hash of the source bytes, used for TOCTOU narrowing. 
*/ function sha256(source: string): string { return createHash("sha256").update(source, "utf8").digest("hex"); diff --git a/packages/registry-cli/tests/manifest-load.test.ts b/packages/registry-cli/tests/manifest-load.test.ts index 1ae54a89b..b76e30775 100644 --- a/packages/registry-cli/tests/manifest-load.test.ts +++ b/packages/registry-cli/tests/manifest-load.test.ts @@ -181,4 +181,22 @@ describe("loadManifest (filesystem)", () => { expect((error as ManifestError).code).toBe("MANIFEST_NOT_FOUND"); } }); + + it("returns MANIFEST_TOO_LARGE when the file exceeds the cap", async () => { + // Build a file just over the 1 MiB cap. Filler is JSONC-friendly + // (a long string value) so the bytes can't be misread as a + // syntactic short-circuit. + const { MANIFEST_MAX_BYTES } = await import("../src/manifest/load.js"); + const filler = "x".repeat(MANIFEST_MAX_BYTES); + const oversize = `{ "license": "${filler}", "author": {"name":"J"}, "security": {"email":"a@b.com"} }`; + const filePath = join(dir, "oversize.jsonc"); + await writeFile(filePath, oversize, "utf8"); + try { + await loadManifest(filePath); + expect.fail("expected ManifestError"); + } catch (error) { + expect(error).toBeInstanceOf(ManifestError); + expect((error as ManifestError).code).toBe("MANIFEST_TOO_LARGE"); + } + }); }); From f565fa3ca0b85fbd7f2e6df500b4b215b5c6ecc0 Mon Sep 17 00:00:00 2001 From: "emdashbot[bot]" Date: Thu, 14 May 2026 10:55:55 +0000 Subject: [PATCH 4/6] style: format --- .../schemas/emdash-plugin.schema.json | 23 ++++--------------- packages/registry-cli/src/manifest/load.ts | 10 ++------ .../registry-cli/src/manifest/publisher.ts | 7 ++---- 3 files changed, 9 insertions(+), 31 deletions(-) diff --git a/packages/registry-cli/schemas/emdash-plugin.schema.json b/packages/registry-cli/schemas/emdash-plugin.schema.json index 2615dcbe0..5de992e21 100644 --- a/packages/registry-cli/schemas/emdash-plugin.schema.json +++ b/packages/registry-cli/schemas/emdash-plugin.schema.json @@ -39,9 
+39,7 @@ "$ref": "#/$defs/__schema18" } }, - "required": [ - "license" - ], + "required": ["license"], "additionalProperties": false, "$defs": { "__schema0": { @@ -54,20 +52,13 @@ "maxLength": 256, "title": "License", "description": "SPDX license expression (e.g. \"MIT\", \"Apache-2.0\", \"MIT OR Apache-2.0\"). Required on first publish; ignored on subsequent publishes (the existing profile wins).", - "examples": [ - "MIT", - "Apache-2.0", - "MIT OR Apache-2.0" - ] + "examples": ["MIT", "Apache-2.0", "MIT OR Apache-2.0"] }, "__schema2": { "type": "string", "title": "Publisher", "description": "Atproto DID or handle of the publishing identity. Pinned on first publish to prevent accidental publishes from a different account. DIDs are recommended (durable); handles work but are mutable.", - "examples": [ - "did:plc:abc123def456", - "example.com" - ] + "examples": ["did:plc:abc123def456", "example.com"] }, "__schema3": { "$ref": "#/$defs/__schema4" @@ -85,9 +76,7 @@ "$ref": "#/$defs/__schema7" } }, - "required": [ - "name" - ], + "required": ["name"], "additionalProperties": false, "title": "Author", "description": "A single author entry. Mirrors the lexicon's author shape." @@ -196,9 +185,7 @@ "pattern": "^https:\\/\\/", "title": "Source repository", "description": "HTTPS URL of the plugin's source repository. Surfaced in registry listings.", - "examples": [ - "https://github.com/emdash-cms/plugin-gallery" - ] + "examples": ["https://github.com/emdash-cms/plugin-gallery"] } } } diff --git a/packages/registry-cli/src/manifest/load.ts b/packages/registry-cli/src/manifest/load.ts index 324a98ab8..fd0e5a2f8 100644 --- a/packages/registry-cli/src/manifest/load.ts +++ b/packages/registry-cli/src/manifest/load.ts @@ -216,11 +216,7 @@ function parseAndValidate(source: string, filePath: string): LoadManifestResult if (!root) { // Shouldn't be reachable when `allowEmptyContent: false` is set and // `parseErrors` is empty, but `parseTree`'s return type is nullable. 
- throw new ManifestError( - "MANIFEST_PARSE_ERROR", - `${filePath}: file is empty`, - filePath, - ); + throw new ManifestError("MANIFEST_PARSE_ERROR", `${filePath}: file is empty`, filePath); } // Reject duplicate keys before validation. `nodeToValue` is last-wins @@ -349,9 +345,7 @@ function findNodeAtPath(root: Node, path: ReadonlyArray): Node * We scan the entire tree, not just the root: duplicate keys inside * `author: { ... }` or `security: { ... }` are equally review-hostile. */ -function findDuplicateKey( - node: Node, -): { key: string; offset: number } | undefined { +function findDuplicateKey(node: Node): { key: string; offset: number } | undefined { if (node.type === "object" && node.children) { const seen = new Set(); for (const prop of node.children) { diff --git a/packages/registry-cli/src/manifest/publisher.ts b/packages/registry-cli/src/manifest/publisher.ts index b41c67416..a2baec546 100644 --- a/packages/registry-cli/src/manifest/publisher.ts +++ b/packages/registry-cli/src/manifest/publisher.ts @@ -34,9 +34,8 @@ import { dirname, join } from "node:path"; import { isDid, isHandle, type Did, type Handle } from "@atcute/lexicons/syntax"; import { applyEdits, modify, parseTree, printParseErrorCode, type ParseError } from "jsonc-parser"; -import { MANIFEST_MAX_BYTES } from "./load.js"; - import { createActorResolver } from "../oauth.js"; +import { MANIFEST_MAX_BYTES } from "./load.js"; /** * Result of comparing a manifest's pinned publisher against the active @@ -253,9 +252,7 @@ export async function writePublisherBack(input: { // DID string that happens to appear elsewhere in the document // (e.g. in `description`) can't deflect the comment to the // wrong line. - const updated = sessionHandle - ? annotatePublisherLine(applied, sessionHandle) - : applied; + const updated = sessionHandle ? annotatePublisherLine(applied, sessionHandle) : applied; // Atomic write: tmpfile + rename. 
POSIX rename is atomic, so a // crash mid-write leaves the previous file intact rather than From c9b2e8009ff53055ee5c705437f40c26bbd3798b Mon Sep 17 00:00:00 2001 From: Matt Kane Date: Thu, 14 May 2026 12:52:06 +0100 Subject: [PATCH 5/6] fix(registry-cli): address copilot review feedback - Restore the zod catalog-pin rationale comment that got dropped when pnpm add rewrote pnpm-workspace.yaml. The comment is still relevant after the 4.4.1 bump. - Drop "templates" from the package files array. The directory lands with the init command in a follow-up PR; including a non-existent path causes pnpm pack warnings. - Document in translate.ts that name/description/keywords/repo are intentionally deferred to a follow-up issue, not silently lost. --- packages/registry-cli/package.json | 3 +-- packages/registry-cli/src/manifest/translate.ts | 6 ++++++ pnpm-workspace.yaml | 5 +++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/packages/registry-cli/package.json b/packages/registry-cli/package.json index ef5c7c49b..dcebb4a16 100644 --- a/packages/registry-cli/package.json +++ b/packages/registry-cli/package.json @@ -15,8 +15,7 @@ }, "files": [ "dist", - "schemas", - "templates" + "schemas" ], "scripts": { "build": "node --run gen-schema && tsdown", diff --git a/packages/registry-cli/src/manifest/translate.ts b/packages/registry-cli/src/manifest/translate.ts index 3d5625f84..0ecb10ad5 100644 --- a/packages/registry-cli/src/manifest/translate.ts +++ b/packages/registry-cli/src/manifest/translate.ts @@ -61,6 +61,12 @@ export function normaliseManifest(manifest: Manifest): NormalisedManifest { * `publishRelease` consumes. For multi-author manifests, the first * author wins (the publish lexicon supports an array, but * `ProfileBootstrap` doesn't model that yet). + * + * `name`, `description`, `keywords`, and `repo` are accepted by the + * manifest schema but not wired through here. 
They land in publish in a + * follow-up issue alongside the broader profile-fields work. The fields + * are not silently lost — the manifest is the source of truth and we'll + * read them again when the publish API accepts them. */ export function manifestToProfileBootstrap(manifest: NormalisedManifest): ProfileBootstrap { const author = manifest.authors[0]; diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index a997cef15..3a3b2c488 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -94,4 +94,9 @@ catalog: vite: ^8.0.11 vitest: ^4.1.5 wrangler: ^4.83.0 + # Catalog-pin Zod so the whole workspace dedupes on a single instance. + # Zod 4 embeds the version in the type, so even ^4.3.6 vs ^4.4.1 produce + # structurally incompatible ZodType across packages that mix astro/zod + # and emdash's zod (e.g. trusted plugins like @emdash-cms/plugin-forms + # importing 'z' from astro/zod and passing schemas to definePlugin). zod: ^4.4.1 From a763150b116fefbe3e76f6a704e2064c634124c1 Mon Sep 17 00:00:00 2001 From: Matt Kane Date: Thu, 14 May 2026 14:24:32 +0100 Subject: [PATCH 6/6] fix(registry-cli): address ask-bonk review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five findings from the second-opinion review on PR #1040: CRITICAL: schema-drift CI flap - pnpm format collapsed short arrays in the generated JSON Schema (`required: ["license"]`, `examples: [...]`, etc.) into single-line form, but `JSON.stringify(_, null, "\t")` from gen-schema.ts always emits multi-line. The drift test failed on every PR after format ran, driving a regen-format-fail loop. Add the schemas dir to oxfmt's ignorePatterns — generated artifacts shouldn't be reformatted. LOW: typo'd field names lose source location - `findNodeAtPath` returned undefined when a path traversed into a missing key, so an unrecognized-key issue (the most common error) reported with no line:col. 
Now falls back to the parent node, and for `unrecognized_keys` issues specifically resolves the first offending key inside the parent object so the pointer lands on the actual typo's line+column instead of the parent's opening brace. LOW: publisher mismatch only fires after tarball fetch - Move the publisher-mismatch check ahead of fetchTarball so a wrong-account publish fails in ms rather than after a 384 KB download + decompress + manifest extract. The check is offline; it only needs the session DID and the manifest's publisher value, both available before any network access. MEDIUM: writePublisherBack reformats non-tab manifests - Hard-coding `insertSpaces: false, tabSize: 1` made jsonc-parser's modify() rewrite a 2-space-indented manifest with tabs on first publish. Sniff the source's indentation and match it. LOW: README advertises fields publish silently discards - Per maintainer direction (drop in-development warnings), this stays as a JSDoc note in translate.ts. README documents the target shape, not the current implementation. Already addressed in the previous commit. 
--- .oxfmtrc.json | 3 +- .../schemas/emdash-plugin.schema.json | 23 ++++-- packages/registry-cli/src/commands/publish.ts | 68 +++++++++--------- packages/registry-cli/src/manifest/load.ts | 71 ++++++++++++++++--- .../registry-cli/src/manifest/publisher.ts | 46 ++++++++++-- .../registry-cli/tests/manifest-load.test.ts | 9 +++ .../tests/manifest-publisher.test.ts | 28 ++++++++ 7 files changed, 193 insertions(+), 55 deletions(-) diff --git a/.oxfmtrc.json b/.oxfmtrc.json index 0c384ead9..b45920654 100644 --- a/.oxfmtrc.json +++ b/.oxfmtrc.json @@ -7,6 +7,7 @@ "**/*.mdx", "**/package.json", "**/emdash-env.d.ts", - "packages/registry-lexicons/src/generated/**" + "packages/registry-lexicons/src/generated/**", + "packages/registry-cli/schemas/**" ] } diff --git a/packages/registry-cli/schemas/emdash-plugin.schema.json b/packages/registry-cli/schemas/emdash-plugin.schema.json index 5de992e21..2615dcbe0 100644 --- a/packages/registry-cli/schemas/emdash-plugin.schema.json +++ b/packages/registry-cli/schemas/emdash-plugin.schema.json @@ -39,7 +39,9 @@ "$ref": "#/$defs/__schema18" } }, - "required": ["license"], + "required": [ + "license" + ], "additionalProperties": false, "$defs": { "__schema0": { @@ -52,13 +54,20 @@ "maxLength": 256, "title": "License", "description": "SPDX license expression (e.g. \"MIT\", \"Apache-2.0\", \"MIT OR Apache-2.0\"). Required on first publish; ignored on subsequent publishes (the existing profile wins).", - "examples": ["MIT", "Apache-2.0", "MIT OR Apache-2.0"] + "examples": [ + "MIT", + "Apache-2.0", + "MIT OR Apache-2.0" + ] }, "__schema2": { "type": "string", "title": "Publisher", "description": "Atproto DID or handle of the publishing identity. Pinned on first publish to prevent accidental publishes from a different account. 
DIDs are recommended (durable); handles work but are mutable.", - "examples": ["did:plc:abc123def456", "example.com"] + "examples": [ + "did:plc:abc123def456", + "example.com" + ] }, "__schema3": { "$ref": "#/$defs/__schema4" @@ -76,7 +85,9 @@ "$ref": "#/$defs/__schema7" } }, - "required": ["name"], + "required": [ + "name" + ], "additionalProperties": false, "title": "Author", "description": "A single author entry. Mirrors the lexicon's author shape." @@ -185,7 +196,9 @@ "pattern": "^https:\\/\\/", "title": "Source repository", "description": "HTTPS URL of the plugin's source repository. Surfaced in registry listings.", - "examples": ["https://github.com/emdash-cms/plugin-gallery"] + "examples": [ + "https://github.com/emdash-cms/plugin-gallery" + ] } } } diff --git a/packages/registry-cli/src/commands/publish.ts b/packages/registry-cli/src/commands/publish.ts index 0a95ec66c..0e88eead5 100644 --- a/packages/registry-cli/src/commands/publish.ts +++ b/packages/registry-cli/src/commands/publish.ts @@ -177,35 +177,11 @@ async function runPublish(args: PublishArgs): Promise { const manifestLoad = await loadManifestBootstrap(args, consola); const manifestBase = manifestLoad?.bootstrap ?? null; - // Fetch + checksum the tarball, then extract the manifest BEFORE we - // print any reassuring "tarball looks fine" lines. A 200 from a CDN - // can serve an HTML 404 page; we want the failure to land before the - // user sees apparent success. - consola.start(`Fetching ${args.url}...`); - const tarballBytes = await fetchTarball(args.url); - const checksum = sha256Multihash(tarballBytes); - const manifest = await extractManifestFromTarball(tarballBytes); - - consola.info(`Tarball: ${formatBytes(tarballBytes.length)}`); - consola.info(`Multihash: ${pc.dim(checksum)}`); - consola.info(`Manifest: ${pc.bold(manifest.id)}@${manifest.version}`); - - // Optional local cross-check. 
- if (args.local) { - const localPath = resolve(args.local); - const localBytes = await readFile(localPath); - const localChecksum = sha256Multihash(localBytes); - if (localChecksum !== checksum) { - throw new CliError( - `Local file ${localPath} does not match the bytes served at ${args.url}. Local multihash: ${localChecksum}; remote multihash: ${checksum}. Re-upload the correct tarball, or drop --local to publish whatever's at the URL.`, - 1, - "LOCAL_CHECKSUM_MISMATCH", - ); - } - consola.success(`Local file at ${pc.dim(localPath)} matches the URL`); - } - - // Resume the active publisher session. + // Resume the active publisher session BEFORE any network access. + // The publisher-mismatch check below depends only on the session DID + // and the manifest's pinned publisher; running both up front means + // a wrong-account publish fails in milliseconds rather than after a + // full tarball fetch + decompress + manifest extract. const credentials = new FileCredentialStore(); const session = await credentials.current(); if (!session) { @@ -218,9 +194,9 @@ async function runPublish(args: PublishArgs): Promise { consola.info(`Publishing as ${pc.bold(session.handle ?? session.did)} (${pc.dim(session.did)})`); // Verify the manifest's pinned publisher matches the active session - // before we open any network connections to the PDS. The check runs - // here (rather than at manifest-load time) because it depends on - // session.did, which we only know now. + // before fetching the tarball. The check is offline (DID compare is + // verbatim; handle resolution only runs if the manifest pins a handle) + // so we can fail fast on the wrong-account case. if (manifestLoad?.manifest.publisher !== undefined) { try { const check = await checkPublisher({ @@ -243,6 +219,34 @@ async function runPublish(args: PublishArgs): Promise { } } + // Fetch + checksum the tarball, then extract the manifest BEFORE we + // print any reassuring "tarball looks fine" lines. 
A 200 from a CDN + // can serve an HTML 404 page; we want the failure to land before the + // user sees apparent success. + consola.start(`Fetching ${args.url}...`); + const tarballBytes = await fetchTarball(args.url); + const checksum = sha256Multihash(tarballBytes); + const manifest = await extractManifestFromTarball(tarballBytes); + + consola.info(`Tarball: ${formatBytes(tarballBytes.length)}`); + consola.info(`Multihash: ${pc.dim(checksum)}`); + consola.info(`Manifest: ${pc.bold(manifest.id)}@${manifest.version}`); + + // Optional local cross-check. + if (args.local) { + const localPath = resolve(args.local); + const localBytes = await readFile(localPath); + const localChecksum = sha256Multihash(localBytes); + if (localChecksum !== checksum) { + throw new CliError( + `Local file ${localPath} does not match the bytes served at ${args.url}. Local multihash: ${localChecksum}; remote multihash: ${checksum}. Re-upload the correct tarball, or drop --local to publish whatever's at the URL.`, + 1, + "LOCAL_CHECKSUM_MISMATCH", + ); + } + consola.success(`Local file at ${pc.dim(localPath)} matches the URL`); + } + const oauthSession = await resumeSession(session.did); const publisher = PublishingClient.fromHandler({ handler: oauthSession, diff --git a/packages/registry-cli/src/manifest/load.ts b/packages/registry-cli/src/manifest/load.ts index fd0e5a2f8..fe3ad6541 100644 --- a/packages/registry-cli/src/manifest/load.ts +++ b/packages/registry-cli/src/manifest/load.ts @@ -267,12 +267,40 @@ function parseAndValidate(source: string, filePath: string): LoadManifestResult * Symbols cannot appear in a JSON-parsed value's path (JSON has no symbol * keys), so we narrow defensively and treat any stray symbol as an * opaque "" string in the displayed path. + * + * Special-cases `unrecognized_keys` (typo'd field names): Zod reports the + * issue at the parent path with the offending key(s) in `issue.keys`. 
+ * Without special handling, the line:col points at the parent object's + * opening brace, not the actual typo. We resolve the first listed key + * inside the parent and use ITS source offset, so a `"licens": "MIT"` + * mistake gets the pointer landing on the bad key's line and column. */ function zodIssueToManifestIssue(issue: ZodIssue, source: string, root: Node): ManifestIssue { const path = narrowZodPath(issue.path); const pathStr = formatZodPath(path); - const node = findNodeAtPath(root, path); - const offset = node?.offset; + + let offset: number | undefined; + if (issue.code === "unrecognized_keys") { + const keys = (issue as ZodIssue & { keys?: readonly string[] }).keys; + const firstKey = keys?.[0]; + if (firstKey !== undefined) { + const parent = findNodeAtPath(root, path); + if (parent?.type === "object" && parent.children) { + const prop: Node | undefined = parent.children.find( + (c) => + c.type === "property" && + c.children?.[0]?.type === "string" && + c.children[0].value === firstKey, + ); + const keyNode = prop?.children?.[0]; + if (keyNode) offset = keyNode.offset; + } + } + } + if (offset === undefined) { + offset = findNodeAtPath(root, path)?.offset; + } + const location = offset !== undefined ? offsetToLineCol(source, offset) : undefined; return location ? { path: pathStr, message: issue.message, location } @@ -308,30 +336,51 @@ function formatZodPath(path: ReadonlyArray): string { } /** - * Walk the JSONC syntax tree to find the node at a given path. Returns - * `undefined` if the path traverses through a missing key (which is - * common for "required" issues: the field doesn't exist in the source, - * so we point at the parent object instead). + * Walk the JSONC syntax tree to find the node at a given path. 
When the + * path traverses into a missing key or a wrong-shape value, returns the + * deepest ancestor that DID exist — so the resulting line:col still + * points at something useful (the parent object, where the missing + * property "should have been"). This matters most for two error + * classes: + * + * - Missing required key: Zod's path is `[key]`, the value doesn't + * exist; returning the root object's offset puts the pointer at the + * opening brace, which an editor highlights as "issue with this + * object". + * - Unknown key (typo): Zod's path is `[wrongKey]`, the value doesn't + * exist in the parent. Same parent-fallback gives the pointer the + * line of the parent object. + * + * Both cases used to return undefined and lose the line:col entirely. */ function findNodeAtPath(root: Node, path: ReadonlyArray): Node | undefined { let current: Node | undefined = root; + let lastResolved: Node | undefined = root; for (const segment of path) { - if (!current) return undefined; + if (!current) return lastResolved; if (typeof segment === "number") { if (current.type !== "array" || !current.children) return current; - current = current.children[segment]; + const next: Node | undefined = current.children[segment]; + if (!next) return current; + current = next; } else { if (current.type !== "object" || !current.children) return current; - const prop = current.children.find( + const prop: Node | undefined = current.children.find( (c) => c.type === "property" && c.children?.[0]?.type === "string" && c.children[0].value === segment, ); // `property` node's children are [keyNode, valueNode]. We want - // the value for further traversal. - current = prop?.children?.[1]; + // the value for further traversal. If the property is missing + // entirely (e.g. typo'd key, missing required field), fall + // back to the current object so the caller gets a source + // location for the containing structure. 
+ const next: Node | undefined = prop?.children?.[1]; + if (!next) return current; + current = next; } + lastResolved = current; } return current; } diff --git a/packages/registry-cli/src/manifest/publisher.ts b/packages/registry-cli/src/manifest/publisher.ts index a2baec546..06de6b181 100644 --- a/packages/registry-cli/src/manifest/publisher.ts +++ b/packages/registry-cli/src/manifest/publisher.ts @@ -223,13 +223,17 @@ export async function writePublisherBack(input: { // them against the source. This is the JSONC-aware path that // preserves comments and existing whitespace. // - // `formattingOptions.insertSpaces: false` matches the repo's - // tab-indented JSONC convention. The `getInsertionIndex` callback - // places `publisher` immediately after `license` (or at the end - // of the object if `license` isn't present, which shouldn't - // happen for a schema-valid manifest but is handled defensively). + // Indentation is sniffed from the user's existing source rather + // than hard-coded. Without this, a 2-space-indented manifest + // gets silently rewritten to tabs on first publish — a + // surprising behaviour for a write-back that's supposed to be + // a small, targeted edit. `getInsertionIndex` places `publisher` + // immediately after `license` (or at the end of the object if + // `license` isn't present, which shouldn't happen for a + // schema-valid manifest but is handled defensively). + const indent = detectIndent(source); const edits = modify(source, ["publisher"], sessionDid, { - formattingOptions: { insertSpaces: false, tabSize: 1 }, + formattingOptions: { insertSpaces: !indent.useTabs, tabSize: indent.size }, getInsertionIndex: (existingProps) => { const licenseIdx = existingProps.indexOf("license"); if (licenseIdx >= 0) return licenseIdx + 1; @@ -359,6 +363,36 @@ async function readManifestBounded(filePath: string): Promise } } +/** + * Sniff the indentation style used by the source so the write-back can + * match it. 
Looks at the first indented line and reports whether the + * leading whitespace is tabs or spaces, and the run length. + * + * Falls back to tabs-with-tabSize-1 when: + * - no indented line is found (single-line manifest), or + * - the file is unreadable in a way we can't infer from. + * + * The tab-fallback matches the conventions of the repo's own JSONC files + * (wrangler.jsonc, tsconfig.json, this very package's templates). + */ +function detectIndent(source: string): { useTabs: boolean; size: number } { + const lines = source.split("\n"); + for (const line of lines) { + if (line.length === 0) continue; + const first = line[0]; + if (first === "\t") return { useTabs: true, size: 1 }; + if (first === " ") { + let count = 0; + while (count < line.length && line[count] === " ") count++; + // Indent runs of 1 are weird; round up to 2 as the most + // common non-tab indent. Anything 2-8 we use verbatim. + return { useTabs: false, size: Math.max(2, Math.min(count, 8)) }; + } + // Non-whitespace first char → this line isn't indented; keep looking. + } + return { useTabs: true, size: 1 }; +} + /** Compute a stable hash of the source bytes, used for TOCTOU narrowing. */ function sha256(source: string): string { return createHash("sha256").update(source, "utf8").digest("hex"); diff --git a/packages/registry-cli/tests/manifest-load.test.ts b/packages/registry-cli/tests/manifest-load.test.ts index b76e30775..480f5cb3e 100644 --- a/packages/registry-cli/tests/manifest-load.test.ts +++ b/packages/registry-cli/tests/manifest-load.test.ts @@ -133,6 +133,15 @@ describe("parseAndValidateManifest (in-memory)", () => { } catch (error) { const err = error as ManifestError; expect(err.code).toBe("MANIFEST_VALIDATION_ERROR"); + // The line:col must point at the typo'd key, not at the + // parent object's opening brace. Typos are the most + // common error class; landing on the right line matters. 
+ // Regression: previously this returned undefined because + // `findNodeAtPath` couldn't resolve a key that didn't + // exist in the schema. + const typoIssue = err.issues.find((i) => i.message.includes('"licens"')); + expect(typoIssue).toBeDefined(); + expect(typoIssue?.location?.line).toBe(3); } }); }); diff --git a/packages/registry-cli/tests/manifest-publisher.test.ts b/packages/registry-cli/tests/manifest-publisher.test.ts index 13d7e0a97..83d70e5fa 100644 --- a/packages/registry-cli/tests/manifest-publisher.test.ts +++ b/packages/registry-cli/tests/manifest-publisher.test.ts @@ -243,6 +243,34 @@ describe("writePublisherBack", () => { expect(publisherLine).not.toContain("//"); }); + it("preserves the source's indentation (2-space)", async () => { + // Regression: an earlier version hard-coded tab indentation in + // the modify() formattingOptions, which silently reformatted + // any 2-space-indented manifest to tabs on first publish. The + // detector sniffs the source's existing indent and matches it. + const path = join(dir, "emdash-plugin.jsonc"); + const source = [ + "{", + ' "slug": "my-plugin",', + ' "version": "0.1.0",', + ' "license": "MIT",', + ' "author": { "name": "Jane Doe" },', + ' "security": { "email": "security@example.com" }', + "}", + "", + ].join("\n"); + await writeFile(path, source, "utf8"); + await writePublisherBack({ manifestPath: path, sessionDid: SESSION_DID }); + const updated = await readFile(path, "utf8"); + // Pre-existing 2-space lines should still be 2-space, no tab + // characters should have appeared anywhere. + expect(updated).not.toContain("\t"); + // The new publisher line should also use 2 spaces. 
+ const publisherLine = updated.split("\n").find((l) => l.includes('"publisher"'))!; + expect(publisherLine.startsWith(" ")).toBe(true); + expect(publisherLine.startsWith("\t")).toBe(false); + }); + it("does not overwrite an existing publisher (defensive re-parse)", async () => { const path = join(dir, "emdash-plugin.jsonc"); const source = `{