diff --git a/.changeset/site-default-og-image.md b/.changeset/site-default-og-image.md new file mode 100644 index 000000000..4f9e11fcd --- /dev/null +++ b/.changeset/site-default-og-image.md @@ -0,0 +1,14 @@ +--- +"emdash": minor +"@emdash-cms/admin": minor +--- + +Adds support for a site-wide default Open Graph image. The setting is exposed in the admin SEO settings page (Settings -> SEO -> Default Social Image), resolved to a URL on read by `getSiteSettings()`, and automatically emitted as `og:image` / `twitter:image` (and BlogPosting JSON-LD `image`) by `EmDashHead.astro` whenever a page has no image of its own. Per-page images still take precedence. + +This wires up an existing data model that was previously defined in the schema and MCP tools but never used: stored values were not resolved and no template path read the setting. + +Emitted URLs are absolutized using `SiteSettings.url`, the page's `siteUrl`, or the request origin so crawlers and JSON-LD consumers that reject relative URLs work correctly. + +Also adds a `localOnly` prop to `MediaPickerModal` that suppresses the "Insert from URL" input and external provider tabs. Used by SEO settings to ensure the picker only returns locally-stored media (since the setting only persists a local `mediaId`). + +Media metadata updates and deletes now invalidate the worker-scoped site-settings cache, so resolved logo/favicon/default-social-image URLs and dimensions stay in sync with the underlying media row. diff --git a/packages/admin/src/components/MediaPickerModal.tsx b/packages/admin/src/components/MediaPickerModal.tsx index 176b734c9..df302b967 100644 --- a/packages/admin/src/components/MediaPickerModal.tsx +++ b/packages/admin/src/components/MediaPickerModal.tsx @@ -79,6 +79,18 @@ export interface MediaPickerModalProps { mimeTypeFilters?: string[]; /** `_emdash_fields` row id for server-side MIME widening. 
*/ fieldId?: string; + /** + * Restrict the picker to the local Library only — hides the "Insert from URL" + * input and suppresses external provider tabs. + * + * Use this for fields whose storage model only persists a local `mediaId`. + * Selecting an external URL or provider item would return an item the + * server cannot later resolve back to a URL (the `id` is either empty + * for "Insert from URL" or a provider-namespaced string that won't match + * a row in the `media` table). Site settings (logo, favicon, + * `seo.defaultOgImage`) are the canonical callers. + */ + localOnly?: boolean; } /** @@ -110,6 +122,7 @@ export function MediaPickerModal({ title: providedTitle, hideUrlInput = false, mediaKind = "image", + localOnly = false, }: MediaPickerModalProps) { const { t } = useLingui(); const isFileKind = mediaKind === "file"; @@ -144,7 +157,11 @@ export function MediaPickerModal({ Record >({}); - // Reset state when modal opens + // Reset state when modal opens, or when `localOnly` changes while it's + // already open. Without the `localOnly` dependency a parent that toggles + // the prop mid-session could leave `activeProvider` on a non-local tab + // (the tab UI is suppressed, but the selection state and provider-media + // query would still target the external provider). React.useEffect(() => { if (open) { setSelectedItem(null); @@ -155,13 +172,16 @@ export function MediaPickerModal({ setUploadError(null); setProviderDimensions({}); } - }, [open]); + }, [open, localOnly]); - // Fetch available providers + // Fetch available providers — skipped when `localOnly` is set since the + // list isn't used (provider tabs are suppressed and the active provider + // stays "local"). Avoids a request to /providers on every modal open + // when we'll just throw the result away. 
const { data: providers } = useQuery({ queryKey: ["media-providers"], queryFn: fetchMediaProviders, - enabled: open, + enabled: open && !localOnly, // Default to just local if fetch fails placeholderData: [], }); @@ -190,7 +210,10 @@ export function MediaPickerModal({ enabled: open && activeProvider === "local", }); - // Fetch provider media list + // Fetch provider media list. Belt-and-suspenders: the reset effect + // forces `activeProvider` back to "local" when `localOnly` is true, but + // also gate this query directly so a stale render can't fire an + // external request between state updates. const { data: providerData, isLoading: providerLoading } = useQuery({ queryKey: ["provider-media", activeProvider, filters?.join(",") ?? "", searchQuery], queryFn: () => @@ -199,7 +222,7 @@ export function MediaPickerModal({ limit: 50, query: searchQuery || undefined, }), - enabled: open && activeProvider !== "local", + enabled: open && !localOnly && activeProvider !== "local", }); const isLoading = activeProvider === "local" ? localLoading : providerLoading; @@ -397,12 +420,14 @@ export function MediaPickerModal({ const canSearch = activeProviderInfo?.capabilities.search ?? false; // Build provider tabs - always show local first, then add external providers - // Filter out "local" from API response since we add it manually + // Filter out "local" from API response since we add it manually. + // When `localOnly` is set, suppress external providers entirely so the + // picker can only return locally-stored media (see prop docs). 
const providerTabs = React.useMemo(() => { const tabs: Array<{ id: string; name: string; icon?: string }> = [ { id: "local", name: "Library", icon: undefined }, ]; - if (providers) { + if (providers && !localOnly) { for (const p of providers) { if (p.id !== "local") { tabs.push({ id: p.id, name: p.name, icon: p.icon }); @@ -410,7 +435,7 @@ export function MediaPickerModal({ } } return tabs; - }, [providers]); + }, [providers, localOnly]); return ( @@ -437,7 +462,7 @@ export function MediaPickerModal({ {/* URL Input (image pickers only — probes image dimensions) */} - {!hideUrlInput && ( + {!hideUrlInput && !localOnly && ( <>
diff --git a/packages/admin/src/components/settings/GeneralSettings.tsx b/packages/admin/src/components/settings/GeneralSettings.tsx index d1ecb42c1..601c9e034 100644 --- a/packages/admin/src/components/settings/GeneralSettings.tsx +++ b/packages/admin/src/components/settings/GeneralSettings.tsx @@ -172,16 +172,31 @@ export function GeneralSettings() { description={t`The public URL of your site (used for canonical links and sitemaps)`} /> - {/* Logo Picker */} + {/* Logo Picker -- + "configured" gates on `mediaId`, not `url`, so an orphaned + reference (media row deleted, or a stale provider id stored + pre-localOnly fix) still renders Remove. Otherwise the user + would see "Select Logo" and silently re-save the dangling + `mediaId` on any unrelated change. */}
- {formData.logo?.url ? ( + {formData.logo?.mediaId ? (
- {formData.logo.alt + {formData.logo.url ? ( + {formData.logo.alt + ) : ( +
+
+ )}
diff --git a/packages/admin/src/components/settings/SeoSettings.tsx b/packages/admin/src/components/settings/SeoSettings.tsx index e899b2db9..ce4887df4 100644 --- a/packages/admin/src/components/settings/SeoSettings.tsx +++ b/packages/admin/src/components/settings/SeoSettings.tsx @@ -4,14 +4,22 @@ * Title separator, search engine verification codes, and robots.txt. */ -import { Button, Input, InputArea } from "@cloudflare/kumo"; +import { Button, Input, InputArea, Label } from "@cloudflare/kumo"; import { useLingui } from "@lingui/react/macro"; -import { FloppyDisk, CheckCircle, WarningCircle, MagnifyingGlass } from "@phosphor-icons/react"; +import { + FloppyDisk, + CheckCircle, + WarningCircle, + MagnifyingGlass, + Upload, + X, +} from "@phosphor-icons/react"; import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query"; import * as React from "react"; -import { fetchSettings, updateSettings, type SiteSettings } from "../../lib/api"; +import { fetchSettings, updateSettings, type SiteSettings, type MediaItem } from "../../lib/api"; import { EditorHeader } from "../EditorHeader"; +import { MediaPickerModal } from "../MediaPickerModal"; import { BackToSettingsLink } from "./BackToSettingsLink.js"; export function SeoSettings() { @@ -29,6 +37,7 @@ export function SeoSettings() { type: "success" | "error"; message: string; } | null>(null); + const [ogImagePickerOpen, setOgImagePickerOpen] = React.useState(false); React.useEffect(() => { if (settings) setFormData(settings); @@ -70,6 +79,24 @@ export function SeoSettings() { })); }; + const handleDefaultOgImageSelect = (media: MediaItem) => { + setFormData((prev) => ({ + ...prev, + seo: { + ...prev.seo, + defaultOgImage: { mediaId: media.id, alt: media.alt || "", url: media.url }, + }, + })); + setOgImagePickerOpen(false); + }; + + const handleDefaultOgImageRemove = () => { + setFormData((prev) => ({ + ...prev, + seo: { ...prev.seo, defaultOgImage: undefined }, + })); + }; + if (isLoading) { return (
@@ -134,6 +161,68 @@ export function SeoSettings() { onChange={(e) => handleSeoChange("titleSeparator", e.target.value)} description={t`Character between page title and site name (e.g., "My Post | My Site")`} /> + + {/* Default OG Image Picker -- + "configured" is determined by presence of `mediaId`, not `url`. + When the referenced media row is deleted, the resolver returns the + bare ref without a URL; we still need to show Remove so the user can + clear the dangling reference. */} +
+ +

+ {t`Used as the fallback Open Graph image when a page has none. Recommended size: 1200×630.`} +

+ {formData.seo?.defaultOgImage?.mediaId ? ( +
+ {formData.seo.defaultOgImage.url ? ( + {formData.seo.defaultOgImage.alt + ) : ( +
+
+ )} +
+ + +
+
+ ) : ( + + )} +
+
+ + {/* Media Picker Modal -- + localOnly: storage shape is `{ mediaId }`, so URL/provider selections would + yield references the server cannot resolve. See MediaPickerModalProps.localOnly. + mimeTypeFilters: social-card scrapers expect rasterised content; SVG also gets + served as `Content-Disposition: attachment` by the media file route, making it + unusable as an OG image. */} +
); } diff --git a/packages/admin/tests/components/MediaPickerModal.test.tsx b/packages/admin/tests/components/MediaPickerModal.test.tsx index 67b9ef033..e3e3710e2 100644 --- a/packages/admin/tests/components/MediaPickerModal.test.tsx +++ b/packages/admin/tests/components/MediaPickerModal.test.tsx @@ -241,6 +241,64 @@ describe("MediaPickerModal", () => { const urlInput = document.querySelector('input[aria-label="Image URL"]'); expect(urlInput).toBeNull(); }); + + it("localOnly hides the URL input section", async () => { + // `localOnly` is for fields whose storage model only persists a local + // mediaId (e.g. site `logo`, `favicon`, `seo.defaultOgImage`). Selecting + // an external URL would return an item the server cannot resolve later. + const screen = await renderModal({ localOnly: true }); + + await expect.element(screen.getByText("Select Image")).toBeInTheDocument(); + expect(document.body.textContent).not.toContain("Insert from URL"); + + const urlInput = document.querySelector('input[aria-label="Image URL"]'); + expect(urlInput).toBeNull(); + }); + + it("renders external provider tabs by default (control for localOnly)", async () => { + // Establishes that providers DO appear without `localOnly`. Without + // this control assertion, the suppression test below could pass + // purely because the providers query hadn't resolved yet. 
+ const api = await import("../../src/lib/api"); + (api.fetchMediaProviders as any).mockResolvedValueOnce([ + { + id: "cloudflare-images", + name: "Cloudflare Images", + capabilities: { upload: true, search: false }, + }, + ]); + + const screen = await renderModal(); + await expect.element(screen.getByText("Cloudflare Images")).toBeInTheDocument(); + }); + + it("localOnly suppresses external provider tabs and skips the providers fetch", async () => { + const api = await import("../../src/lib/api"); + (api.fetchMediaProviders as any).mockResolvedValueOnce([ + { + id: "cloudflare-images", + name: "Cloudflare Images", + capabilities: { upload: true, search: false }, + }, + { + id: "unsplash", + name: "Unsplash", + capabilities: { upload: false, search: true }, + }, + ]); + + const screen = await renderModal({ localOnly: true }); + + await expect.element(screen.getByText("Select Image")).toBeInTheDocument(); + // External providers must not be reachable through any tab when + // localOnly is set, even if the API would report them. + expect(document.body.textContent).not.toContain("Cloudflare Images"); + expect(document.body.textContent).not.toContain("Unsplash"); + // `enabled: open && !localOnly` short-circuits the query, so the + // fetch should never have been issued. This proves the assertion + // above isn't just racing the resolve. + expect(api.fetchMediaProviders).not.toHaveBeenCalled(); + }); }); describe("mediaKind", () => { diff --git a/packages/core/src/api/schemas/settings.ts b/packages/core/src/api/schemas/settings.ts index f469ca445..b890fc987 100644 --- a/packages/core/src/api/schemas/settings.ts +++ b/packages/core/src/api/schemas/settings.ts @@ -4,9 +4,14 @@ import { httpUrl } from "./common.js"; // --------------------------------------------------------------------------- // Settings: Input schemas +// +// Media references on write are just `{ mediaId, alt? 
}` -- the resolved +// fields (`url`, `contentType`, `width`, `height`) are server-computed and +// stripped from any submitted body via Zod's default strip mode. See +// `packages/core/src/settings/types.ts` for the in-memory shape. // --------------------------------------------------------------------------- -const mediaReference = z.object({ +const mediaReferenceInput = z.object({ mediaId: z.string(), alt: z.string().optional(), }); @@ -20,9 +25,9 @@ const socialSettings = z.object({ youtube: z.string().optional(), }); -const seoSettings = z.object({ +const seoSettingsInput = z.object({ titleSeparator: z.string().max(10).optional(), - defaultOgImage: mediaReference.optional(), + defaultOgImage: mediaReferenceInput.optional(), robotsTxt: z.string().max(5000).optional(), googleVerification: z.string().max(100).optional(), bingVerification: z.string().max(100).optional(), @@ -32,32 +37,59 @@ export const settingsUpdateBody = z .object({ title: z.string().optional(), tagline: z.string().optional(), - logo: mediaReference.optional(), - favicon: mediaReference.optional(), + logo: mediaReferenceInput.optional(), + favicon: mediaReferenceInput.optional(), url: z.union([httpUrl, z.literal("")]).optional(), postsPerPage: z.number().int().min(1).max(100).optional(), dateFormat: z.string().optional(), timezone: z.string().optional(), social: socialSettings.optional(), - seo: seoSettings.optional(), + seo: seoSettingsInput.optional(), }) .meta({ id: "SettingsUpdateBody" }); // --------------------------------------------------------------------------- // Settings: Response schemas +// +// Responses carry the resolved fields populated by `resolveMediaReference` +// in `settings/index.ts`. Generated OpenAPI clients need to see them so +// they don't have to re-resolve the URL on the client. Fields stay +// optional because the resolver returns the bare ref if the underlying +// media row was deleted (orphaned reference). 
// --------------------------------------------------------------------------- +const mediaReferenceResponse = z.object({ + mediaId: z.string(), + alt: z.string().optional(), + /** Resolved media file URL; absent if the underlying row is missing. */ + url: z.string().optional(), + /** Stored MIME type (e.g. `image/svg+xml`). Populated alongside `url`. */ + contentType: z.string().optional(), + /** Pixel width if known. Populated alongside `url`. */ + width: z.number().int().optional(), + /** Pixel height if known. Populated alongside `url`. */ + height: z.number().int().optional(), +}); + +const seoSettingsResponse = z.object({ + titleSeparator: z.string().max(10).optional(), + defaultOgImage: mediaReferenceResponse.optional(), + robotsTxt: z.string().max(5000).optional(), + googleVerification: z.string().max(100).optional(), + bingVerification: z.string().max(100).optional(), +}); + export const siteSettingsSchema = z .object({ title: z.string().optional(), tagline: z.string().optional(), - logo: mediaReference.optional(), - favicon: mediaReference.optional(), + logo: mediaReferenceResponse.optional(), + favicon: mediaReferenceResponse.optional(), url: z.string().optional(), postsPerPage: z.number().int().optional(), dateFormat: z.string().optional(), timezone: z.string().optional(), social: socialSettings.optional(), - seo: seoSettings.optional(), + seo: seoSettingsResponse.optional(), }) .meta({ id: "SiteSettings" }); diff --git a/packages/core/src/astro/routes/api/media/[id].ts b/packages/core/src/astro/routes/api/media/[id].ts index caaaee1af..ae5adee37 100644 --- a/packages/core/src/astro/routes/api/media/[id].ts +++ b/packages/core/src/astro/routes/api/media/[id].ts @@ -135,7 +135,8 @@ export const DELETE: APIRoute = async ({ params, locals }) => { } } - // Delete from database + // Delete from database — site-settings cache invalidation happens + // in `EmDashRuntime.handleMediaDelete` so MCP/plugin paths inherit it. 
const result = await emdash.handleMediaDelete(id); return unwrapResult(result); diff --git a/packages/core/src/components/EmDashHead.astro b/packages/core/src/components/EmDashHead.astro index 79d964def..20628a965 100644 --- a/packages/core/src/components/EmDashHead.astro +++ b/packages/core/src/components/EmDashHead.astro @@ -3,8 +3,11 @@ * Renders base SEO metadata, plugin-contributed metadata, and trusted head fragments. * * Base SEO metadata (meta tags, OG, Twitter Card, canonical, JSON-LD) is generated - * from the page context's seo/articleMeta/siteName fields. Plugin contributions - * come after, so they can override base tags via first-wins dedup in resolvePageMetadata(). + * from the page context's seo/articleMeta/siteName fields. Contributions are + * composed in the order `[...plugin, ...site, ...base]` and resolved by + * `resolvePageMetadata()` with first-wins dedup. Plugins sit at the front of + * the array, so for any given key plugin contributions override site-level + * ones, which override base ones. * * Usage: * ```astro @@ -23,6 +26,7 @@ import { import { renderSiteIdentity } from "../page/site-identity.js"; import { getPageRuntime } from "../page/index.js"; import { getSiteSettings } from "../settings/index.js"; +import { absolutizeMediaUrl } from "../page/absolute-url.js"; interface Props { page: PublicPageContext; @@ -31,9 +35,6 @@ interface Props { const { page } = Astro.props; const runtime = getPageRuntime(Astro.locals as Record); -// Base SEO contributions from page context (always generated) -const baseContributions: PageMetadataContribution[] = generateBaseSeoContributions(page); - let metadataHtml = ""; let siteIdentityHtml = ""; let fragmentsHtml = ""; @@ -56,6 +57,25 @@ if (runtime) { runtime.collectPageFragments(page), ]); + // Site-level default OG image: applied per-page in base contributions + // rather than emitted unconditionally, so per-content images still win. 
+ // `resolveMediaReference` populates `.url` on read; only the mediaId is + // stored, so an empty/orphaned reference safely yields undefined here. + // + // Absolutize so `og:image` / `twitter:image` / JSON-LD `image` carry a + // fully-qualified URL: many social-card scrapers (Slack, LinkedIn) refuse + // to follow relative paths even when the rest of the page provides + // canonical context. + const defaultOgImage = absolutizeMediaUrl( + siteSettings.seo?.defaultOgImage?.url, + siteSettings.url, + page, + ); + const baseContributions: PageMetadataContribution[] = generateBaseSeoContributions( + page, + defaultOgImage, + ); + const siteContributions = generateSiteSeoContributions(siteSettings.seo); const allContributions = [...pluginContributions, ...siteContributions, ...baseContributions]; const resolved = resolvePageMetadata(allContributions); @@ -64,6 +84,7 @@ if (runtime) { fragmentsHtml = renderFragments(fragments, "head"); } else { // No runtime (EmDash not initialized) — still render base SEO + const baseContributions: PageMetadataContribution[] = generateBaseSeoContributions(page); const resolved = resolvePageMetadata(baseContributions); metadataHtml = renderPageMetadata(resolved); } diff --git a/packages/core/src/emdash-runtime.ts b/packages/core/src/emdash-runtime.ts index 9edd2226c..b846eb85b 100644 --- a/packages/core/src/emdash-runtime.ts +++ b/packages/core/src/emdash-runtime.ts @@ -164,6 +164,7 @@ import { PluginStateRepository } from "./plugins/state.js"; import { requestCached } from "./request-cache.js"; import { getRequestContext } from "./request-context.js"; import { FTSManager } from "./search/fts-manager.js"; +import { invalidateSiteSettingsCache } from "./settings/index.js"; /** * Map schema field types to editor field kinds @@ -2055,11 +2056,29 @@ export class EmDashRuntime { id: string, input: { alt?: string; caption?: string; width?: number; height?: number }, ) { - return handleMediaUpdate(this.db, id, input); + const result = await 
handleMediaUpdate(this.db, id, input); + // Resolved media references in site settings (`logo`, `favicon`, + // `seo.defaultOgImage`) bake in the media row's `contentType`, + // `width`, and `height`. A metadata edit invalidates that snapshot + // for every entry point: REST routes, MCP tools, plugin code, and + // any future caller of `handleMediaUpdate`. Cross-isolate staleness + // remains bounded by isolate lifetime. + if (result.success) { + invalidateSiteSettingsCache(); + } + return result; } async handleMediaDelete(id: string) { - return handleMediaDelete(this.db, id); + const result = await handleMediaDelete(this.db, id); + // Same reasoning as `handleMediaUpdate`: if the deleted media row + // was referenced by a setting, the cached resolved URL now points + // at a 404. Invalidation is unconditional on success — cheaper than + // querying which settings reference the id. + if (result.success) { + invalidateSiteSettingsCache(); + } + return result; } // ========================================================================= diff --git a/packages/core/src/media/local-runtime.ts b/packages/core/src/media/local-runtime.ts index 13e534f43..d257f0be0 100644 --- a/packages/core/src/media/local-runtime.ts +++ b/packages/core/src/media/local-runtime.ts @@ -14,6 +14,7 @@ import type { Kysely } from "kysely"; import { MediaRepository } from "../database/repositories/media.js"; import type { Database } from "../database/types.js"; import type { Storage } from "../index.js"; +import { invalidateSiteSettingsCache } from "../settings/index.js"; import type { CreateMediaProviderFn, MediaProvider, @@ -120,6 +121,12 @@ export const createMediaProvider: CreateMediaProviderFn } await repo.delete(id); + + // If this row was referenced by `logo`, `favicon`, or + // `seo.defaultOgImage`, the worker-scoped settings cache now + // holds a stale URL. The provider routes (and any future caller) + // bypass `handleMediaDelete`, so we invalidate here too. 
+ invalidateSiteSettingsCache(); }, getEmbed(value: MediaValue, _options?: EmbedOptions): EmbedResult { diff --git a/packages/core/src/page/absolute-url.ts b/packages/core/src/page/absolute-url.ts new file mode 100644 index 000000000..202f69c62 --- /dev/null +++ b/packages/core/src/page/absolute-url.ts @@ -0,0 +1,146 @@ +/** + * Helpers for resolving relative media URLs to absolute URLs for SEO output. + * + * Social-card scrapers (Facebook, LinkedIn, Slack, Twitter) and JSON-LD + * consumers expect absolute URLs in `og:image`, `twitter:image`, and + * structured-data `image` fields. EmDash's media file route returns a + * site-relative path (`/_emdash/api/media/file/...`), so anywhere the + * resolved URL feeds into crawler-facing markup we have to join it with + * the public site origin. + */ + +import type { PublicPageContext } from "../plugins/types.js"; + +const HTTP_URL_RE = /^https?:\/\//i; +/** + * Protocol-relative URLs (`//cdn.example.com/x.png`) are dropped outright. + * They have no legitimate use in `og:image` (scrapers want a full URL) and + * are a well-known SSRF vector when reflected through server-side + * fetchers. Anything starting with `//` returns `null`. + */ +const PROTOCOL_RELATIVE_RE = /^\/\//; +/** + * URL schemes we pass through unchanged because they are legitimately + * useful as OG image values. `data:image/*` is sometimes used for inline + * social cards (rare, but legal). Everything else with a scheme + * (`mailto:`, `tel:`, `file:`, `blob:`, custom protocols) would be garbage + * in an `og:image`; we return `null` so the caller can decide whether to + * fall back or drop the tag. + */ +const PASSTHROUGH_SCHEME_RE = /^data:image\//i; +/** + * Detects URLs that have a scheme other than http/https (and other than + * the data:image/ form we pass through). Used to short-circuit garbage + * input rather than treating it as a relative path. 
+ */ +const OTHER_SCHEME_RE = /^[a-z][a-z0-9+.-]*:/i; +/** + * Any whitespace (`\s`, which is Unicode-aware) or C0/C1 control character in the URL is + * an injection signal — legitimate media URLs never contain them. Without + * this guard, an input like `" https://attacker/x"` would slip past the + * scheme regexes (which are anchored at offset 0) and get joined as a + * relative path with the site origin, producing + * `https://site.example/ https://attacker/x`. That shape is confusing but not + * exploitable; more pathological shapes, such as leading newlines, could + * inject across header boundaries downstream. + */ +// eslint-disable-next-line eslint(no-control-regex) -- intentional: rejecting control chars is the whole point of this regex +const WHITESPACE_OR_CONTROL_RE = /[\s\u0000-\u001f\u007f-\u009f]/; +const TRAILING_SLASH_RE = /\/$/; + +/** + * `URL.origin` returns the literal string `"null"` (not the `null` value) + * for opaque origins like `data:`, `blob:`, and `about:blank`. Treating + * that as a valid origin would produce `null/og.png` in the output. + */ +function isUsableOrigin(origin: string): boolean { + return origin !== "null" && origin !== ""; +} + +/** + * Resolve the public origin to use when absolutizing a media URL. + * + * Precedence: + * 1. The configured `SiteSettings.url` (admin-controlled, canonical). + * 2. `PublicPageContext.siteUrl` (set by themes that override the origin, + * e.g. when running behind a reverse proxy). + * 3. The origin parsed from `page.url`, which is the live request URL. + * + * Only `http:` and `https:` candidates count — anything else (e.g. `file:`, + * `data:`, `blob:`) would yield an unusable origin and is skipped. Returns + * `null` if no candidate parses to a usable HTTP(S) origin; callers should + * treat that as "leave the URL relative" rather than throw. 
+ */ +export function resolveSiteOrigin( + configuredSiteUrl: string | undefined, + page: PublicPageContext, +): string | null { + const candidates = [configuredSiteUrl, page.siteUrl, page.url]; + for (const candidate of candidates) { + if (!candidate || typeof candidate !== "string") continue; + try { + const parsed = new URL(candidate); + if (parsed.protocol !== "http:" && parsed.protocol !== "https:") continue; + if (!isUsableOrigin(parsed.origin)) continue; + return parsed.origin; + } catch { + // Fall through to the next candidate. Configured URLs and page + // URLs can be malformed (e.g. an admin pasted "example.com" + // without a scheme); we don't want a bad config to break head + // rendering. + } + } + return null; +} + +/** + * Absolutize a media URL using the best available site origin. + * + * - Returns `null` for missing/empty input. + * - Passes through already-absolute `http(s):` URLs unchanged. + * - Passes through `data:image/*` URLs unchanged (rare but legal as OG + * image content). + * - Returns `null` for protocol-relative URLs (`//cdn.com/x`): no + * legitimate `og:image` use case, and a known SSRF vector when reflected + * through server-side fetchers. + * - Returns `null` for any other scheme (`mailto:`, `blob:`, `file:`, + * custom protocols): emitting those into `og:image` is worse than + * omitting the tag. + * - Returns the original (relative) URL when no origin can be resolved — + * preferable to dropping `og:image` outright because scrapers that follow + * relative URLs are better off than ones that get nothing. + * + * @param url - The (possibly relative) media URL, e.g. `/_emdash/api/media/file/abc.jpg`. + * @param configuredSiteUrl - `SiteSettings.url` value (admin-controlled). + * @param page - The page context providing `siteUrl` and `url` fallbacks. 
+ */ +export function absolutizeMediaUrl( + url: string | undefined, + configuredSiteUrl: string | undefined, + page: PublicPageContext, +): string | null { + if (!url) return null; + + // Any whitespace or control character means this isn't a real media URL. + // Rejecting up front prevents scheme-regex evasion (` https://x` would + // otherwise fall through to the relative-path join below). + if (WHITESPACE_OR_CONTROL_RE.test(url)) return null; + + if (HTTP_URL_RE.test(url)) return url; + if (PASSTHROUGH_SCHEME_RE.test(url)) return url; + + // Reject protocol-relative URLs before any other handling. Order + // matters: `OTHER_SCHEME_RE` wouldn't match `//x` (no leading scheme), + // so a missing check here would fall through to the relative-path + // join below and produce `https://site.example//cdn.evil.com/x`. + if (PROTOCOL_RELATIVE_RE.test(url)) return null; + + // Any remaining `<scheme>:` form is something we'd silently mangle by + // prepending an origin. Drop it. + if (OTHER_SCHEME_RE.test(url)) return null; + + const origin = resolveSiteOrigin(configuredSiteUrl, page); + if (!origin) return url; + const safePath = url.startsWith("/") ? url : `/${url}`; + return `${origin.replace(TRAILING_SLASH_RE, "")}${safePath}`; +} diff --git a/packages/core/src/page/jsonld.ts b/packages/core/src/page/jsonld.ts index 00300d3a8..2a2be7f77 100644 --- a/packages/core/src/page/jsonld.ts +++ b/packages/core/src/page/jsonld.ts @@ -29,13 +29,21 @@ export function cleanJsonLd(obj: Record): Record | null { +export function buildBlogPostingJsonLd( + page: PublicPageContext, + defaultOgImage?: string | null, +): Record | null { if (page.pageType !== "article" || !page.canonical) return null; const ogTitle = page.seo?.ogTitle ?? page.pageTitle ?? 
page.title; const description = page.seo?.ogDescription || page.description; - const ogImage = page.seo?.ogImage || page.image; + const ogImage = page.seo?.ogImage || page.image || defaultOgImage || null; const publishedTime = page.articleMeta?.publishedTime; const modifiedTime = page.articleMeta?.modifiedTime; const author = page.articleMeta?.author; diff --git a/packages/core/src/page/seo-contributions.ts b/packages/core/src/page/seo-contributions.ts index dc53bf5ce..403dcbf3f 100644 --- a/packages/core/src/page/seo-contributions.ts +++ b/packages/core/src/page/seo-contributions.ts @@ -1,9 +1,11 @@ /** * Generate base SEO metadata contributions from PublicPageContext. * - * These contributions are prepended BEFORE plugin contributions in - * resolvePageMetadata(), which uses first-wins dedup. This means - * plugins can override any base SEO tag by contributing the same key. + * EmDashHead.astro composes the final contribution list as + * `[...plugin, ...site, ...base]` and feeds it to `resolvePageMetadata()`, + * which is first-wins. That ordering means plugin contributions override + * site-level ones, which override base ones, for any given key — base values are + * the fallback, not the source of truth. * * This replaces the per-template SEO.astro components, eliminating * the class of XSS bugs where templates hand-rolled JSON-LD serialization. @@ -15,15 +17,24 @@ import { buildBlogPostingJsonLd, buildWebSiteJsonLd } from "./jsonld.js"; /** * Generate base metadata contributions from a page context's SEO data. + * + * @param page - Page context produced by the runtime for the current request. + * @param defaultOgImage - Optional site-wide fallback OG image URL, used when + * the page has no OG image of its own (i.e., neither `seo.ogImage` nor `image`). + * Sourced from `SiteSettings.seo.defaultOgImage` by `EmDashHead`. + * * Returns an empty array if no SEO-relevant data is present. 
*/ -export function generateBaseSeoContributions(page: PublicPageContext): PageMetadataContribution[] { +export function generateBaseSeoContributions( + page: PublicPageContext, + defaultOgImage?: string | null, +): PageMetadataContribution[] { const contributions: PageMetadataContribution[] = []; const description = page.description; const ogTitle = page.seo?.ogTitle ?? page.pageTitle ?? page.title; const ogDescription = page.seo?.ogDescription || description; - const ogImage = page.seo?.ogImage || page.image; + const ogImage = page.seo?.ogImage || page.image || defaultOgImage || null; const robots = page.seo?.robots; const canonical = page.canonical; const siteName = page.siteName; @@ -122,7 +133,7 @@ export function generateBaseSeoContributions(page: PublicPageContext): PageMetad // -- JSON-LD -- if (page.pageType === "article") { - const blogPosting = buildBlogPostingJsonLd(page); + const blogPosting = buildBlogPostingJsonLd(page, defaultOgImage ?? null); if (blogPosting) { contributions.push({ kind: "jsonld", id: "primary", graph: blogPosting }); } diff --git a/packages/core/src/plugins/context.ts b/packages/core/src/plugins/context.ts index 553c3e7f3..dbbb6d33a 100644 --- a/packages/core/src/plugins/context.ts +++ b/packages/core/src/plugins/context.ts @@ -21,6 +21,7 @@ import { SsrfError, stripCredentialHeaders, } from "../import/ssrf.js"; +import { invalidateSiteSettingsCache } from "../settings/index.js"; import type { Storage } from "../storage/types.js"; import { CronAccessImpl } from "./cron.js"; import type { EmailPipeline } from "./email.js"; @@ -502,7 +503,16 @@ export function createMediaAccessWithWrite( }, async delete(id: string): Promise { - return mediaRepo.delete(id); + const deleted = await mediaRepo.delete(id); + // Plugins can delete media that's referenced by site settings + // (`logo`, `favicon`, `seo.defaultOgImage`); the worker-scoped + // resolved-URL cache must be dropped or it will keep serving + // 404s. 
Matches the invalidation in + // `EmDashRuntime.handleMediaDelete`. + if (deleted) { + invalidateSiteSettingsCache(); + } + return deleted; }, }; } diff --git a/packages/core/src/query.ts b/packages/core/src/query.ts index 5a73c60ec..0060fc6cf 100644 --- a/packages/core/src/query.ts +++ b/packages/core/src/query.ts @@ -1,3 +1,4 @@ +/// <reference types="astro/client" /> /** * Query functions for EmDash content * @@ -10,6 +11,17 @@ * Preview mode is handled implicitly via ALS request context — * no parameters needed. The middleware verifies the preview token * and sets the context; query functions read it automatically. + * + * The triple-slash directive above pulls in the ambient declaration for + * `astro:content` (used by the dynamic imports below) so this source + * file typechecks even when reached transitively by a sibling package + * whose tsconfig doesn't list `astro/client` in `compilerOptions.types`. + * + * Note: the directive is stripped from the compiled output (`dist/*`) + * by tsdown, so it does not propagate to downstream consumers of the + * published package. Consumers are Astro sites and already provide their + * own `astro/client` ambient surface anyway, so the runtime dynamic + * import resolves there at typecheck time without our help.
*/ import { encodeCursor } from "./database/repositories/types.js"; diff --git a/packages/core/src/settings/index.ts b/packages/core/src/settings/index.ts index 27e7f2faa..4dfac181c 100644 --- a/packages/core/src/settings/index.ts +++ b/packages/core/src/settings/index.ts @@ -13,7 +13,7 @@ import type { Database } from "../database/types.js"; import { getDb } from "../loader.js"; import { peekRequestCache, requestCached } from "../request-cache.js"; import type { Storage } from "../storage/types.js"; -import type { SiteSettings, SiteSettingKey, MediaReference } from "./types.js"; +import type { SiteSettings, SiteSettingKey, MediaReference, SeoSettings } from "./types.js"; /** Prefix for site settings in the options table */ const SETTINGS_PREFIX = "site:"; @@ -178,6 +178,19 @@ export async function getSiteSettingWithDb( return resolved as SiteSettings[K] | undefined; } + if (key === "seo" && value && typeof value === "object") { + // eslint-disable-next-line typescript-eslint(no-unsafe-type-assertion) -- TS can't narrow generic K from key equality + const seo = value as SeoSettings; + if (seo.defaultOgImage) { + const resolved = { + ...seo, + defaultOgImage: await resolveMediaReference(seo.defaultOgImage, db, storage), + }; + // eslint-disable-next-line typescript-eslint(no-unsafe-type-assertion) -- TS can't narrow generic K from key equality + return resolved as SiteSettings[K] | undefined; + } + } + return value; } @@ -248,6 +261,12 @@ export async function getSiteSettingsWithDb( if (typedSettings.favicon) { typedSettings.favicon = await resolveMediaReference(typedSettings.favicon, db, storage); } + if (typedSettings.seo?.defaultOgImage) { + typedSettings.seo = { + ...typedSettings.seo, + defaultOgImage: await resolveMediaReference(typedSettings.seo.defaultOgImage, db, storage), + }; + } return typedSettings; } diff --git a/packages/core/src/settings/types.ts b/packages/core/src/settings/types.ts index dd0f9883c..5695910ba 100644 --- 
a/packages/core/src/settings/types.ts +++ b/packages/core/src/settings/types.ts @@ -5,19 +5,23 @@ */ /** - * Media reference for logo/favicon. + * Media reference for logo/favicon/seo.defaultOgImage. * * Stored shape is just `{ mediaId, alt? }`. The remaining fields are * populated by `resolveMediaReference` on read so templates can emit * correct head tags without a second round-trip to the media table. * - * The Zod schemas at the REST/MCP boundary (`mediaReference`) define - * only `mediaId` and `alt` and rely on default strip-mode parsing to - * discard the resolved fields if a client posts them back. If you - * ever switch those schemas to `passthrough`, you must also strip the - * resolved fields explicitly in `setSiteSettings`, or stored options - * will accumulate stale `url` / `contentType` / `width` / `height` - * snapshots. + * The Zod schemas at the REST/MCP boundary are split: + * - `mediaReferenceInput` (used by `settingsUpdateBody`) defines only + * `mediaId` and `alt`. Default strip-mode parsing discards any + * resolved fields a client posts back, so they never reach storage. + * - `mediaReferenceResponse` (used by `siteSettingsSchema`) includes + * the resolved fields so generated OpenAPI clients see them. + * + * If you ever switch `mediaReferenceInput` to `passthrough`, you must + * also strip the resolved fields explicitly in `setSiteSettings`, or + * stored options will accumulate stale `url` / `contentType` / `width` + * / `height` snapshots. 
*/ export interface MediaReference { mediaId: string; diff --git a/packages/core/tests/unit/page/absolute-url.test.ts b/packages/core/tests/unit/page/absolute-url.test.ts new file mode 100644 index 000000000..7894c68c2 --- /dev/null +++ b/packages/core/tests/unit/page/absolute-url.test.ts @@ -0,0 +1,261 @@ +import { describe, expect, it } from "vitest"; + +import { absolutizeMediaUrl, resolveSiteOrigin } from "../../../src/page/absolute-url.js"; +import type { PublicPageContext } from "../../../src/plugins/types.js"; + +function createPage(overrides: Partial = {}): PublicPageContext { + return { + url: "https://example.com/posts/hello", + path: "/posts/hello", + locale: null, + kind: "content", + pageType: "article", + title: "Hello", + description: null, + canonical: "https://example.com/posts/hello", + image: null, + ...overrides, + }; +} + +describe("resolveSiteOrigin", () => { + it("prefers configured site URL when valid", () => { + const page = createPage({ siteUrl: "https://page.example.com" }); + expect(resolveSiteOrigin("https://configured.example.com", page)).toBe( + "https://configured.example.com", + ); + }); + + it("strips path and query from configured URL, keeping origin", () => { + const page = createPage(); + expect(resolveSiteOrigin("https://configured.example.com/some/path?q=1", page)).toBe( + "https://configured.example.com", + ); + }); + + it("falls back to page.siteUrl when configured is unset", () => { + const page = createPage({ siteUrl: "https://page.example.com/extra" }); + expect(resolveSiteOrigin(undefined, page)).toBe("https://page.example.com"); + }); + + it("falls back to page.url origin when neither configured nor siteUrl is set", () => { + const page = createPage(); + expect(resolveSiteOrigin(undefined, page)).toBe("https://example.com"); + }); + + it("skips unparseable configured URL and uses next candidate", () => { + // "example.com" without scheme cannot be parsed by URL. 
+ const page = createPage({ siteUrl: "https://page.example.com" }); + expect(resolveSiteOrigin("example.com", page)).toBe("https://page.example.com"); + }); + + it("returns null when all candidates are unparseable", () => { + const page = createPage({ url: "not a url", siteUrl: "also not a url" }); + expect(resolveSiteOrigin("nope", page)).toBeNull(); + }); +}); + +describe("absolutizeMediaUrl", () => { + it("returns null for undefined", () => { + expect(absolutizeMediaUrl(undefined, "https://example.com", createPage())).toBeNull(); + }); + + it("returns null for empty string", () => { + expect(absolutizeMediaUrl("", "https://example.com", createPage())).toBeNull(); + }); + + it("passes through already-absolute https URL unchanged", () => { + const page = createPage(); + const url = "https://cdn.example.com/image.png"; + expect(absolutizeMediaUrl(url, "https://example.com", page)).toBe(url); + }); + + it("passes through already-absolute http URL unchanged", () => { + const page = createPage(); + const url = "http://cdn.example.com/image.png"; + expect(absolutizeMediaUrl(url, "https://example.com", page)).toBe(url); + }); + + it("joins relative URL with configured site origin", () => { + const page = createPage(); + expect(absolutizeMediaUrl("/_emdash/api/media/file/abc.png", "https://example.com", page)).toBe( + "https://example.com/_emdash/api/media/file/abc.png", + ); + }); + + it("strips trailing slash from configured origin before joining", () => { + const page = createPage(); + expect( + absolutizeMediaUrl("/_emdash/api/media/file/abc.png", "https://example.com/", page), + ).toBe("https://example.com/_emdash/api/media/file/abc.png"); + }); + + it("falls back to page.siteUrl when configured site URL is missing", () => { + const page = createPage({ siteUrl: "https://proxy.example.com" }); + expect(absolutizeMediaUrl("/og.png", undefined, page)).toBe("https://proxy.example.com/og.png"); + }); + + it("falls back to page.url origin when no configured URL or siteUrl 
set", () => { + const page = createPage({ url: "https://live.example.com/some/page" }); + expect(absolutizeMediaUrl("/og.png", undefined, page)).toBe("https://live.example.com/og.png"); + }); + + it("prepends slash to relative URL without leading slash", () => { + const page = createPage(); + expect(absolutizeMediaUrl("og.png", "https://example.com", page)).toBe( + "https://example.com/og.png", + ); + }); + + it("returns original relative URL when no origin can be resolved", () => { + // Construct a page where every origin candidate is unparseable. + // Better to emit a relative URL than to drop og:image entirely; + // some scrapers do resolve relative URLs against the page URL. + const page = createPage({ url: "garbage", siteUrl: undefined }); + expect(absolutizeMediaUrl("/og.png", undefined, page)).toBe("/og.png"); + }); + + describe("scheme handling", () => { + it("passes through data:image/* URLs unchanged", () => { + // data:image/* URLs are valid OG image values in some contexts; + // rewriting them into `https://site/data:...` produces garbage. + const dataUrl = "data:image/png;base64,iVBORw0KGgo="; + expect(absolutizeMediaUrl(dataUrl, "https://example.com", createPage())).toBe(dataUrl); + }); + + it("returns null for non-image data: URLs", () => { + // `data:text/plain` etc. are not valid OG images. + expect( + absolutizeMediaUrl("data:text/plain,hello", "https://example.com", createPage()), + ).toBeNull(); + }); + + it("returns null for blob: URLs", () => { + // blob: URLs are browser-only references that no crawler can resolve. + expect( + absolutizeMediaUrl("blob:https://example.com/abc", "https://example.com", createPage()), + ).toBeNull(); + }); + + it("returns null for mailto: URLs", () => { + expect( + absolutizeMediaUrl("mailto:hi@example.com", "https://example.com", createPage()), + ).toBeNull(); + }); + + it("returns null for file: URLs", () => { + // file: URLs leak local-FS config mistakes into crawler-visible markup. 
+ expect( + absolutizeMediaUrl("file:///etc/passwd", "https://example.com", createPage()), + ).toBeNull(); + }); + + it("returns null for protocol-relative URLs (SSRF guard)", () => { + // Protocol-relative URLs have no legitimate `og:image` use case and + // are a well-known SSRF vector when reflected through server-side + // fetchers. We drop them outright. + const page = createPage(); + expect( + absolutizeMediaUrl("//cdn.example.com/og.png", "https://example.com", page), + ).toBeNull(); + }); + + it("returns null for protocol-relative URLs even when no origin resolves", () => { + // Drop happens before origin resolution, so a malformed page context + // can't accidentally let one through. + const page = createPage({ url: "garbage", siteUrl: undefined }); + expect(absolutizeMediaUrl("//cdn.example.com/og.png", undefined, page)).toBeNull(); + }); + + it("returns null for unknown custom schemes", () => { + expect( + absolutizeMediaUrl("javascript:alert(1)", "https://example.com", createPage()), + ).toBeNull(); + expect(absolutizeMediaUrl("foo:bar/baz", "https://example.com", createPage())).toBeNull(); + }); + }); + + describe("whitespace / control char defense", () => { + // Real media URLs never contain whitespace or control characters. + // Rejecting them up front prevents scheme-regex evasion: an input + // like " https://attacker/x" would otherwise slip past HTTP_URL_RE + // (anchored at offset 0) and get joined as a relative path with the + // site origin. 
+ + it("rejects URLs with leading spaces", () => { + expect( + absolutizeMediaUrl(" https://attacker.example/x.png", "https://example.com", createPage()), + ).toBeNull(); + }); + + it("rejects URLs with leading tabs", () => { + expect( + absolutizeMediaUrl("\thttps://attacker.example/x.png", "https://example.com", createPage()), + ).toBeNull(); + }); + + it("rejects URLs with leading newlines (header injection vector)", () => { + expect( + absolutizeMediaUrl("\nhttps://attacker.example/x.png", "https://example.com", createPage()), + ).toBeNull(); + expect( + absolutizeMediaUrl( + "\r\nhttps://attacker.example/x.png", + "https://example.com", + createPage(), + ), + ).toBeNull(); + }); + + it("rejects URLs with embedded whitespace", () => { + expect( + absolutizeMediaUrl( + "https://example.com/path with space.png", + "https://example.com", + createPage(), + ), + ).toBeNull(); + }); + + it("rejects URLs with control characters", () => { + // C0 control (NUL) — never valid in a URL. + expect( + absolutizeMediaUrl("https://example.com/\u0000.png", "https://example.com", createPage()), + ).toBeNull(); + // DEL. + expect( + absolutizeMediaUrl("https://example.com/\u007f.png", "https://example.com", createPage()), + ).toBeNull(); + }); + + it("rejects space-prefixed protocol-relative URLs", () => { + // Without the whitespace guard, " //evil" wouldn't match + // PROTOCOL_RELATIVE_RE either and would be joined as a relative + // path. + expect( + absolutizeMediaUrl(" //attacker.example/x.png", "https://example.com", createPage()), + ).toBeNull(); + }); + }); + + describe("opaque origin rejection", () => { + it("rejects data: configured site URL and falls back to next candidate", () => { + // `new URL("data:...").origin` returns the literal string "null". + // Don't emit `null/og.png`. 
+ const page = createPage({ siteUrl: "https://page.example.com" }); + expect(absolutizeMediaUrl("/og.png", "data:text/plain,hello", page)).toBe( + "https://page.example.com/og.png", + ); + }); + + it("rejects file: URLs in the origin chain", () => { + const page = createPage({ + url: "https://live.example.com/x", + siteUrl: "file:///local/path", + }); + expect(absolutizeMediaUrl("/og.png", undefined, page)).toBe( + "https://live.example.com/og.png", + ); + }); + }); +}); diff --git a/packages/core/tests/unit/plugins/page-seo.test.ts b/packages/core/tests/unit/plugins/page-seo.test.ts index eeb2a50af..87dd07601 100644 --- a/packages/core/tests/unit/plugins/page-seo.test.ts +++ b/packages/core/tests/unit/plugins/page-seo.test.ts @@ -82,4 +82,102 @@ describe("page SEO metadata", () => { headline: "Hello World", }); }); + + describe("defaultOgImage fallback", () => { + const defaultOg = "https://example.com/site-default-og.png"; + + it("emits og:image and twitter:image from defaultOgImage when page has none", () => { + const page = createPage({ image: null }); + + const contributions = generateBaseSeoContributions(page, defaultOg); + + expect(contributions).toContainEqual({ + kind: "property", + property: "og:image", + content: defaultOg, + }); + expect(contributions).toContainEqual({ + kind: "meta", + name: "twitter:image", + content: defaultOg, + }); + // Card upgrades to summary_large_image once an image is present. + expect(contributions).toContainEqual({ + kind: "meta", + name: "twitter:card", + content: "summary_large_image", + }); + }); + + it("prefers page.image over the site default", () => { + const page = createPage({ image: "https://example.com/post-hero.png" }); + + const contributions = generateBaseSeoContributions(page, defaultOg); + + expect(contributions).toContainEqual({ + kind: "property", + property: "og:image", + content: "https://example.com/post-hero.png", + }); + // Site default should NOT appear when the page has its own image. 
+ expect( + contributions.some( + (c) => c.kind === "property" && c.property === "og:image" && c.content === defaultOg, + ), + ).toBe(false); + }); + + it("prefers seo.ogImage over both page.image and the site default", () => { + const page = createPage({ + image: "https://example.com/post-hero.png", + seo: { ogImage: "https://example.com/explicit-og.png" }, + }); + + const contributions = generateBaseSeoContributions(page, defaultOg); + + expect(contributions).toContainEqual({ + kind: "property", + property: "og:image", + content: "https://example.com/explicit-og.png", + }); + }); + + it("emits no og:image when neither page image nor defaultOgImage are set", () => { + const page = createPage({ image: null }); + + const contributions = generateBaseSeoContributions(page); + + expect(contributions.some((c) => c.kind === "property" && c.property === "og:image")).toBe( + false, + ); + // Card stays at summary when no image. + expect(contributions).toContainEqual({ + kind: "meta", + name: "twitter:card", + content: "summary", + }); + }); + + it("propagates defaultOgImage into BlogPosting JSON-LD image", () => { + const page = createPage({ + image: null, + articleMeta: { publishedTime: "2026-04-03T12:00:00.000Z" }, + }); + + const graph = buildBlogPostingJsonLd(page, defaultOg); + + expect(graph).toMatchObject({ image: defaultOg }); + }); + + it("BlogPosting JSON-LD prefers explicit page image over the default", () => { + const page = createPage({ + image: "https://example.com/post-hero.png", + articleMeta: { publishedTime: "2026-04-03T12:00:00.000Z" }, + }); + + const graph = buildBlogPostingJsonLd(page, defaultOg); + + expect(graph).toMatchObject({ image: "https://example.com/post-hero.png" }); + }); + }); }); diff --git a/packages/core/tests/unit/settings/settings.test.ts b/packages/core/tests/unit/settings/settings.test.ts index 8be5f6329..9fcbc3c24 100644 --- a/packages/core/tests/unit/settings/settings.test.ts +++ 
b/packages/core/tests/unit/settings/settings.test.ts @@ -221,6 +221,91 @@ describe("Site Settings", () => { const favicon = await getSiteSettingWithDb("favicon", db, null); expect(favicon?.mediaId).toBe("med_456"); }); + + // Regression: seo.defaultOgImage was defined in the data model and schemas + // but getSiteSettings*() never resolved the media reference, so the URL + // field was always absent and the documented MCP behavior was a lie. + it("resolves seo.defaultOgImage to a media file URL via getSiteSettings", async () => { + const mediaId = "med_og_default"; + const now = new Date().toISOString(); + await db + .insertInto("media" as never) + .values({ + id: mediaId, + filename: "og.png", + mime_type: "image/png", + size: 2048, + width: 1200, + height: 630, + storage_key: `media/${mediaId}.png`, + created_at: now, + } as never) + .execute(); + + await setSiteSettings( + { + seo: { + defaultOgImage: { mediaId, alt: "Default OG" }, + }, + }, + db, + ); + + const settings = await getSiteSettingsWithDb(db); + expect(settings.seo?.defaultOgImage?.mediaId).toBe(mediaId); + expect(settings.seo?.defaultOgImage?.alt).toBe("Default OG"); + expect(settings.seo?.defaultOgImage?.url).toBe( + `/_emdash/api/media/file/media/${mediaId}.png`, + ); + expect(settings.seo?.defaultOgImage?.contentType).toBe("image/png"); + expect(settings.seo?.defaultOgImage?.width).toBe(1200); + expect(settings.seo?.defaultOgImage?.height).toBe(630); + }); + + it("resolves seo.defaultOgImage via getSiteSetting('seo')", async () => { + const mediaId = "med_og_per_key"; + const now = new Date().toISOString(); + await db + .insertInto("media" as never) + .values({ + id: mediaId, + filename: "og.png", + mime_type: "image/png", + size: 1024, + storage_key: `media/${mediaId}.png`, + created_at: now, + } as never) + .execute(); + + await setSiteSettings( + { + seo: { + defaultOgImage: { mediaId }, + titleSeparator: " | ", + }, + }, + db, + ); + + const seo = await getSiteSettingWithDb("seo", db); + 
expect(seo?.defaultOgImage?.url).toBe(`/_emdash/api/media/file/media/${mediaId}.png`); + // Sibling fields preserved through the resolve+spread. + expect(seo?.titleSeparator).toBe(" | "); + }); + + it("returns seo settings unchanged when no defaultOgImage is set", async () => { + await setSiteSettings( + { + seo: { titleSeparator: " — ", googleVerification: "g123" }, + }, + db, + ); + + const settings = await getSiteSettingsWithDb(db); + expect(settings.seo?.titleSeparator).toBe(" — "); + expect(settings.seo?.googleVerification).toBe("g123"); + expect(settings.seo?.defaultOgImage).toBeUndefined(); + }); }); }); @@ -368,3 +453,108 @@ describe("Site Settings caching", () => { expect(prefixScans.length).toBe(1); }); }); + +// --------------------------------------------------------------------------- +// Cross-mutation cache invalidation +// +// `seo.defaultOgImage` (and `logo`/`favicon`) bake the media row's resolved +// URL, contentType, width, and height into a worker-scoped cache. Every code +// path that mutates the media table must therefore invalidate the cache, or +// readers will keep serving stale snapshots until the isolate dies. +// +// Invalidation lives in three places (any caller of any of these clears +// the cache): +// - `EmDashRuntime.handleMediaUpdate` / `handleMediaDelete` — REST + MCP. +// - Plugin context `media.delete()` — the plugin-facing public API. +// - `local-runtime.delete()` — the provider DELETE route's local path. +// +// The tests below pin the runtime-level contract. The plugin-context and +// local-runtime paths share `invalidateSiteSettingsCache()` directly, so +// their wiring is verified by inspection plus the cache tests above that +// prove `invalidateSiteSettingsCache()` itself drops the cache. 
+// --------------------------------------------------------------------------- + +describe("Media mutations invalidate site settings cache", () => { + beforeEach(() => { + invalidateSiteSettingsCache(); + }); + + it("EmDashRuntime.handleMediaDelete invalidates the cache on success", async () => { + const { createTestRuntime } = await import("../../utils/mcp-runtime.js"); + const { setupTestDatabaseWithCollections } = await import("../../utils/test-db.js"); + + const db = await setupTestDatabaseWithCollections(); + const runtime = createTestRuntime(db); + + // Seed a media row and reference it from settings so we can prove + // invalidation flushes the resolved snapshot. + const mediaId = "med_invalidation_delete"; + const now = new Date().toISOString(); + await db + .insertInto("media" as never) + .values({ + id: mediaId, + filename: "og.png", + mime_type: "image/png", + size: 1024, + storage_key: `media/${mediaId}.png`, + created_at: now, + } as never) + .execute(); + await setSiteSettings({ seo: { defaultOgImage: { mediaId } } }, db); + + await runWithContext({ editMode: false, db }, async () => { + const before = await getSiteSettings(); + expect(before.seo?.defaultOgImage?.url).toContain(mediaId); + }); + + const result = await runtime.handleMediaDelete(mediaId); + expect(result.success).toBe(true); + + // After delete, the resolved snapshot must be re-fetched; with the + // media row gone, the URL field disappears. 
+ await runWithContext({ editMode: false, db }, async () => { + const after = await getSiteSettings(); + expect(after.seo?.defaultOgImage?.url).toBeUndefined(); + }); + }); + + it("EmDashRuntime.handleMediaUpdate invalidates the cache on success", async () => { + const { createTestRuntime } = await import("../../utils/mcp-runtime.js"); + const { setupTestDatabaseWithCollections } = await import("../../utils/test-db.js"); + + const db = await setupTestDatabaseWithCollections(); + const runtime = createTestRuntime(db); + + const mediaId = "med_invalidation_update"; + const now = new Date().toISOString(); + await db + .insertInto("media" as never) + .values({ + id: mediaId, + filename: "og.png", + mime_type: "image/png", + size: 1024, + width: 800, + height: 600, + storage_key: `media/${mediaId}.png`, + created_at: now, + } as never) + .execute(); + await setSiteSettings({ seo: { defaultOgImage: { mediaId } } }, db); + + await runWithContext({ editMode: false, db }, async () => { + const before = await getSiteSettings(); + expect(before.seo?.defaultOgImage?.width).toBe(800); + }); + + const result = await runtime.handleMediaUpdate(mediaId, { width: 1200, height: 630 }); + expect(result.success).toBe(true); + + await runWithContext({ editMode: false, db }, async () => { + const after = await getSiteSettings(); + expect(after.seo?.defaultOgImage?.width).toBe(1200); + expect(after.seo?.defaultOgImage?.height).toBe(630); + }); + }); +});