From fefba5b75ce0f10194fbd56a4051f16bd62bb997 Mon Sep 17 00:00:00 2001 From: underscope Date: Fri, 27 Mar 2026 09:44:49 +0100 Subject: [PATCH 01/28] Improve asset description --- apps/backend/asset/extraction/synthetic-content.ts | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/apps/backend/asset/extraction/synthetic-content.ts b/apps/backend/asset/extraction/synthetic-content.ts index 58bd20d62..7167b3cd7 100644 --- a/apps/backend/asset/extraction/synthetic-content.ts +++ b/apps/backend/asset/extraction/synthetic-content.ts @@ -7,11 +7,19 @@ import type { Asset } from '../asset.model.js'; // Builds a markdown document from asset metadata and optional body text. -// Returns null when there's nothing meaningful to index (name-only). export function buildSyntheticContent( asset: Asset, bodyText = '', ): string | null { const parts: string[] = [`# ${asset.name}`]; + // Asset reference metadata - used by AI to identify and + // reference this asset when generating content elements. + parts.push([ + `[Asset ID: ${asset.id}]`, + `[Type: ${asset.type}]`, + asset.storageKey + ? 
`[Storage: ${asset.storageKey}]` + : `[URL: ${(asset.meta as any)?.url || ''}]`, + ].join(' ')); const desc = asset.meta?.description; const tags = asset.meta?.tags; if (desc) parts.push(`Description: ${desc}`); From 7f3fede3e40bf46259e74cefa7ac5c3770bbdf4c Mon Sep 17 00:00:00 2001 From: underscope Date: Fri, 27 Mar 2026 09:47:16 +0100 Subject: [PATCH 02/28] Add getter / setter for storeId --- apps/backend/repository/repository.model.js | 28 ++++++++------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/apps/backend/repository/repository.model.js b/apps/backend/repository/repository.model.js index af48624c0..bd105a143 100644 --- a/apps/backend/repository/repository.model.js +++ b/apps/backend/repository/repository.model.js @@ -1,4 +1,4 @@ -import { literal, Model, Op } from 'sequelize'; +import { Model, Op } from 'sequelize'; import first from 'lodash/first.js'; import intersection from 'lodash/intersection.js'; import map from 'lodash/map.js'; @@ -162,24 +162,18 @@ class Repository extends Model { return false; } - /** - * Atomically sets the AI vector store ID in the repository's - * data JSONB. Returns true if the value was written, false if - * another request already set it (concurrent indexing race). 
- */ async setVectorStoreId(storeId) { - const path = `{$$,ai,storeId}`; - const [count] = await Repository.update( - { data: literal(`jsonb_set(COALESCE(data,'{}'),'${path}','"${storeId}"')`) }, - { - where: { - id: this.id, - [Op.and]: literal(`data->'$$'->'ai'->'storeId' IS NULL`), - }, + const current = this.data || {}; + if (current.$$?.ai?.storeId) return false; + const merged = { + ...current, + $$: { + ...current.$$, + ai: { ...current.$$?.ai, storeId }, }, - ); - if (count > 0) await this.reload(); - return count > 0; + }; + await this.update({ data: merged }); + return true; } getVectorStoreId() { From 41fa490418cdacbed08ebf4d82ea9414917c26ef Mon Sep 17 00:00:00 2001 From: underscope Date: Fri, 27 Mar 2026 11:22:32 +0100 Subject: [PATCH 03/28] Improve indexing --- .../asset/extraction/synthetic-content.ts | 29 ++++++++++--------- .../asset/indexing/indexing.service.ts | 11 ++++++- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/apps/backend/asset/extraction/synthetic-content.ts b/apps/backend/asset/extraction/synthetic-content.ts index 7167b3cd7..b3f8f8822 100644 --- a/apps/backend/asset/extraction/synthetic-content.ts +++ b/apps/backend/asset/extraction/synthetic-content.ts @@ -6,25 +6,26 @@ */ import type { Asset } from '../asset.model.js'; -// Builds a markdown document from asset metadata and optional body text. +// Builds a markdown document from asset metadata +// and optional body text (captions, page content, etc.). +// Asset ID and type are included so the AI can discover +// and reference relevant media via vector store search. export function buildSyntheticContent( asset: Asset, bodyText = '', ): string | null { + const meta = asset.meta as any; const parts: string[] = [`# ${asset.name}`]; - // Asset reference metadata - used by AI to identify and - // reference this asset when generating content elements. - parts.push([ - `[Asset ID: ${asset.id}]`, - `[Type: ${asset.type}]`, - asset.storageKey - ? 
`[Storage: ${asset.storageKey}]` - : `[URL: ${(asset.meta as any)?.url || ''}]`, - ].join(' ')); - const desc = asset.meta?.description; - const tags = asset.meta?.tags; - if (desc) parts.push(`Description: ${desc}`); + parts.push(`Asset ID: ${asset.id} | Type: ${asset.type}`); + if (meta?.contentType) { + parts[1] += ` | Content: ${meta.contentType}`; + } + if (meta?.provider) { + parts[1] += ` | Provider: ${meta.provider}`; + } + const desc = meta?.description; + const tags = meta?.tags; + if (desc) parts.push(desc); if (tags?.length) parts.push(`Tags: ${tags.join(', ')}`); if (bodyText) parts.push(bodyText); - // Name-only content isn't worth indexing return parts.length > 1 ? parts.join('\n\n') : null; } diff --git a/apps/backend/asset/indexing/indexing.service.ts b/apps/backend/asset/indexing/indexing.service.ts index 559b867ea..07a57eef3 100644 --- a/apps/backend/asset/indexing/indexing.service.ts +++ b/apps/backend/asset/indexing/indexing.service.ts @@ -118,7 +118,16 @@ async function indexDocument(ctx: IndexingContext) { originalname: asset.name, mimetype: asset.meta.mimeType, }; - const result = await AIService.vectorStore!.upload([file], storeId); + const result = await AIService.vectorStore!.upload( + [file], storeId, + ); + // Companion metadata doc so AI can discover this + // document's asset ID via file_search + const meta = buildSyntheticContent(asset); + if (meta) { + indexSynthetic(storeId, meta, `${asset.id}-meta.md`) + .catch(() => {}); + } if (asset.meta.mimeType === mime.lookup('pdf')) { await extractAndSaveImages(asset, buffer); } From 8ef76e5eda0a60dbad5c9f27e6f8cdeb6be390fe Mon Sep 17 00:00:00 2001 From: underscope Date: Fri, 27 Mar 2026 11:23:03 +0100 Subject: [PATCH 04/28] Prefer unsplash --- apps/backend/asset/discovery/discovery.service.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/backend/asset/discovery/discovery.service.ts b/apps/backend/asset/discovery/discovery.service.ts index d20ee99a6..a4286232b 
100644 --- a/apps/backend/asset/discovery/discovery.service.ts +++ b/apps/backend/asset/discovery/discovery.service.ts @@ -126,8 +126,8 @@ const strategies: Record = { serper.newsSearch(q, Math.ceil(n * 0.15) + DEDUP_BUFFER), ], [Image]: (q, n) => [ - serper.imageSearch(q, n + DEDUP_BUFFER), - unsplash.search(q, Math.ceil(n / 2)), + unsplash.search(q, n), + serper.imageSearch(q, Math.ceil(n * 0.5) + DEDUP_BUFFER), ], [Video]: (q, n) => [serper.videoSearch(q, n + DEDUP_BUFFER)], [Pdf]: (q, n) => [serper.webSearch(`${q} filetype:pdf`, n + DEDUP_BUFFER)], From f63bbf78399a08f5b9d08887b3d0630dcebfac9b Mon Sep 17 00:00:00 2001 From: underscope Date: Fri, 27 Mar 2026 11:25:02 +0100 Subject: [PATCH 05/28] Update Outline generation prompt --- apps/backend/shared/ai/schemas/Outline.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/apps/backend/shared/ai/schemas/Outline.ts b/apps/backend/shared/ai/schemas/Outline.ts index eef2df44e..034073807 100644 --- a/apps/backend/shared/ai/schemas/Outline.ts +++ b/apps/backend/shared/ai/schemas/Outline.ts @@ -99,6 +99,11 @@ const getPrompt = (context: AiContext): string => { Make sure to have content holder nodes in the structure. The content holder nodes are the following: ${leafLevels.map((it) => it.label).join(', ')}. + IMPORTANT: Do NOT wrap all content inside a single top-level node that + represents the course or repository - the repository already serves that + purpose. For example, if generating content about "${context.repository.name}", + return multiple root items (e.g. "Introduction", "Getting Started", etc.), + NOT a single "${context.repository.name}" wrapper containing them all. 
${documentGuideline}`; }; From 21abd33fe4c8e4b8ded399975edcd794619f1048 Mon Sep 17 00:00:00 2001 From: underscope Date: Fri, 27 Mar 2026 18:50:15 +0100 Subject: [PATCH 06/28] Refine generation to support lib assets --- apps/backend/shared/ai/lib/AiPrompt.ts | 9 +- .../shared/ai/schemas/CcStructuredContent.ts | 296 ----------------- .../ai/schemas/CcStructuredContent/config.ts | 58 ++++ .../ai/schemas/CcStructuredContent/index.ts | 13 + .../ai/schemas/CcStructuredContent/media.ts | 147 +++++++++ .../ai/schemas/CcStructuredContent/prompt.ts | 300 ++++++++++++++++++ .../schemas/CcStructuredContent/response.ts | 61 ++++ .../ai/schemas/CcStructuredContent/schema.ts | 132 ++++++++ .../ai/schemas/CcStructuredContent/types.ts | 23 ++ apps/backend/shared/ai/schemas/CeHtml.ts | 10 +- apps/backend/shared/ai/schemas/index.ts | 2 +- apps/backend/shared/ai/schemas/interfaces.ts | 6 +- .../shared/content-plugins/elementRegistry.js | 5 + 13 files changed, 759 insertions(+), 303 deletions(-) delete mode 100644 apps/backend/shared/ai/schemas/CcStructuredContent.ts create mode 100644 apps/backend/shared/ai/schemas/CcStructuredContent/config.ts create mode 100644 apps/backend/shared/ai/schemas/CcStructuredContent/index.ts create mode 100644 apps/backend/shared/ai/schemas/CcStructuredContent/media.ts create mode 100644 apps/backend/shared/ai/schemas/CcStructuredContent/prompt.ts create mode 100644 apps/backend/shared/ai/schemas/CcStructuredContent/response.ts create mode 100644 apps/backend/shared/ai/schemas/CcStructuredContent/schema.ts create mode 100644 apps/backend/shared/ai/schemas/CcStructuredContent/types.ts diff --git a/apps/backend/shared/ai/lib/AiPrompt.ts b/apps/backend/shared/ai/lib/AiPrompt.ts index 5db5f43b4..8c1c6c990 100644 --- a/apps/backend/shared/ai/lib/AiPrompt.ts +++ b/apps/backend/shared/ai/lib/AiPrompt.ts @@ -7,12 +7,13 @@ import getContentSchema from '../schemas/index.ts'; import RepositoryContext from './RepositoryContext.ts'; import { ai as aiConfig } from 
'#config'; +import db from '#shared/database/index.js'; import { createLogger } from '#logger'; const logger = createLogger('ai:prompt'); const systemPrompt = ` - Assistant is a bot desinged to help authors to create content for + Assistant is a bot designed to help authors create content for Courses, Q&A content, Knowledge base, etc. Rules: - Use the User rules to generate the content @@ -23,7 +24,11 @@ const documentPrompt = ` The user has provided source documents indexed in a vector store. Use the file_search tool to find relevant information from these documents. Base ALL generated content on information found in the documents. - Do not invent information not present in the documents.`; + Do not invent information not present in the documents. + If any assets are marked as PRIMARY SOURCE, they represent the core knowledge + base. Structure and model your content primarily after these sources. + Supplementary assets provide additional context but should not override + the core sources.`; export class AiPrompt { // OpenAI client diff --git a/apps/backend/shared/ai/schemas/CcStructuredContent.ts b/apps/backend/shared/ai/schemas/CcStructuredContent.ts deleted file mode 100644 index 310a69572..000000000 --- a/apps/backend/shared/ai/schemas/CcStructuredContent.ts +++ /dev/null @@ -1,296 +0,0 @@ -import type { AiContext } from '@tailor-cms/interfaces/ai.ts'; -import { - getSchema as getMetaInputSchema, -} from '@tailor-cms/meta-element-collection/schema.js'; -import { schema as schemaAPI } from '@tailor-cms/config'; - -import type { AiResponseSpec, OpenAISchema } from './interfaces.ts'; -import { HTML_TYPE } from './CeHtml.ts'; -import elementRegistry from '../../content-plugins/elementRegistry.js'; - -// Resolve AI config (Schema + processResponse) for an element type. -const getAiSpec = (type: string) => elementRegistry.getAiConfig(type); - -// Filter to element types with AI support, fall back to HTML. 
-const resolveSupportedTypes = (types: string[]): string[] => { - const supported = types.filter((t) => getAiSpec(t)?.Schema); - return supported.length ? supported : [HTML_TYPE]; -}; - -interface MetaField { - key: string; - label: string; - // JSON Schema inferred from meta-input manifest at runtime. - // e.g. { type: 'string' }, { type: 'array', items: { type: 'number' } } - // null when meta-input has no schema (e.g. FILE) - excluded from AI output. - schema: { type: string; items?: { type: string } } | null; - options?: { value: string | number; label: string }[]; -} - -interface SubcontainerConfig { - label: string; - metaInputs: MetaField[]; - elementTypes: string[]; -} -type SubcontainerConfigs = Record; - -interface ParsedConfig { - subcontainers: SubcontainerConfigs; - ai?: { - definition?: string; - outputRules?: { prompt?: string }; - }; -} - -const obj = (properties: any, required: string[]) => ({ - type: 'object' as const, - properties, - ...(required.length && { required }), - additionalProperties: false, -}); - -// Server packages export raw content schemas (e.g. { content: 'string' } for HTML). -// Wrap into full element format with type discriminator and data envelope. -const buildElementSchema = (type: string) => { - const contentSchema = getAiSpec(type).Schema.schema; - return { - type: 'object' as const, - properties: { - type: { enum: [type] }, - ...contentSchema.properties, - }, - required: ['type', ...(contentSchema.required || [])], - additionalProperties: false, - }; -}; - -const getElementsSchema = (types: string[]) => { - const resolved = resolveSupportedTypes(types); - const schemas = resolved.map(buildElementSchema); - const items = schemas.length === 1 ? 
schemas[0] : { anyOf: schemas }; - return { type: 'array', items }; -}; - -// Processed contentElementConfig format: -// [{ name: 'Group', items: [{ id: 'TYPE' }] }] -const getElementTypeIds = (config?: any[]): string[] => - config?.flatMap((group: any) => - (group.items || []).map((it: any) => it.id || it), - ) ?? []; - -// Subcontainer meta can be a static array or a factory fn. -// e.g. meta: () => [{ key: 'title', type: 'TEXT_FIELD' }] -const getMetaDefinitions = (val: any): any[] => - typeof val.meta === 'function' - ? val.meta() - : val.meta || []; - -// Map raw meta definitions to MetaField with resolved schema. -// Options come from m.options (select) or m.items (radio). -const toMetaFields = (meta: any[]): MetaField[] => - meta.map((m: any) => ({ - key: m.key, - label: m.label, - schema: getMetaInputSchema(m.type, m), - ...((m.options || m.items) && { - options: m.options || m.items, - }), - })); - -// Parse container schema config into per-subcontainer configs. -// Resolves element types and meta field schemas for each -// subcontainer type defined in the container config. -const getConfigs = (context: AiContext): ParsedConfig => { - const empty: ParsedConfig = { subcontainers: {} }; - const { outlineActivityType, containerType } = - context.repository; - if (!outlineActivityType || !containerType) return empty; - const containers = schemaAPI.getSupportedContainers( - outlineActivityType, - ); - const container = containers.find( - (c: any) => c.type === containerType, - ); - if (!container?.config) return empty; - // Container-level element types as default fallback - const defaultElementTypes = getElementTypeIds( - container.contentElementConfig, - ); - const subcontainers: SubcontainerConfigs = {}; - for (const [type, val] of Object.entries( - container.config as Record, - )) { - // Subcontainer config overrides container-level - const elementTypes = val.contentElementConfig - ? 
getElementTypeIds(val.contentElementConfig) - : defaultElementTypes; - subcontainers[type] = { - label: val.label || type, - elementTypes, - metaInputs: toMetaFields(getMetaDefinitions(val)), - }; - } - return { subcontainers, ai: container.ai }; -}; - -// Build JSON schema for a single subcontainer type: -// discriminated by type enum, with per-type elements and data. -const buildSubcontainerSchema = ( - type: string, - config: SubcontainerConfig, -) => { - const { metaInputs, elementTypes } = config; - const props: Record = { - type: { enum: [type] }, - elements: getElementsSchema(elementTypes), - }; - const required = ['type', 'elements']; - const dataProps: Record = {}; - const dataRequired: string[] = []; - for (const field of metaInputs) { - if (!field.schema) continue; - const values = field.options?.map((o) => o.value); - dataProps[field.key] = values?.length - ? { ...field.schema, enum: values } - : field.schema; - dataRequired.push(field.key); - } - if (Object.keys(dataProps).length) { - props.data = obj(dataProps, dataRequired); - required.push('data'); - } - return obj(props, required); -}; - -// Build OpenAI structured output schema from container config. -// Each subcontainer type becomes a discriminated union variant -// with its own allowed element types and metadata fields. -export const Schema = (context: AiContext): OpenAISchema => { - const { subcontainers } = getConfigs(context); - // Default to generic section when no config is defined - const entries = Object.entries(subcontainers); - if (!entries.length) { - entries.push(['SECTION', { - label: 'Section', metaInputs: [], elementTypes: [], - }]); - } - // Build per-subcontainer schema with type discriminator - const schemas = entries.map(([type, config]) => - buildSubcontainerSchema(type, config), - ); - const subcontainerSchema = schemas.length === 1 - ? 
schemas[0] - : { anyOf: schemas }; - return { - type: 'json_schema', - name: 'cc_structured_content', - schema: obj( - { subcontainers: { type: 'array', items: subcontainerSchema } }, - ['subcontainers'], - ), - }; -}; - -const describeField = ({ key, label, options }: MetaField): string => { - const base = `"${key}" (${label})`; - const opts = options?.map((o) => o.value).join(', '); - return opts ? `${base} [options: ${opts}]` : base; -}; - -const describeElementTypes = (types: string[]): string => - resolveSupportedTypes(types) - .map((type) => { - const prompt = getAiSpec(type)?.getPrompt?.() || ''; - // Extract element description from server package prompt. - // e.g. "Generate a accordion content element as an object..." - // -> "a accordion content element" - const match = prompt.match(/generate\s+(.+?)\s+as\s+an/i); - return ` - "${type}": ${match?.[1] || type}`; - }) - .join('\n'); - -const describeSubcontainerTypes = (configs: SubcontainerConfigs): string => { - const entries = Object.entries(configs); - if (!entries.length) return ' - Type "SECTION" (Section)'; - return entries - .map(([type, { label, metaInputs = [] }]) => { - const fields = metaInputs.map(describeField).join(', '); - const suffix = fields ? `: metadata fields: ${fields}` : ''; - return ` - Type "${type}" (${label})${suffix}`; - }) - .join('\n'); -}; - -// Build prompt with available element types, subcontainer types, -// metadata fields, and container-level AI instructions. 
-export const getPrompt = (context: AiContext) => { - const { subcontainers, ai } = getConfigs(context); - // Collect all unique element types across subcontainers - const allElementTypes = [ - ...new Set(Object.values(subcontainers).flatMap((c) => c.elementTypes)), - ]; - const guidelines: string[] = [ - '- Fill in ALL metadata fields with values relevant to each subcontainer\'s content', - '- Each subcontainer should focus on a distinct topic or aspect', - '- Choose the best element type for each piece of content', - '- Skip media elements (images, videos, audio, files)', - '- Include at most one question element per subcontainer', - ]; - // Container ai.definition describes the content purpose - // e.g. "Learning Bit content is organized into sections" - if (ai?.definition) { - guidelines.push(`- Context: ${ai.definition}`); - } - if (context.repository.vectorStoreId) { - guidelines.push( - '- Base ALL content on the provided source documents', - '- Reference specific information, data, and examples from the documents', - '- Do not invent information not present in the documents', - ); - } - if (ai?.outputRules?.prompt) { - guidelines.push(ai.outputRules.prompt.trim()); - } - return ` - Response should be a JSON object with a "subcontainers" array. - Each subcontainer has: - - "type": one of the available subcontainer types - - "data": metadata object with the described fields filled in - - "elements": array of content elements (format defined by the schema) - - Available element types: - ${describeElementTypes(allElementTypes)} - - Available subcontainer types: - ${describeSubcontainerTypes(subcontainers)} - - Guidelines: - ${guidelines.join('\n ')}`; -}; - -// AI returns flat elements matching the content schema (e.g. { type, content } for HTML). -// Server processResponse transforms raw content into the element's data format. 
-const processElement = (el: any) => { - const { type, ...rawContent } = el; - const spec = getAiSpec(type); - const data = spec?.processResponse - ? spec.processResponse(rawContent) - : rawContent; - return { type, data }; -}; - -const processResponse = (data: any = {}) => { - const subcontainers = data?.subcontainers || []; - return subcontainers.map((sc: any) => ({ - ...sc, - elements: (sc.elements || []).map(processElement), - })); -}; - -const spec: AiResponseSpec = { - getPrompt, - Schema, - processResponse, -}; - -export default spec; diff --git a/apps/backend/shared/ai/schemas/CcStructuredContent/config.ts b/apps/backend/shared/ai/schemas/CcStructuredContent/config.ts new file mode 100644 index 000000000..8a16a0211 --- /dev/null +++ b/apps/backend/shared/ai/schemas/CcStructuredContent/config.ts @@ -0,0 +1,58 @@ +// Container config resolution. +// Parses schema config into per-subcontainer configs +// with element types and metadata field schemas. +import { + getSchema as getMetaInputSchema, +} from '@tailor-cms/meta-element-collection/schema.js'; +import { schema as schemaAPI } from '@tailor-cms/config'; +import type { AiContext } from '@tailor-cms/interfaces/ai.ts'; + +import type { + MetaField, + ParsedConfig, + SubcontainerConfigs, +} from './types.ts'; + +const { flattenElementTypeIds } = schemaAPI; + +const getMetaDefinitions = (val: any): any[] => + typeof val.meta === 'function' + ? 
val.meta() + : val.meta || []; + +const toMetaFields = (meta: any[]): MetaField[] => + meta.map((m: any) => ({ + key: m.key, + label: m.label, + schema: getMetaInputSchema(m.type, m), + ...((m.options || m.items) && { + options: m.options || m.items, + }), + })); + +export const getConfigs = (context: AiContext): ParsedConfig => { + const empty: ParsedConfig = { subcontainers: {} }; + const { repository } = context; + const { outlineActivityType, containerType } = repository; + if (!outlineActivityType || !containerType) return empty; + const containers = schemaAPI.getSupportedContainers( + outlineActivityType, + ); + const container = containers.find((c: any) => c.type === containerType); + if (!container?.config) return empty; + const defaultTypes = flattenElementTypeIds(container.contentElementConfig); + const subcontainers: SubcontainerConfigs = {}; + for (const [type, val] of Object.entries( + container.config as Record, + )) { + const elementTypes = val.contentElementConfig + ? flattenElementTypeIds(val.contentElementConfig) + : defaultTypes; + subcontainers[type] = { + label: val.label || type, + elementTypes, + metaInputs: toMetaFields(getMetaDefinitions(val)), + }; + } + return { subcontainers, ai: container.ai }; +}; diff --git a/apps/backend/shared/ai/schemas/CcStructuredContent/index.ts b/apps/backend/shared/ai/schemas/CcStructuredContent/index.ts new file mode 100644 index 000000000..52a561bf2 --- /dev/null +++ b/apps/backend/shared/ai/schemas/CcStructuredContent/index.ts @@ -0,0 +1,13 @@ +import type { AiResponseSpec } from '../interfaces.ts'; +import { Schema } from './schema.ts'; +import { getPrompt } from './prompt.ts'; +import { processResponse } from './response.ts'; + +const spec: AiResponseSpec = { + getPrompt, + Schema, + processResponse, +}; + +export default spec; +export { Schema, getPrompt, processResponse }; diff --git a/apps/backend/shared/ai/schemas/CcStructuredContent/media.ts 
b/apps/backend/shared/ai/schemas/CcStructuredContent/media.ts new file mode 100644 index 000000000..60acf0835 --- /dev/null +++ b/apps/backend/shared/ai/schemas/CcStructuredContent/media.ts @@ -0,0 +1,147 @@ +// Media element schemas and processing. +// IMAGE, VIDEO, and EMBED packages have no AI specs - +// they're simple media containers. Schemas here use +// assetId to map vector store asset references to +// elements. processMediaElement then resolves assetId +// → native element data (storage:// URLs, alt text, +// embed transforms). +import type { AssetReference } from '@tailor-cms/interfaces/ai.ts'; +import { AssetType } from '@tailor-cms/interfaces/asset.ts'; +import { ContentElementType } from '@tailor-cms/content-element-collection/types.js'; +import { oneLine } from 'common-tags'; +import { toEmbedUrl } from '@tailor-cms/common/asset'; + +import { createLogger } from '#logger'; + +const logger = createLogger('ai:structured-content'); + +const obj = (properties: any, required: string[]) => ({ + type: 'object' as const, + properties, + ...(required.length && { required }), + additionalProperties: false, +}); + +// JSON schemas for OpenAI structured output. +// assetId maps to vector store catalog entries; +// processMediaElement resolves to native element data. +export const MEDIA_SCHEMAS: Record = { + [ContentElementType.Image]: obj( + { + type: { enum: [ContentElementType.Image] }, + assetId: { type: 'integer' }, + alt: { type: 'string' }, + }, + ['type', 'assetId', 'alt'], + ), + [ContentElementType.Video]: obj( + { + type: { enum: [ContentElementType.Video] }, + assetId: { type: 'integer' }, + }, + ['type', 'assetId'], + ), + [ContentElementType.Embed]: obj( + { + type: { enum: [ContentElementType.Embed] }, + assetId: { type: 'integer' }, + }, + ['type', 'assetId'], + ), +}; + +export const MEDIA_DESCRIPTIONS: Record = { + [ContentElementType.Image]: oneLine` + a standalone image element for photos, diagrams. 
+ NEVER use tags inside HTML.`, + [ContentElementType.Video]: oneLine` + a video player for uploaded video files. + Only for assets marked "→ use as VIDEO".`, + [ContentElementType.Embed]: oneLine` + an embedded resource (video, interactive content, + web page). Only for assets marked "→ use as EMBED".`, +}; + +// Uses asset.contentType set by detectLinkProvider +// at creation — no URL parsing needed here. +export const isVideoLink = (a: AssetReference) => + a.type === AssetType.Link && a.contentType === 'video'; + +// TODO: Figure out MUX video support +export const isVideoFile = (a: AssetReference) => + a.type === AssetType.Video || (a.contentType === 'video' && !!a.storageKey); + +// Resolve what element type an asset maps to. +// Returns the element type and a display label. +export const resolveAssetElementType = ( + asset: AssetReference, +): { elementType: string; label: string } | null => { + if (asset.type === AssetType.Image) { + return { + elementType: ContentElementType.Image, + label: 'image', + }; + } + if (isVideoFile(asset)) { + return { + elementType: ContentElementType.Video, + label: 'video', + }; + } + if (isVideoLink(asset)) { + return { + elementType: ContentElementType.Embed, + label: 'video link', + }; + } + return null; +}; + +// Resolve assetId to its reference from context +const resolveAsset = (assetId: number, assets: AssetReference[]) => { + const asset = assets.find((a) => a.id === assetId); + if (!asset) logger.warn({ assetId }, 'Asset not found'); + return asset || null; +}; + +// Transform media AI output into element data. +// Uses storage:// URLs so Tailor resolves signed URLs. +export const processMediaElement = (el: any, assets: AssetReference[]) => { + const asset = resolveAsset(el.assetId, assets); + if (!asset) return null; + if (el.type === ContentElementType.Image) { + const url = asset.storageKey + ? 
`storage://${asset.storageKey}` + : asset.publicUrl || asset.url || ''; + const isInternal = !!asset.storageKey; + return { + type: ContentElementType.Image, + data: { + url: isInternal ? '' : url, + alt: el.alt || asset.description || '', + assets: isInternal ? { url } : {}, + }, + }; + } + if (el.type === ContentElementType.Video) { + const url = asset.storageKey + ? `storage://${asset.storageKey}` + : asset.publicUrl || asset.url || ''; + const isInternal = !!asset.storageKey; + return { + type: ContentElementType.Video, + data: { + url: isInternal ? '' : url, + assets: isInternal ? { url } : {}, + }, + }; + } + if (el.type === ContentElementType.Embed) { + const url = asset.url || asset.publicUrl || ''; + return { + type: ContentElementType.Embed, + data: { url: toEmbedUrl(url) || url, height: 400 }, + }; + } + return el; +}; diff --git a/apps/backend/shared/ai/schemas/CcStructuredContent/prompt.ts b/apps/backend/shared/ai/schemas/CcStructuredContent/prompt.ts new file mode 100644 index 000000000..db8e88aad --- /dev/null +++ b/apps/backend/shared/ai/schemas/CcStructuredContent/prompt.ts @@ -0,0 +1,300 @@ +// Prompt builder for structured content generation. +// Assembles element descriptions, subcontainer types, +// guidelines, and asset catalog into the system prompt. +import type { AiContext, AssetReference } from '@tailor-cms/interfaces/ai.ts'; + +import elementRegistry from '../../../content-plugins/elementRegistry.js'; +import { getConfigs } from './config.ts'; +import { + MEDIA_DESCRIPTIONS, + isVideoFile, + isVideoLink, + resolveAssetElementType, +} from './media.ts'; +import { getAiSpec, resolveSupportedTypes } from './schema.ts'; +import type { MetaField, ParsedConfig, SubcontainerConfigs } from './types.ts'; + +const describeField = ({ key, label, options }: MetaField): string => { + const base = `"${key}" (${label})`; + const opts = options?.map((o) => o.value).join(', '); + return opts ? 
`${base} [options: ${opts}]` : base; +}; + +// Extract a human-readable description from an element's +// getPrompt(). Convention: prompts start with +// "Generate a as an object...". +// Falls back to the raw type ID if no prompt or no match. +const extractElementDescription = (type: string): string => { + const spec = getAiSpec(type); + if (!spec?.getPrompt) return type; + const prompt = spec.getPrompt(); + if (!prompt) return type; + // "Generate a accordion content element as an" → "a accordion...." + const match = prompt.match(/generate\s+(.+?)\s+as\s+an/i); + return match?.[1] || type; +}; + +// Build element type descriptions for the prompt. +// When hasAssets is true, IMAGE and EMBED types are +// appended so the AI knows it can reference media. +const describeElementTypes = ( + types: string[], + hasAssets = false, +): string => { + const resolved = resolveSupportedTypes(types); + const lines = resolved.map((type) => { + const desc = extractElementDescription(type); + return ` - "${type}": ${desc}`; + }); + if (hasAssets) { + for (const [type, desc] of Object.entries(MEDIA_DESCRIPTIONS)) { + if (!resolved.includes(type)) { + lines.push(` - "${type}": ${desc}`); + } + } + } + return lines.join('\n'); +}; + +const describeSubcontainerTypes = (configs: SubcontainerConfigs): string => { + const entries = Object.entries(configs); + if (!entries.length) { + return ' - Type "SECTION" (Section)'; + } + return entries + .map(([type, { label, metaInputs = [] }]) => { + const fields = metaInputs.map(describeField).join(', '); + const suffix = fields ? `: metadata fields: ${fields}` : ''; + return ` - Type "${type}" (${label})${suffix}`; + }) + .join('\n'); +}; + +// Build asset catalog for the prompt. +// Includes ALL usable assets — vector store file_search +// handles relevance, this just lists valid IDs for the AI. 
+const buildAssetCatalog = (assets: AssetReference[]): string => { + const usable = assets.filter((it) => it.publicUrl || it.url || it.storageKey); + if (!usable.length) return ''; + const lines = usable.map((it) => { + const media = resolveAssetElementType(it); + const hint = media ? ` → use as ${media.elementType}` : ''; + const label = media?.label || it.type; + return ` - ID:${it.id} [${label}] "${it.name}"${hint}`; + }); + return ['', 'Assets available (reference by assetId):', ...lines].join( + '\n ', + ); +}; + +const buildGuidelines = ( + context: AiContext, + ai: ParsedConfig['ai'], + hasAssets: boolean, + elementTypes: string[], +): string[] => { + const hasQuestions = elementTypes.some( + (t) => elementRegistry.isQuestion(t), + ); + const guidelines = [ + '- Fill in ALL metadata fields', + '- Each subcontainer: distinct topic or aspect', + ]; + // Perspective and depth + guidelines.push( + '- Write from an educator/teacher perspective:', + ' clear explanations, progressive complexity,', + ' practical examples, learning objectives', + '- Each subcontainer must thoroughly cover its', + ' topic — substantive, not superficial', + '- Structure content for effective learning:', + ' introduce concepts, explain, illustrate, assess', + ); + // HTML element formatting + guidelines.push( + '- HTML elements: use text-body-2 mb-5 on

<p>,',
      ' text-h3 mb-7 on headings',
      '- Use <ul>/<ol>, <table>
      , for variety', + '- Accent important sections with CSS classes:', + ' "ce-highlight" for key takeaways,', + ' "ce-callout" for tips/warnings,', + ' "ce-example" for worked examples.', + ' Add minimal inline style as default fallback', + ' (e.g. border-left, background) — presentation', + ' layer can override these classes', + '- Each HTML element: focused content block,', + ' 300-600 words per element', + '- Mix element types coherently: text for concepts,', + ' questions to reinforce learning, media to', + ' illustrate — each element should serve a', + ' pedagogical purpose, not just variety for its own sake', + ); + // Question element guidance + if (hasQuestions) { + guidelines.push( + '- Place a question after teaching a concept —', + ' it should check understanding of what was', + ' just explained, not test random knowledge', + '- Max one question per subcontainer', + '- Pick the question type best suited to the', + ' concept being assessed (e.g. true/false for', + ' facts, multiple choice for distinctions)', + '- Write clear, unambiguous answer options', + '- Include plausible distractors in choices', + ); + } + if (hasAssets) { + const hasVideos = context.assets?.some( + (a) => isVideoLink(a) || isVideoFile(a), + ); + guidelines.push( + '- Use assets as SEPARATE elements', + '- Reference assets by their assetId number', + '- Images: IMAGE element with assetId,', + ' NEVER in HTML', + '- Uploaded videos: VIDEO element with assetId.', + ' Only for assets marked "→ use as VIDEO"', + '- Video links (YouTube, Vimeo): EMBED element.', + ' Only for assets marked "→ use as EMBED"', + '- NEVER use ,