Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 162 additions & 11 deletions daemon/cli.js
Original file line number Diff line number Diff line change
@@ -1,24 +1,44 @@
#!/usr/bin/env node
import { startServer } from './server.js';

const args = process.argv.slice(2);
const argv = process.argv.slice(2);

// ---- Subcommand router ----------------------------------------------------
//
// `od` is two CLIs glued together:
// - default mode: starts the daemon + opens the web UI.
// - `od media …`: a thin client that POSTs to the running daemon. This
// is what the code agent invokes from inside a chat to actually
// produce image / video / audio bytes (the unifying contract).
//
// We dispatch on the first positional argument so flags like --port keep
// working unchanged. Subcommand routing is keyword-based; flags are
// parsed inside each handler.

// Registry of subcommand keyword -> async handler. Each handler receives the
// remaining argv with the subcommand keyword itself removed.
const SUBCOMMAND_MAP = {
  media: runMedia,
};

// Dispatch on the first argument that does not look like a flag. The lookup
// is restricted to own properties so that words which happen to name
// Object.prototype members ("constructor", "toString", "__proto__", ...) are
// not mistaken for subcommands; anything unrecognized falls through to the
// default daemon mode.
const first = argv.find((a) => !a.startsWith('-'));
if (first && Object.hasOwn(SUBCOMMAND_MAP, first)) {
  const idx = argv.indexOf(first);
  // Hand the handler every argument except the subcommand keyword.
  const rest = [...argv.slice(0, idx), ...argv.slice(idx + 1)];
  await SUBCOMMAND_MAP[first](rest);
  process.exit(0);
}

// Default: daemon mode.
let port = Number(process.env.OD_PORT) || 7456;
let open = true;

for (let i = 0; i < args.length; i++) {
const a = args[i];
for (let i = 0; i < argv.length; i++) {
const a = argv[i];
if (a === '-p' || a === '--port') {
port = Number(args[++i]);
port = Number(argv[++i]);
} else if (a === '--no-open') {
open = false;
} else if (a === '-h' || a === '--help') {
console.log(`Usage: od [--port <n>] [--no-open]

Starts a local daemon that:
* scans PATH for installed code-agent CLIs (claude, codex, gemini, opencode, cursor-agent, ...)
* serves a tiny web chat UI at http://localhost:<port>
* proxies messages (text + images) to the selected agent via child-process spawn
`);
printRootHelp();
process.exit(0);
}
}
Expand All @@ -34,3 +54,134 @@ startServer({ port }).then(url => {
});
}
});

/**
 * Print the top-level `od` usage text to stdout with a single console.log
 * call. The text covers both entry points (daemon mode and
 * `od media generate`) followed by a summary of what the daemon does.
 */
function printRootHelp() {
  const usageLines = [
    'Usage:',
    'od [--port <n>] [--no-open]',
    'Start the local daemon and open the web UI.',
    '',
    'od media generate --surface <image|video|audio> --model <id> [opts]',
    'Generate a media artifact and write it into the active project.',
    'Designed to be invoked by a code agent — picks up OD_DAEMON_URL',
    'and OD_PROJECT_ID from the env that the daemon injected on spawn.',
    '',
    'What the daemon does:',
    '* scans PATH for installed code-agent CLIs (claude, codex, gemini, opencode, cursor-agent, ...)',
    '* serves the chat UI at http://localhost:<port>',
    '* proxies messages (text + images) to the selected agent via child-process spawn',
    '* exposes /api/projects/:id/media/generate — the unified image/video/audio',
    'dispatcher that the agent calls via `od media generate`.',
  ];
  console.log(usageLines.join('\n'));
}

// ---------------------------------------------------------------------------
// Subcommand: od media …
// ---------------------------------------------------------------------------

/**
 * Interpret a `--length` / `--duration` flag value as a positive number of
 * seconds.
 *
 * @param {string|boolean} raw - Value as produced by parseFlags: a string, or
 *   `true` when the flag was given without a value.
 * @returns {number|null} The parsed number, or null when the value is
 *   missing, non-numeric, non-finite, zero or negative.
 */
function parsePositiveSeconds(raw) {
  // A bare flag parses to `true`, and Number(true) === 1 — reject it rather
  // than silently treating it as one second.
  if (typeof raw !== 'string') return null;
  const seconds = Number(raw);
  return Number.isFinite(seconds) && seconds > 0 ? seconds : null;
}

/**
 * Handler for `od media …`: a thin client that POSTs a generation request to
 * the daemon and prints the daemon's JSON response on a single line.
 *
 * The daemon URL comes from --daemon-url, then OD_DAEMON_URL, then
 * http://127.0.0.1:7456. The project id comes from --project, then
 * OD_PROJECT_ID.
 *
 * Exit codes: 1 unknown subcommand, 2 invalid or missing arguments,
 * 3 daemon unreachable, 4 daemon answered with a non-OK status.
 *
 * @param {string[]} args - argv with the leading `media` keyword removed.
 * @returns {Promise<void>}
 */
async function runMedia(args) {
  // The first non-flag word selects the action. When only flags were passed
  // (e.g. `od media --help`) nothing is found and we show the help text.
  const sub = args.find((a) => !a.startsWith('-')) || '';
  if (sub === '' || sub === 'help') {
    printMediaHelp();
    return;
  }
  if (sub !== 'generate') {
    console.error(`unknown subcommand: od media ${sub}`);
    printMediaHelp();
    process.exit(1);
  }

  const idx = args.indexOf(sub);
  const flags = parseFlags([...args.slice(0, idx), ...args.slice(idx + 1)]);

  const daemonUrl = flags['daemon-url'] || process.env.OD_DAEMON_URL || 'http://127.0.0.1:7456';
  const projectId = flags.project || process.env.OD_PROJECT_ID;
  if (!projectId) {
    console.error(
      'project id required. Pass --project <id> or set OD_PROJECT_ID. The daemon injects this when it spawns the code agent.',
    );
    process.exit(2);
  }

  const surface = flags.surface;
  if (!surface || !['image', 'video', 'audio'].includes(surface)) {
    console.error('--surface must be one of: image | video | audio');
    process.exit(2);
  }
  if (!flags.model) {
    console.error('--model required (see http://<daemon>/api/media/models)');
    process.exit(2);
  }

  const body = {
    surface,
    model: flags.model,
    prompt: flags.prompt,
    output: flags.output,
    aspect: flags.aspect,
    voice: flags.voice,
    audioKind: flags['audio-kind'],
  };

  // Validate numeric flags before sending. Number('banana') is NaN, which
  // still has typeof 'number' and is serialized by JSON.stringify as null, so
  // without this check a bad value would reach the daemon silently mangled.
  for (const name of ['length', 'duration']) {
    if (flags[name] == null) continue;
    const seconds = parsePositiveSeconds(flags[name]);
    if (seconds === null) {
      console.error(`--${name} must be a positive number of seconds`);
      process.exit(2);
    }
    body[name] = seconds;
  }

  // Tolerate one trailing slash on the daemon URL so the path joins cleanly.
  const url = `${daemonUrl.replace(/\/$/, '')}/api/projects/${encodeURIComponent(projectId)}/media/generate`;
  let resp;
  try {
    resp = await fetch(url, {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify(body),
    });
  } catch (err) {
    console.error(`failed to reach daemon at ${daemonUrl}: ${err.message}`);
    process.exit(3);
  }
  const text = await resp.text();
  if (!resp.ok) {
    console.error(`daemon ${resp.status}: ${text}`);
    process.exit(4);
  }
  // Print the JSON response as one line so the agent can parse it.
  process.stdout.write(text.trim() + '\n');
}

/**
 * Parse long-form command-line flags into a plain object.
 *
 * Supported shapes:
 *   --key value   -> { key: 'value' }
 *   --key=value   -> { key: 'value' }  (split on the first '=')
 *   --key         -> { key: true }     (no value, or next token is another --flag)
 *
 * Tokens that do not start with `--` and are not consumed as a value
 * (positionals, short flags such as `-p`) are ignored.
 *
 * @param {string[]} argv - Argument tokens to scan.
 * @returns {Object<string, string|true>} Map of flag name to value.
 */
function parseFlags(argv) {
  const out = {};
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (!a || !a.startsWith('--')) continue;

    // Inline form. Requires a non-empty key, hence `> 2` rather than `>= 2`.
    const eq = a.indexOf('=');
    if (eq > 2) {
      out[a.slice(2, eq)] = a.slice(eq + 1);
      continue;
    }

    const key = a.slice(2);
    const next = argv[i + 1];
    // A following token is the value unless it is itself a long flag. A
    // single-dash token such as "-5" is accepted so negative values survive.
    if (next != null && !next.startsWith('--')) {
      out[key] = next;
      i++;
    } else {
      out[key] = true;
    }
  }
  return out;
}

/**
 * Print the usage text for `od media generate` to stdout with a single
 * console.log call: required flags, common options, and the shape of the
 * JSON line the command emits.
 */
function printMediaHelp() {
  const helpLines = [
    'Usage: od media generate --surface <image|video|audio> --model <id> [opts]',
    '',
    'Required:',
    '--surface image | video | audio',
    '--model Model id from /api/media/models (e.g. gpt-image-2, seedance-2, suno-v5).',
    '--project Project id. Auto-resolved from OD_PROJECT_ID when invoked by the daemon.',
    '',
    'Common options:',
    '--prompt "<text>" Generation prompt.',
    '--output <filename> File to write under the project. Auto-named if omitted.',
    '--aspect 1:1|16:9|9:16|4:3|3:4',
    '--length <seconds> Video length.',
    '--duration <seconds> Audio duration.',
    '--voice <voice-id> Speech / TTS voice.',
    '--audio-kind music|speech|sfx',
    '--daemon-url http://127.0.0.1:7456',
    '',
    'Output: a single line of JSON: {"file": { name, size, kind, mime, ... }}.',
    '',
    'Skills should call this and then reference the returned filename in their',
    "artifact / message body. The daemon writes the bytes into the project's",
    'files folder so the FileViewer can preview them immediately.',
  ];
  console.log(helpLines.join('\n'));
}
13 changes: 13 additions & 0 deletions daemon/design-systems.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ export async function listDesignSystems(root) {
category: extractCategory(raw) ?? 'Uncategorized',
summary: summarize(raw),
swatches: extractSwatches(raw),
// Optional `> Surface: image|video|audio` blockquote line. Most
// existing systems target the web surface and don't declare it;
// we default to 'web' so the right-side filter classifies them
// correctly.
surface: extractSurface(raw),
body: raw,
});
} catch {
Expand Down Expand Up @@ -67,6 +72,14 @@ function extractCategory(raw) {
return m?.[1];
}

// Surfaces a design system may declare; anything else is treated as 'web'.
const KNOWN_SURFACES = new Set(['web', 'image', 'video', 'audio']);

/**
 * Read the optional `> Surface: <name>` blockquote line from a design-system
 * document.
 *
 * @param {string} raw - Full text of the design-system file.
 * @returns {string} The declared surface, lower-cased, when it is one of
 *   KNOWN_SURFACES; otherwise 'web' (no declaration, empty value, or an
 *   unrecognized name).
 */
function extractSurface(raw) {
  // Only horizontal whitespace is allowed around the marker. With `\s` the
  // multiline match could run across line breaks, so an empty `> Surface:`
  // line would pick up whatever text started the following line.
  const m = /^>[ \t]*Surface:[ \t]*(.+?)[ \t]*$/im.exec(raw);
  if (!m) return 'web';
  // trim() also drops a trailing '\r' captured from CRLF input.
  const v = m[1].trim().toLowerCase();
  return KNOWN_SURFACES.has(v) ? v : 'web';
}

// Strip boilerplate like "Design System Inspired by Cohere" → "Cohere" so
// the picker dropdown reads cleanly. Hand-authored titles that don't match
// the pattern (e.g. "Neutral Modern") pass through unchanged.
Expand Down
53 changes: 53 additions & 0 deletions daemon/media-models.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Daemon-side wrapper around the shared media model registry. The
// authoritative data lives in src/media/models.data.json so the
// frontend (NewProjectPanel pickers, MEDIA_GENERATION_CONTRACT prompt)
// and the daemon dispatcher read the exact same arrays — no hand-mirror,
// no drift. We keep this file in plain JS so the daemon never needs a
// TS toolchain at runtime; it just JSON.parses one file at module load.

import { readFileSync } from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';

// Locate models.data.json relative to this module file rather than the
// process working directory.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const DATA_PATH = path.join(__dirname, '..', 'src', 'media', 'models.data.json');

// Read and parse the registry once, synchronously, when the module loads.
const data = JSON.parse(readFileSync(DATA_PATH, 'utf8'));

// Re-export each top-level key of the JSON under its daemon-facing name.
export const {
  image: IMAGE_MODELS,
  video: VIDEO_MODELS,
  audio: AUDIO_MODELS_BY_KIND,
  aspects: MEDIA_ASPECTS,
  videoLengthsSec: VIDEO_LENGTHS_SEC,
  audioDurationsSec: AUDIO_DURATIONS_SEC,
} = data;

/**
 * Look a model up by id across every registry list, regardless of surface.
 * Lists are searched in the order image, video, then audio music / speech /
 * sfx, and the first record with a matching id wins.
 *
 * @param {string} id - Model id to search for.
 * @returns {object|null} The matching model record, or null when none match.
 */
export function findMediaModel(id) {
  const { music, speech, sfx } = AUDIO_MODELS_BY_KIND;
  const catalogue = [IMAGE_MODELS, VIDEO_MODELS, music, speech, sfx].flatMap(
    (group) => [...group],
  );
  const match = catalogue.find((model) => model.id === id);
  return match || null;
}

/**
 * Return the list of models registered for a surface.
 *
 * @param {string} surface - 'image', 'video' or 'audio'.
 * @param {string} [audioKind] - Audio sub-kind; only consulted when surface
 *   is 'audio'. Defaults to 'music'.
 * @returns {object[]} The registered list. For audio, an unknown kind falls
 *   back to the music list. Any other surface yields an empty array.
 */
export function modelsForSurface(surface, audioKind) {
  if (surface === 'image') return IMAGE_MODELS;
  if (surface === 'video') return VIDEO_MODELS;
  if (surface === 'audio') {
    const kind = audioKind || 'music';
    // Own-property lookup only: a kind such as "constructor" or "__proto__"
    // would otherwise resolve to an inherited Object.prototype member, which
    // is truthy but not a model list, and callers would then fail on `.find`.
    const registered = Object.hasOwn(AUDIO_MODELS_BY_KIND, kind)
      ? AUDIO_MODELS_BY_KIND[kind]
      : undefined;
    return registered || AUDIO_MODELS_BY_KIND.music;
  }
  return [];
}

/**
 * Surface-aware lookup: resolve a model id only within the list registered
 * for the given (surface, audioKind) pair.
 *
 * The dispatcher relies on this to reject mismatches such as
 * `surface=image, model=suno-v5` BEFORE any bytes are written. Without the
 * check an audio model would silently produce an image-named stub, and
 * routing to a real provider later would land in the wrong place.
 *
 * @param {string} id - Model id to look up.
 * @param {string} surface - Surface the request targets.
 * @param {string} [audioKind] - Audio sub-kind, forwarded to modelsForSurface.
 * @returns {object|null} The model record, or null when the id is not
 *   registered for that pair.
 */
export function findMediaModelForSurface(id, surface, audioKind) {
  const candidates = modelsForSurface(surface, audioKind);
  const hasRequestedId = (entry) => entry.id === id;
  const hit = candidates.find(hasRequestedId);
  return hit ? hit : null;
}
Loading