From ad69b1f64307fdf7be92088190df824400b77391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robin=20Angel=C3=A9?= Date: Thu, 7 May 2026 06:27:26 +0200 Subject: [PATCH 1/2] feat: block sending images when selected model lacks vision support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user pastes a screenshot in the Cline chat composer or attaches images to a new task, and the selected Cline model does not support vision (supportsVision === false), the send/start action is now blocked with a clear error message. Previously only a non-blocking warning was shown, allowing the user to send images to a model that could not process them, resulting in errors and broken tasks. Changes: - cline-chat-composer.tsx: canSubmit now checks attachmentWarningMessage to disable the send button and Enter key when images are incompatible - cline-agent-chat-panel.tsx: strengthened warning message to clearly state the model does not support images - task-create-dialog.tsx: added vision support check that disables Create and Start buttons and shows an error when images are attached to a non-vision Cline model - runtime-api.ts: added server-side validation in startTaskSession and sendTaskChatMessage handlers to reject image input for models that do not support vision, serving as a backend safety net Related issues and PRs: - Fixes #307: Kanban failed to recognise the attached screenshot - Closes #413: Cline Kanban doesn't see attached files - Mitigates #395: Unable to update model capabilities (we only block when supportsVision is explicitly false, never when undefined) - Builds on PR #35: feat: support image attachments in Cline SDK chat (which added the initial warning but did not block sending) - Relates to #156: Feature: Allow agents to attach screenshots/images to tasks (the original feature request that introduced image support) Signed-off-by: Robin Angelé --- src/trpc/runtime-api.ts | 34 +++++++++++++++++++ 
.../detail-panels/cline-agent-chat-panel.tsx | 2 +- .../detail-panels/cline-chat-composer.tsx | 2 +- web-ui/src/components/task-create-dialog.tsx | 30 ++++++++++++++-- 4 files changed, 63 insertions(+), 5 deletions(-) diff --git a/src/trpc/runtime-api.ts b/src/trpc/runtime-api.ts index a494c86b9..45253572d 100644 --- a/src/trpc/runtime-api.ts +++ b/src/trpc/runtime-api.ts @@ -227,6 +227,23 @@ export function createRuntimeApi(deps: CreateRuntimeApiDependencies): RuntimeTrp } : {}), }); + const hasImages = Boolean(body.images && body.images.length > 0); + if (hasImages && clineLaunchConfig.modelId) { + const providerModels = await clineProviderService + .getProviderModels(clineLaunchConfig.providerId) + .catch(() => ({ models: [] })); + const selectedModel = providerModels.models.find( + (model) => model.id === clineLaunchConfig.modelId, + ); + if (selectedModel?.supportsVision === false) { + return { + ok: false, + summary: null, + error: + "The selected Cline model does not support image input. 
Switch to a vision-capable model or remove the images to start this task.", + }; + } + } const clineTaskSessionService = await deps.getScopedClineTaskSessionService(workspaceScope); const resolvedClineTitle = resolveTaskTitle(body.taskTitle?.trim(), body.prompt); const summary = await clineTaskSessionService.startTaskSession({ @@ -626,6 +643,23 @@ export function createRuntimeApi(deps: CreateRuntimeApiDependencies): RuntimeTrp } } else { const clineLaunchConfig = await clineProviderService.resolveLaunchConfig(); + const chatHasImages = Boolean(body.images && body.images.length > 0); + if (chatHasImages && clineLaunchConfig.modelId) { + const providerModels = await clineProviderService + .getProviderModels(clineLaunchConfig.providerId) + .catch(() => ({ models: [] })); + const selectedModel = providerModels.models.find( + (model) => model.id === clineLaunchConfig.modelId, + ); + if (selectedModel?.supportsVision === false) { + return { + ok: false, + summary: null, + error: + "The selected Cline model does not support image input. Switch to a vision-capable model or remove the images to send this message.", + }; + } + } summary = await clineTaskSessionService.startTaskSession({ taskId: body.taskId, cwd: workspaceScope.workspacePath, diff --git a/web-ui/src/components/detail-panels/cline-agent-chat-panel.tsx b/web-ui/src/components/detail-panels/cline-agent-chat-panel.tsx index 1416b6e1b..9dfa8c475 100644 --- a/web-ui/src/components/detail-panels/cline-agent-chat-panel.tsx +++ b/web-ui/src/components/detail-panels/cline-agent-chat-panel.tsx @@ -219,7 +219,7 @@ export const ClineAgentChatPanel = React.forwardRef 0 && selectedModel?.supportsVision === false - ? "The selected Cline model may not accept image input. Choose a vision-capable model to use these images." + ? "The selected model does not support image input. Switch to a vision-capable model or remove the images to send." 
: null; const isPinnedToBottom = useCallback((container: HTMLDivElement): boolean => { diff --git a/web-ui/src/components/detail-panels/cline-chat-composer.tsx b/web-ui/src/components/detail-panels/cline-chat-composer.tsx index aa3f09a68..19b386dec 100644 --- a/web-ui/src/components/detail-panels/cline-chat-composer.tsx +++ b/web-ui/src/components/detail-panels/cline-chat-composer.tsx @@ -113,7 +113,7 @@ export function ClineChatComposer({ const [slashSuggestions, setSlashSuggestions] = useState([]); const [isMentionSearchLoading, setIsMentionSearchLoading] = useState(false); const [isSlashSearchLoading, setIsSlashSearchLoading] = useState(false); - const canSubmit = canSend && !isModelSaving && (draft.trim().length > 0 || images.length > 0); + const canSubmit = canSend && !isModelSaving && !attachmentWarningMessage && (draft.trim().length > 0 || images.length > 0); const activeToken = useMemo(() => detectActiveClineComposerToken(draft, cursorIndex), [cursorIndex, draft]); const completionSuggestions = useMemo(() => { diff --git a/web-ui/src/components/task-create-dialog.tsx b/web-ui/src/components/task-create-dialog.tsx index 6020693b5..8d6d7f27c 100644 --- a/web-ui/src/components/task-create-dialog.tsx +++ b/web-ui/src/components/task-create-dialog.tsx @@ -3,6 +3,7 @@ import * as DropdownMenu from "@radix-ui/react-dropdown-menu"; import * as RadixSwitch from "@radix-ui/react-switch"; import { + AlertTriangle, ArrowBigUp, ArrowLeft, Check, @@ -202,6 +203,20 @@ export function TaskCreateDialog({ onCreateStartAndOpen || primaryStartAction === "start" ? primaryStartAction : DEFAULT_PRIMARY_START_ACTION; const secondaryStartAction = effectivePrimaryStartAction === "start" ? "start_and_open" : "start"; + const effectiveAgentIdForImages = agentId ?? defaultAgentId ?? null; + const effectiveModelIdForImages = clineSettings?.modelId ?? effectiveDefaultModelId ?? ""; + const selectedClineModel = useMemo( + () => + effectiveModelIdForImages + ? 
providerModels.find((model) => model.id === effectiveModelIdForImages) ?? null + : null, + [effectiveModelIdForImages, providerModels], + ); + const imagesBlockedByModel = + images.length > 0 && + effectiveAgentIdForImages === "cline" && + selectedClineModel?.supportsVision === false; + // Reset state when dialog closes useEffect(() => { if (!open) { @@ -598,6 +613,15 @@ export function TaskCreateDialog({ + {imagesBlockedByModel ? ( +
+ +

+ The selected Cline model does not support image input. Switch to a vision-capable model in Override + Agent Settings or remove the images to continue. +

+
+ ) : null}