From e480c00ef70b54c9fe00ae419a2f45c6ad3802fb Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 26 Feb 2026 10:01:06 +0000
Subject: [PATCH 1/2] Initial plan


From 406ab7331fc935f5f87b1a09b4528ff879469118 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 26 Feb 2026 10:10:38 +0000
Subject: [PATCH 2/2] feat: integrate Claude Opus vision API for image analysis

Co-authored-by: leekHotline <117092932+leekHotline@users.noreply.github.com>
---
 backend/app/core/config.py                    |   4 +
 backend/app/modules/image_analysis/router.py  |   2 +-
 backend/app/modules/image_analysis/service.py | 113 +++++++++++++++---
 backend/pyproject.toml                        |   1 +
 backend/uv                                    |   0
 5 files changed, 105 insertions(+), 15 deletions(-)
 mode change 100644 => 100755 backend/uv

diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index 3b1e426..b3bb31c 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -46,6 +46,10 @@ class Settings(BaseSettings):
     MAX_VIDEO_SIZE: int = 50 * 1024 * 1024  # 50MB
     ALLOWED_VIDEO_TYPES: list[str] = ["video/mp4", "video/quicktime", "video/webm"]
 
+    # Anthropic Claude vision API; an empty key leaves image analysis stubbed
+    ANTHROPIC_API_KEY: str = ""
+    CLAUDE_MODEL: str = "claude-opus-4-5"
+
     model_config = {"env_file": ".env", "extra": "ignore"}
 
 
diff --git a/backend/app/modules/image_analysis/router.py b/backend/app/modules/image_analysis/router.py
index 4c8aad1..9583131 100644
--- a/backend/app/modules/image_analysis/router.py
+++ b/backend/app/modules/image_analysis/router.py
@@ -18,5 +18,5 @@ async def describe_image(
     db: AsyncSession = Depends(get_db),
 ) -> schemas.ImageDescribeResponse:
     """Describe an image to help visually impaired users understand its content."""
-    result = await service.describe_image(payload.image_file_id, payload.language)
+    result = await service.describe_image(db, payload.image_file_id, payload.language)
     return schemas.ImageDescribeResponse(**result)
diff --git a/backend/app/modules/image_analysis/service.py b/backend/app/modules/image_analysis/service.py
index 11baf08..dbbcbca 100644
--- a/backend/app/modules/image_analysis/service.py
+++ b/backend/app/modules/image_analysis/service.py
@@ -1,20 +1,143 @@
 """Image analysis business logic."""
 
+import asyncio
+import base64
+import logging
 
-async def describe_image(image_file_id: str, language: str) -> dict:
-    """Describe image content for visually impaired users.
+import anthropic
+from sqlalchemy.ext.asyncio import AsyncSession
 
-    This is a placeholder implementation. In production, this would call
-    a real vision AI service (e.g., GPT-4 Vision, Tencent Cloud OCR, etc.)
-    to generate a natural-language description of the image.
+from app.core.config import settings
+from app.modules.uploads.service import get_upload_content_path, get_uploaded_file
 
-    When the image is blurry or unclear, the service should indicate that
-    and provide the best possible description.
+logger = logging.getLogger(__name__)
+
+# Image media types the Claude Messages API accepts for base64 image sources.
+_SUPPORTED_MEDIA_TYPES = frozenset(
+    {"image/jpeg", "image/png", "image/gif", "image/webp"}
+)
+
+# Substrings in the model's answer that mark the image as blurry or unclear.
+_UNCLEAR_MARKERS = ("blurry", "unclear", "模糊", "不清晰", "看不清")
+
+# Shared template for failure responses; hand out copies, never the dict itself.
+_FALLBACK_RESPONSE = {
+    "description": "图片描述服务暂时不可用，请稍后重试。",
+    "is_clear": True,
+    "clarity_note": None,
+    "confidence": 0.0,
+}
+
+
+def _fallback_response() -> dict:
+    """Return a fresh copy of the fallback payload (the template is shared)."""
+    return dict(_FALLBACK_RESPONSE)
+
+
+async def describe_image(db: AsyncSession, image_file_id: str, language: str) -> dict:
+    """Describe image content for visually impaired users using Claude vision API.
+
+    Args:
+        db: Session used to look up the upload record.
+        image_file_id: Identifier of the uploaded image to describe.
+        language: Language tag; values starting with ``zh`` get Chinese prompts,
+            anything else gets English prompts.
+
+    Returns:
+        A dict with ``description``, ``is_clear``, ``clarity_note`` and
+        ``confidence`` keys. A placeholder is returned when no API key is
+        configured; the fallback payload is returned when the upload record is
+        missing, has an unsupported media type, cannot be read, or the Claude
+        call fails or produces no text.
     """
-    # TODO: integrate real vision AI service
-    return {
-        "description": f"[Image description placeholder for file: {image_file_id}]",
-        "is_clear": True,
-        "clarity_note": None,
-        "confidence": 0.0,
-    }
+    if not settings.ANTHROPIC_API_KEY:
+        return {
+            "description": f"[Image description placeholder for file: {image_file_id}]",
+            "is_clear": True,
+            "clarity_note": None,
+            "confidence": 0.0,
+        }
+
+    record = await get_uploaded_file(db, image_file_id)
+    if record is None:
+        return _fallback_response()
+
+    # Reject anything the API would refuse before reading and encoding the file.
+    if record.mime_type not in _SUPPORTED_MEDIA_TYPES:
+        logger.warning(
+            "Unsupported media type %s for image %s", record.mime_type, image_file_id
+        )
+        return _fallback_response()
+
+    try:
+        file_path = get_upload_content_path(record)
+        # Read in a worker thread so disk I/O does not block the event loop.
+        image_data = await asyncio.to_thread(file_path.read_bytes)
+    except OSError as exc:  # FileNotFoundError is a subclass of OSError
+        logger.warning("Failed to read image file %s: %s", image_file_id, exc)
+        return _fallback_response()
+
+    image_b64 = base64.b64encode(image_data).decode("utf-8")
+
+    if language.startswith("zh"):
+        system_prompt = (
+            "你是一个专门帮助视觉障碍用户的AI助手。"
+            "请用清晰、详细的语言描述图片内容，包括主要对象、颜色、场景和任何重要细节。"
+            "如果图片模糊或不清晰，请说明这一点。"
+        )
+        user_text = "请描述这张图片的内容。"
+    else:
+        system_prompt = (
+            "You are an AI assistant specialized in helping visually impaired users. "
+            "Describe the image content clearly and in detail, including main objects, "
+            "colors, scene, and any important details. "
+            "If the image is blurry or unclear, please note that."
+        )
+        user_text = "Please describe this image."
+
+    try:
+        client = anthropic.AsyncAnthropic(api_key=settings.ANTHROPIC_API_KEY)
+        # The context manager closes the client's HTTP connections on exit.
+        async with client:
+            message = await client.messages.create(
+                model=settings.CLAUDE_MODEL,
+                max_tokens=1024,
+                system=system_prompt,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "image",
+                                "source": {
+                                    "type": "base64",
+                                    "media_type": record.mime_type,
+                                    "data": image_b64,
+                                },
+                            },
+                            {"type": "text", "text": user_text},
+                        ],
+                    }
+                ],
+            )
+    except Exception as exc:
+        # Deliberately broad: any API failure degrades to the fallback payload.
+        logger.exception("Claude API call failed for image %s: %s", image_file_id, exc)
+        return _fallback_response()
+
+    # Join every text block instead of assuming the first content block is text.
+    description = "".join(
+        block.text for block in message.content if block.type == "text"
+    )
+    if not description.strip():
+        logger.warning("Claude returned no text for image %s", image_file_id)
+        return _fallback_response()
+
+    lowered = description.lower()
+    is_unclear = any(marker in lowered for marker in _UNCLEAR_MARKERS)
+    return {
+        "description": description,
+        "is_clear": not is_unclear,
+        "clarity_note": "图片可能模糊或不清晰" if is_unclear else None,
+        "confidence": 0.95,
+    }
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index 3821bba..2606cc6 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -16,6 +16,7 @@ dependencies = [
     "pydantic-settings>=2.12.0",
     "python-jose[cryptography]>=3.5.0",
     "python-multipart>=0.0.22",
+    "anthropic>=0.40.0",
     "sqlalchemy[asyncio]>=2.0.46",
     "uvicorn[standard]>=0.40.0",
 ]
diff --git a/backend/uv b/backend/uv
old mode 100644
new mode 100755