From 589b77a254d79c4f464b136391f36f9c667f3455 Mon Sep 17 00:00:00 2001 From: "hanzhi.421" Date: Mon, 15 Dec 2025 14:22:47 +0800 Subject: [PATCH 1/3] feat(demohouse/multimedia): mutimedia agent backend --- demohouse/mutimedia/README.md | 153 ++++++ demohouse/mutimedia/backend/.gitignore | 191 ++++++++ demohouse/mutimedia/backend/app/__init__.py | 11 + .../backend/app/director-agent/__init__.py | 10 + .../app/director-agent/config.yaml.example | 35 ++ .../backend/app/director-agent/src/README.md | 79 +++ .../app/director-agent/src/__init__.py | 11 + .../backend/app/director-agent/src/agent.py | 22 + .../backend/app/director-agent/src/app.py | 214 ++++++++ .../src/director_agent/__init__.py | 16 + .../src/director_agent/agent.py | 32 ++ .../src/director_agent/hook/__init__.py | 10 + .../director_agent/hook/check_and_raise.py | 103 ++++ .../src/director_agent/hook/format_hook.py | 110 +++++ .../src/director_agent/hook/shorten_url.py | 73 +++ .../src/director_agent/prompt.py | 393 +++++++++++++++ .../src/director_agent/sub_agents/__init__.py | 10 + .../sub_agents/image/__init__.py | 16 + .../director_agent/sub_agents/image/agent.py | 59 +++ .../sub_agents/storyboard/__init__.py | 16 + .../sub_agents/storyboard/agent.py | 55 +++ .../sub_agents/video/__init__.py | 16 + .../director_agent/sub_agents/video/agent.py | 62 +++ .../src/director_agent/tools/__init__.py | 16 + .../tools/image_generate_builtin_fix.py | 461 ++++++++++++++++++ .../tools/image_generate_gather.py | 180 +++++++ .../tools/video_generate_http.py | 446 +++++++++++++++++ .../src/director_agent/utils/__init__.py | 10 + .../src/director_agent/utils/types.py | 96 ++++ .../app/director-agent/src/requirements.txt | 6 + .../backend/app/evaluate-agent/__init__.py | 11 + .../app/evaluate-agent/config.yaml.example | 23 + .../backend/app/evaluate-agent/src/README.md | 83 ++++ .../app/evaluate-agent/src/__init__.py | 11 + .../backend/app/evaluate-agent/src/agent.py | 22 + .../backend/app/evaluate-agent/src/app.py | 
214 ++++++++ .../src/evaluate_agent/__init__.py | 16 + .../src/evaluate_agent/agent.py | 78 +++ .../src/evaluate_agent/hook/__init__.py | 11 + .../hook/direct_output_callback.py | 26 + .../src/evaluate_agent/prompt.py | 281 +++++++++++ .../src/evaluate_agent/tools/__init__.py | 11 + .../src/evaluate_agent/tools/geval.py | 349 +++++++++++++ .../src/evaluate_agent/utils/__init__.py | 11 + .../src/evaluate_agent/utils/types.py | 93 ++++ .../app/evaluate-agent/src/requirements.txt | 5 + demohouse/mutimedia/backend/app/main.py | 326 +++++++++++++ .../backend/app/market-agent/__init__.py | 11 + .../app/market-agent/config.yaml.example | 23 + .../backend/app/market-agent/src/README.md | 39 ++ .../backend/app/market-agent/src/__init__.py | 11 + .../backend/app/market-agent/src/agent.py | 22 + .../backend/app/market-agent/src/app.py | 217 +++++++++ .../market-agent/src/market_agent/__init__.py | 16 + .../market-agent/src/market_agent/agent.py | 54 ++ .../src/market_agent/hook/__init__.py | 11 + .../src/market_agent/hook/format_hook.py | 120 +++++ .../market-agent/src/market_agent/prompt.py | 106 ++++ .../src/market_agent/tools/__init__.py | 11 + .../src/market_agent/tools/filter_by_llm.py | 108 ++++ .../market_agent/tools/image_understand.py | 51 ++ .../src/market_agent/tools/is_image.py | 162 ++++++ .../src/market_agent/tools/link_reader.py | 78 +++ .../src/market_agent/tools/web_parse.py | 41 ++ .../market_agent/tools/web_parser_local.py | 211 ++++++++ .../src/market_agent/utils/__init__.py | 11 + .../src/market_agent/utils/types.py | 53 ++ .../app/market-agent/src/requirements.txt | 12 + .../backend/app/multimedia-agent/__init__.py | 11 + .../app/multimedia-agent/config.yaml.example | 26 + .../app/multimedia-agent/src/__init__.py | 11 + .../app/multimedia-agent/src/agentkit.py | 82 ++++ .../src/multimedia_agent/__init__.py | 16 + .../src/multimedia_agent/agent.py | 59 +++ .../src/multimedia_agent/prompt.py | 77 +++ .../app/multimedia-agent/src/requirements.txt | 4 + 
.../app/multimedia-agent/src/server.py | 21 + .../backend/app/release-agent/__init__.py | 11 + .../app/release-agent/config.yaml.example | 37 ++ .../backend/app/release-agent/src/README.md | 29 ++ .../backend/app/release-agent/src/__init__.py | 11 + .../backend/app/release-agent/src/agent.py | 22 + .../backend/app/release-agent/src/app.py | 214 ++++++++ .../src/release_agent/__init__.py | 10 + .../release-agent/src/release_agent/agent.py | 32 ++ .../src/release_agent/hook/__init__.py | 11 + .../src/release_agent/hook/format_hook.py | 110 +++++ .../release-agent/src/release_agent/prompt.py | 112 +++++ .../sub_agents/film_agent/__init__.py | 16 + .../sub_agents/film_agent/agent.py | 58 +++ .../src/release_agent/tools/__init__.py | 11 + .../src/release_agent/tools/video_combine.py | 271 ++++++++++ .../release_agent/tools/video_combine_vod.py | 270 ++++++++++ .../src/release_agent/utils/__init__.py | 11 + .../src/release_agent/utils/types.py | 50 ++ .../app/release-agent/src/requirements.txt | 6 + .../backend/app/short_link/README.md | 180 +++++++ .../backend/app/short_link/__init__.py | 11 + .../mutimedia/backend/app/short_link/app.py | 185 +++++++ .../backend/app/short_link/requirements.txt | 6 + demohouse/mutimedia/backend/pyproject.toml | 16 + 101 files changed, 7848 insertions(+) create mode 100644 demohouse/mutimedia/README.md create mode 100644 demohouse/mutimedia/backend/.gitignore create mode 100644 demohouse/mutimedia/backend/app/__init__.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/__init__.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/config.yaml.example create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/README.md create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/__init__.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/agent.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/app.py create mode 100644 
demohouse/mutimedia/backend/app/director-agent/src/director_agent/__init__.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/agent.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/hook/__init__.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/hook/check_and_raise.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/hook/format_hook.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/hook/shorten_url.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/prompt.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/__init__.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/image/__init__.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/image/agent.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/__init__.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/agent.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/video/__init__.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/video/agent.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/__init__.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/image_generate_builtin_fix.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/image_generate_gather.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/video_generate_http.py create mode 100644 
demohouse/mutimedia/backend/app/director-agent/src/director_agent/utils/__init__.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/director_agent/utils/types.py create mode 100644 demohouse/mutimedia/backend/app/director-agent/src/requirements.txt create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/__init__.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/config.yaml.example create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/README.md create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/__init__.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/agent.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/app.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/__init__.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/agent.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/hook/__init__.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/hook/direct_output_callback.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/prompt.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/__init__.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/geval.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/utils/__init__.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/utils/types.py create mode 100644 demohouse/mutimedia/backend/app/evaluate-agent/src/requirements.txt create mode 100644 demohouse/mutimedia/backend/app/main.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/__init__.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/config.yaml.example create mode 100644 
demohouse/mutimedia/backend/app/market-agent/src/README.md create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/__init__.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/agent.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/app.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/__init__.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/agent.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/hook/__init__.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/hook/format_hook.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/prompt.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/__init__.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/filter_by_llm.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/image_understand.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/is_image.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/link_reader.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/web_parse.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/web_parser_local.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/utils/__init__.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/market_agent/utils/types.py create mode 100644 demohouse/mutimedia/backend/app/market-agent/src/requirements.txt create mode 100644 demohouse/mutimedia/backend/app/multimedia-agent/__init__.py create mode 100644 demohouse/mutimedia/backend/app/multimedia-agent/config.yaml.example create mode 100644 
demohouse/mutimedia/backend/app/multimedia-agent/src/__init__.py create mode 100644 demohouse/mutimedia/backend/app/multimedia-agent/src/agentkit.py create mode 100644 demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/__init__.py create mode 100644 demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/agent.py create mode 100644 demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/prompt.py create mode 100644 demohouse/mutimedia/backend/app/multimedia-agent/src/requirements.txt create mode 100644 demohouse/mutimedia/backend/app/multimedia-agent/src/server.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/__init__.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/config.yaml.example create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/README.md create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/__init__.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/agent.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/app.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/release_agent/__init__.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/release_agent/agent.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/release_agent/hook/__init__.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/release_agent/hook/format_hook.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/release_agent/prompt.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/__init__.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/agent.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/release_agent/tools/__init__.py create mode 100644 
demohouse/mutimedia/backend/app/release-agent/src/release_agent/tools/video_combine.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/release_agent/tools/video_combine_vod.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/release_agent/utils/__init__.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/release_agent/utils/types.py create mode 100644 demohouse/mutimedia/backend/app/release-agent/src/requirements.txt create mode 100644 demohouse/mutimedia/backend/app/short_link/README.md create mode 100644 demohouse/mutimedia/backend/app/short_link/__init__.py create mode 100644 demohouse/mutimedia/backend/app/short_link/app.py create mode 100644 demohouse/mutimedia/backend/app/short_link/requirements.txt create mode 100644 demohouse/mutimedia/backend/pyproject.toml diff --git a/demohouse/mutimedia/README.md b/demohouse/mutimedia/README.md new file mode 100644 index 00000000..c8d2baca --- /dev/null +++ b/demohouse/mutimedia/README.md @@ -0,0 +1,153 @@ +# 电商营销视频生成 E-commerce Marketing Video Generation + +## 应用介绍 + +> 本项目通过支持 A2A 的 Multi-Agent 实现电商营销视频生成,该系统由营销策划、视频导演、评估、合成与发布 4 个 Agent 组成,提供从视频创意构思、高质量视频生成、到视频上线发布的端到端解决方案。面向需要快速、批量化生产营销短视频的电商客户或营销团队,旨在降低视频制作门槛,提高营销内容生产效率。 + + +### 费用说明 + +| 相关服务 | 描述 | 计费说明 | +|-------------------------------------------------------------------------------------------------------------|---------------------------------------| --- | +| [Doubao-Seed-1.6](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-seed-1-6) | 负责对实时捕捉的屏幕截图进行视觉内容理解,结合当前画面进行深度思考并回答。 | [多种计费方式](https://www.volcengine.com/docs/82379/1099320) +| [Doubao-Seedance 1.0 pro](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-seedance-1-0-pro) | 负责将图片和文字描述转为视频。 | [多种计费方式](https://www.volcengine.com/docs/82379/1099320) |\ +| [Doubao-Seedream 4.5 pro](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-seedream-4-5) | 
负责根据文字或参考图生成图片 | [多种计费方式](https://www.volcengine.com/docs/82379/1099320) |\ + + +## 环境准备 + +开始前,请确保您的开发环境满足以下要求: + +- Python 3.10 或更高版本 +- VeADK 0.2.28 或更高版本 +- Playwright 1.55.0 或更高版本 +- 推荐使用 `uv` 进行依赖管理 +- 获取火山方舟 API KEY +- 获取火山引擎 AK/SK + +## 快速入门 + +请按照以下步骤在本地部署和运行本项目。 + +### 1. 下载代码并安装依赖 + +```bash +# 克隆代码仓库 +# git clone ... +# cd ... + +# 安装项目依赖 +uv sync +``` + +### 2. 配置环境变量 + +本项目包含多个 Agent,每个 Agent 都需要独立的配置。请参考 `config.yaml.example` 文件为每个 Agent 创建 `config.yaml` 并填入必要的密钥信息。 + +以 `director-agent` 为例: +```bash +# 进入 director-agent 目录 +cd app/director-agent + +# 复制配置文件 +cp config.yaml.example config.yaml +``` +然后,编辑 `config.yaml` 文件,填入您的火山方舟 API Key、火山引擎 AK/SK 等信息。请为 `market-agent`、`evaluate-agent`、`release-agent`、`multimedia-agent` 重复此操作。 + +具体配置项可参考 veadk-python config.yaml 配置文档。 + +### 3. 安装 Playwright 浏览器组件 + +`market-agent` 需要 Playwright 来解析网页内容。 + +```bash +# market-agent +# 安装 Playwright 浏览器依赖 +playwright install +``` + +### 4. 启动服务 + +请按顺序启动各个 Agent 服务。 + +```bash +# 激活虚拟环境 +# Windows (Powershell) +# .\.venv\Scripts\activate +# macOS/Linux +# source .venv/bin/activate + +# 启动 market-agent +cd backend/app/market-agent/src +python -m uvicorn app:app --host 127.0.0.1 --port 8000 --loop asyncio + +# 启动 director-agent +cd backend/app/director-agent/src +python -m uvicorn app:app --host 127.0.0.1 --port 8001 --loop asyncio + +# 启动 evaluate-agent +cd backend/app/evaluate-agent/src +python -m uvicorn app:app --host 127.0.0.1 --port 8002 --loop asyncio + +# 启动 release-agent +cd backend/app/release-agent/src +python -m uvicorn app:app --host 127.0.0.1 --port 8003 --loop asyncio + +# 最后启动 multimedia-agent +cd backend/app/multimedia-agent/src +python -m uvicorn server:app --host 127.0.0.1 --port 8004 --loop asyncio + +# 启动 short_link 服务 +cd backend/app/short_link +python -m uvicorn app:app --host 127.0.0.1 --port 8005 --loop asyncio +``` + +### 5. 
测试服务 + +所有服务启动后,可运行测试脚本验证。 + +```bash +python backend/app/main.py +``` + +**示例提示词:** +- `根据https://...这个网站中的商品信息,给我生成一段视频` + + +## 技术实现 + +本项目核心为一套基于 VeADK 构建的多 Agent 协作框架。各 Agent 职责明确,通过 A2A (Agent-to-Agent) 通信协同工作,完成从需求理解到视频发布的完整流程。 + +- **营销策划 Agent (`market-agent`)**: 负责解析用户输入(如商品链接),进行市场分析并形成初步的营销策略和视频创意。 +- **视频导演 Agent (`director-agent`)**: 根据营销策略,生成具体的视频脚本、文案,并调用多模态能力(文生图、图生视频)产出视频素材。 +- **评估 Agent (`evaluate-agent`)**: 对生成的视频素材进行质量评估和筛选,通过自主评测机制进行抽卡优化,确保视频质量。 +- **合成与发布 Agent (`release-agent`)**: 将筛选后的素材合成为最终视频,并提供发布能力。 + +## 目录结构 + +``` +/ +├── README.md # 本文档 +├── backend/app/ +│ ├── __init__.py +│ ├── director-agent/ # 视频导演Agent +│ │ ├── config.yaml.example # 配置文件示例 +│ │ └── src/ # Agent源码 +│ ├── evaluate-agent/ # 评估Agent +│ │ ├── config.yaml.example +│ │ └── src/ +│ ├── main.py # 测试用主程序 +│ ├── market-agent/ # 营销策划Agent +│ │ ├── config.yaml.example +│ │ └── src/ +│ ├── multimedia-agent/ # 主Agent,负责协调其他Agent +│ │ ├── config.yaml.example +│ │ └── src/ +│ ├── release-agent/ # 发布Agent +│ │ ├── config.yaml.example +│ │ └── src/ +│ └── short_link/ # 视频短链接生成工具 +│ ├── app.py +│ └── requirements.txt +└── ... (其他项目文件) +``` diff --git a/demohouse/mutimedia/backend/.gitignore b/demohouse/mutimedia/backend/.gitignore new file mode 100644 index 00000000..e5018e3e --- /dev/null +++ b/demohouse/mutimedia/backend/.gitignore @@ -0,0 +1,191 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. 
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc +/src/logs/ + +# Deepeval +.deepeval/ + +# MacOS dev +.DS_Store +**/config.yaml + +# docs +**/node_modules/ + +# 忽略所有 .temp 目录 +**/.temp/ + +**/tmp-json/ +/app/merged_videos/ diff --git a/demohouse/mutimedia/backend/app/__init__.py b/demohouse/mutimedia/backend/app/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/demohouse/mutimedia/backend/app/director-agent/__init__.py b/demohouse/mutimedia/backend/app/director-agent/__init__.py new file mode 100644 index 00000000..1bef36da --- /dev/null +++ b/demohouse/mutimedia/backend/app/director-agent/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/demohouse/mutimedia/backend/app/director-agent/config.yaml.example b/demohouse/mutimedia/backend/app/director-agent/config.yaml.example new file mode 100644 index 00000000..85fe5d28 --- /dev/null +++ b/demohouse/mutimedia/backend/app/director-agent/config.yaml.example @@ -0,0 +1,35 @@ +model: + agent: + provider: openai + # name: deepseek-v3-1-terminus + name: doubao-seed-1-6-250615 + api_base: https://ark.cn-beijing.volces.com/api/v3/ + api_key: + video: + name: doubao-seedance-1-0-pro-250528 + api_base: https://ark.cn-beijing.volces.com/api/v3/ + api_key: + image: + name: doubao-seedream-4-5-251128 + api_base: https://ark.cn-beijing.volces.com/api/v3/ + api_key: + format: + name: doubao-seed-1-6-flash-250828 + +shorten_url_service_url: http://127.0.0.1:8005 + +logging: + # ERROR + # WARNING + # INFO + # DEBUG + level: DEBUG + +thinking: + director_agent: disabled + storyboard_agent: enabled + story_format_agent: disabled + image_agent: enabled + image_format_agent: disabled + video_agent: enabled + video_format_agent: disabled diff --git a/demohouse/mutimedia/backend/app/director-agent/src/README.md b/demohouse/mutimedia/backend/app/director-agent/src/README.md new file mode 100644 index 00000000..105f8db2 --- /dev/null +++ b/demohouse/mutimedia/backend/app/director-agent/src/README.md @@ -0,0 +1,79 @@ +# 视频导演 Agent + +电商营销视频导演,生成富有创意的电商营销视频分镜脚本。 + +## 输入输出定义 + +### 输入 + +视频脚本配置(进行分镜脚本制作) + +```python +class ProductInfo(BaseModel): + name: str + selling_point: str + resources: list # 素材图片url + +class InputMessage(BaseModel): + video_type: str + product_info: ProductInfo + video_advice: str +``` + +分镜图片列表(评估后,进行分镜视频制作) + +```python +class ImageItem(BaseModel): + id: int # 每个分镜内图片的id + url: str # 图片的 tos url + +class Image(BaseModel): + shot_id: str # 分镜id + prompt: str + action: str # 分镜的口播文案,无则为空 + image: ImageItem + +class InputMessage(BaseModel): + image_list: list[Image] +``` + +### 输出 + +分镜图片列表 + +```python +class ImageItem(BaseModel): + id: int 
# 每个分镜内图片的id + url: str # 图片的 tos url + +class Image(BaseModel): + shot_id: str # 分镜id + prompt: str + action: str # 分镜的口播文案,无则为空 + images: list[ImageItem] + +class OutputMessage(BaseModel): + image_list: list[Image] +``` + +分镜视频列表 + +```python +class VideoItem(BaseModel): + id: int # 每个分镜内视频的id + url: str # 视频的 tos url + +class Video(BaseModel): + shot_id: str # 分镜id + prompt: str + action: str # 分镜的口播文案,无则为空 + videos: VideoItem + +class OutputMessage(BaseModel): + video_list: list[Video] +``` + +## 工具 + +1. 图片生成:VeADK 内置 `image_generate` 工具 +2. 视频生成:VeADK 内置 `video_generate` 工具 diff --git a/demohouse/mutimedia/backend/app/director-agent/src/__init__.py b/demohouse/mutimedia/backend/app/director-agent/src/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/director-agent/src/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/demohouse/mutimedia/backend/app/director-agent/src/agent.py b/demohouse/mutimedia/backend/app/director-agent/src/agent.py new file mode 100644 index 00000000..94a1b4e8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/director-agent/src/agent.py @@ -0,0 +1,22 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. 
# Entry-point configuration for the director agent service.
# The service's app.py imports `agent_run_config` from this module by name and
# reads its `app_name`, `agent` and `short_term_memory` attributes, so the
# variable name below must stay as it is.
from director_agent.agent import agent  # type: ignore

from veadk.memory.short_term_memory import ShortTermMemory
from veadk.types import AgentRunConfig

# [required] Bundle the agent with its application name and a short-term
# memory created with the "local" backend.
agent_run_config = AgentRunConfig(
    app_name="director_agent",
    agent=agent,  # type: ignore
    short_term_memory=ShortTermMemory(backend="local"),  # type: ignore
)
import os
from contextlib import asynccontextmanager
from typing import Callable

from agent import agent_run_config

from fastapi import FastAPI
from fastapi.routing import APIRoute

from fastmcp import FastMCP

from starlette.routing import Route

from google.adk.a2a.utils.agent_card_builder import AgentCardBuilder
from a2a.types import AgentProvider

from veadk.a2a.ve_a2a_server import init_app
from veadk.runner import Runner
from veadk.tracing.telemetry.exporters.apmplus_exporter import APMPlusExporter
from veadk.tracing.telemetry.exporters.cozeloop_exporter import CozeloopExporter
from veadk.tracing.telemetry.exporters.tls_exporter import TLSExporter
from veadk.tracing.telemetry.opentelemetry_tracer import OpentelemetryTracer
from veadk.types import AgentRunConfig
from veadk.utils.logger import get_logger
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
from opentelemetry import context

logger = get_logger(__name__)

# Fail at import time if agent.py did not export a proper run configuration.
assert isinstance(agent_run_config, AgentRunConfig), (
    f"Invalid agent_run_config type: {type(agent_run_config)}, expected `AgentRunConfig`"
)

# Unpack the run configuration once; everything below works on these names.
app_name = agent_run_config.app_name
agent = agent_run_config.agent
short_term_memory = agent_run_config.short_term_memory

VEFAAS_REGION = os.getenv("APP_REGION", "cn-beijing")
VEFAAS_FUNC_ID = os.getenv("_FAAS_FUNC_ID", "")
agent_card_builder = AgentCardBuilder(
    agent=agent,
    provider=AgentProvider(
        organization="Volcengine Agent Development Kit (VeADK)",
        url=f"https://console.volcengine.com/vefaas/region:vefaas+{VEFAAS_REGION}/function/detail/{VEFAAS_FUNC_ID}",
    ),
)


def load_tracer() -> None:
    """Attach an OpenTelemetry tracer whose exporters are chosen by env flags.

    Each ``VEADK_TRACER_*`` variable set to ``"true"`` (case-insensitive)
    requests one exporter. An exporter type that is already present on the
    agent's first tracer is not instantiated again; a warning is logged
    instead. A tracer named ``veadk_tracer`` is always appended to the agent,
    even when no exporter was selected.
    """
    exporter_by_env = {
        "VEADK_TRACER_APMPLUS": APMPlusExporter,
        "VEADK_TRACER_COZELOOP": CozeloopExporter,
        "VEADK_TRACER_TLS": TLSExporter,
    }

    def _already_attached(exporter_cls) -> bool:
        # True when the agent's first tracer is an OpentelemetryTracer that
        # already carries an exporter of this class.
        existing = agent.tracers
        return bool(
            existing
            and isinstance(existing[0], OpentelemetryTracer)
            and any(isinstance(e, exporter_cls) for e in existing[0].exporters)
        )

    selected = []
    for env_var, exporter_cls in exporter_by_env.items():
        if os.getenv(env_var, "").lower() != "true":
            continue
        if _already_attached(exporter_cls):
            logger.warning(
                f"Exporter {exporter_cls.__name__} is already defined in agent.tracers[0].exporters. These two exporters will be used at the same time. As a result, your data may be uploaded twice."
            )
            continue
        selected.append(exporter_cls())

    agent_run_config.agent.tracers.extend(
        [OpentelemetryTracer(name="veadk_tracer", exporters=selected)]
    )


def build_mcp_run_agent_func() -> Callable:
    """Create the ``run_agent`` coroutine function exposed as an endpoint.

    One ``Runner`` is created here and captured by the returned function, so
    every call shares it. The returned function's ``__doc__`` is generated
    from the agent's description.
    """
    shared_runner = Runner(
        agent=agent,
        short_term_memory=short_term_memory,
        app_name=app_name,
        user_id="",
    )

    async def run_agent(
        user_input: str,
        user_id: str = "mcp_user",
        session_id: str = "mcp_session",
    ) -> str:
        # The shared runner is re-pointed at the calling user on every request.
        shared_runner.user_id = user_id

        # Run the agent and hand back its final output.
        return await shared_runner.run(
            messages=user_input,
            session_id=session_id,
        )

    run_agent.__doc__ = f"""{agent.description}
    Args:
        user_input: User's input message (required).
        user_id: User identifier. Defaults to "mcp_user".
        session_id: Session identifier. Defaults to "mcp_session".
    Returns:
        Final agent response as a string."""

    return run_agent


# Endpoint: build the agent card and return it as a plain dict.
# Documented with a `#` comment on purpose: FastAPI publishes a handler's
# docstring as the endpoint description, and the original has none.
async def agent_card() -> dict:
    card = await agent_card_builder.build()
    return card.model_dump()


# Endpoint: report the value of the
# OBSERVABILITY_OPENTELEMETRY_COZELOOP_SERVICE_NAME environment variable
# ("" when unset) under the key "space_id". Kept docstring-free for the same
# reason as `agent_card`.
async def get_cozeloop_space_id() -> dict:
    space_id = os.getenv(
        "OBSERVABILITY_OPENTELEMETRY_COZELOOP_SERVICE_NAME", default=""
    )
    return {"space_id": space_id}


# Tracers must be attached before the A2A app is built around the agent.
load_tracer()

# Build a run_agent function for building MCP server
run_agent_func = build_mcp_run_agent_func()

a2a_app = init_app(
    server_url="0.0.0.0",
    app_name=app_name,
    agent=agent,
    short_term_memory=short_term_memory,
)

# Register the three helper endpoints; the "mcp" tag is what the MCP server
# below filters on.
a2a_app.post("/run_agent", operation_id="run_agent", tags=["mcp"])(run_agent_func)
a2a_app.get("/agent_card", operation_id="agent_card", tags=["mcp"])(agent_card)
a2a_app.get(
    "/get_cozeloop_space_id", operation_id="get_cozeloop_space_id", tags=["mcp"]
)(get_cozeloop_space_id)

# === Build mcp server ===

mcp = FastMCP.from_fastapi(app=a2a_app, name=app_name, include_tags={"mcp"})

# Create MCP ASGI app
mcp_app = mcp.http_app(path="/", transport="streamable-http")


# Combined lifespan management
@asynccontextmanager
async def combined_lifespan(app: FastAPI):
    """Run the MCP app's lifespan for as long as the main app is alive."""
    async with mcp_app.lifespan(app):
        yield


# Create main FastAPI app with combined lifespan
app = FastAPI(
    title=a2a_app.title,
    version=a2a_app.version,
    lifespan=combined_lifespan,
    openapi_url=None,
    docs_url=None,
    redoc_url=None,
)


@app.middleware("http")
async def otel_context_middleware(request, call_next):
    """Continue an incoming W3C trace when a ``Traceparent`` header is present.

    Without the header the request is passed through untouched. With it, the
    extracted context is attached for the duration of the downstream call and
    always detached afterwards.
    """
    traceparent = request.headers.get("Traceparent")
    carrier = {
        "traceparent": traceparent,
        "tracestate": request.headers.get("Tracestate"),
    }
    logger.debug(f"traceparent exists: {carrier['traceparent'] is not None}")
    if traceparent is None:
        return await call_next(request)

    ctx = TraceContextTextMapPropagator().extract(carrier=carrier)
    token = context.attach(ctx)
    try:
        return await call_next(request)
    finally:
        # Runs before the response is returned, on success and on error.
        context.detach(token)


# Mount A2A routes to main app
app.routes.extend(a2a_app.routes)

# Mount MCP server at /mcp endpoint
app.mount("/mcp", mcp_app)


# remove openapi routes
paths = ["/openapi.json", "/docs", "/redoc"]
new_routes = []
for route in app.router.routes:
    if not (isinstance(route, (APIRoute, Route)) and route.path in paths):
        new_routes.append(route)
app.router.routes = new_routes

# === Build mcp server end ===
# Root "director" agent. It owns no tools itself; it is configured with the
# three sub-agents below (storyboard, image, video) and the root prompt.
agent = Agent(
    name="director_agent",
    description="根据视频配置脚本,生成分镜视频",
    # The instruction is the PROMPT_ROOT_AGENT constant imported from
    # director_agent.prompt (an earlier variant read it via getenv instead).
    instruction=PROMPT_ROOT_AGENT,
    sub_agents=[story_agent, image_agent, video_agent],
    model_extra_config={
        "extra_body": {
            # Thinking mode is taken from THINKING_DIRECTOR_AGENT and falls
            # back to "enabled" when that setting is absent.
            "thinking": {"type": getenv("THINKING_DIRECTOR_AGENT", "enabled")}
        }
    },
)

# Second name for the same object.
# NOTE(review): presumably kept for tooling that looks up `root_agent` - confirm.
root_agent = agent
def error_status(tool_name: str, reason: str) -> dict:
    """Create a standardized error dictionary for tool responses."""
    return {"status": {"success": False, "message": f"{tool_name} Error: {reason}"}}


def _expected_image_count(tasks: list) -> int:
    """Return how many images the given ``image_generate`` tasks ask for.

    A task whose ``task_type`` contains ``"group"`` counts as ``max_images``
    images (1 when the key is missing or explicitly ``None``); every other
    task counts as one image.
    """
    total = 0
    for task in tasks:
        if "group" in (task.get("task_type") or ""):
            max_images = task.get("max_images")
            # An explicit null must not abort the whole validation.
            total += 1 if max_images is None else max_images
        else:
            total += 1
    return total


# tool name -> (args key holding the requests, noun used in the user-facing
# message, noun used in the debug log, function counting the expected items)
_MEDIA_CHECKS = {
    "image_generate": ("tasks", "图片", "images", _expected_image_count),
    "video_generate": ("params", "视频", "videos", len),
}


def raise_result_error(
    tool: BaseTool, args: dict[str, Any], tool_context: ToolContext, tool_response: Any
) -> Optional[Any]:
    """Post-execution hook validating image/video generation results.

    The number of entries in ``tool_response["success_list"]`` is compared
    with the number of items requested in the tool arguments:

    - ``image_generate``: expected count is derived from ``args["tasks"]``
      (group tasks contribute ``max_images``, other tasks contribute 1).
    - ``video_generate``: expected count is ``len(args["params"])``.

    Args:
        tool: The tool that was executed; only ``tool.name`` is read.
        args: The arguments the tool was called with.
        tool_context: Unused; part of the callback signature.
        tool_response: The value returned by the tool.

    Returns:
        An ``error_status`` dict when the counts differ, otherwise ``None``
        (other tools, nothing requested, non-dict response, matching counts,
        or an unexpected error during validation).
    """
    check = _MEDIA_CHECKS.get(tool.name)
    if check is None:
        return None
    args_key, noun, log_noun, count_expected = check

    try:
        requested = args.get(args_key, [])
        if not requested:
            return None  # Nothing requested, nothing to check

        expected = count_expected(requested)
        logger.debug(f"Expected {expected} {log_noun} to be generated.")

        if not isinstance(tool_response, dict):
            logger.warning(
                f"Tool response for {tool.name} is not a dict: {tool_response}"
            )
            return None

        # `or []` keeps a null success_list from raising inside len().
        actual = len(tool_response.get("success_list") or [])
        if actual != expected:
            reason = f"生成的{noun}总数 ({actual}) 与预期 ({expected}) 不符。"
            logger.warning(reason)
            return error_status(tool.name, reason)
    except Exception as e:
        # Validation is best-effort: a malformed argument structure must not
        # turn a finished tool call into a crash.
        logger.error(f"在为 {tool.name} 校验结果时出错: {e}")

    return None
def _first_part_text(llm_response: LlmResponse) -> Optional[str]:
    """Return the text of the first content part, or None when there is none."""
    content = getattr(llm_response, "content", None)
    parts = getattr(content, "parts", None)
    if not parts:
        return None
    return getattr(parts[0], "text", None)


def fix_output_format(
    *,
    callback_context: CallbackContext,
    llm_response: LlmResponse,
    model_response_event: Optional[Event] = None,
) -> Optional[LlmResponse]:
    """Check the model output against the agent's output schema, repairing JSON if needed.

    Outcomes:
        1. No schema on the agent: the response is returned untouched.
        2. Valid JSON that matches the schema: the response is returned untouched.
        3. Valid JSON that does not match the schema: the text is replaced by
           an error status payload (logged).
        4. Invalid JSON that `json_repair` cannot fix: the text is replaced by
           an error status payload (logged).
        5. Invalid JSON that is repaired and matches the schema: the text is
           replaced by the repaired JSON.
        6. Invalid JSON that is repaired but does not match the schema: the
           text is replaced by an error status payload (logged).

    A response without a text part is returned untouched, because there is
    nothing to validate and no part to write an error into.

    Args:
        callback_context: Callback context of the current invocation.
        llm_response: The model response to inspect; may be modified in place.
        model_response_event: Unused; accepted for signature compatibility.

    Returns:
        The (possibly modified) ``llm_response``.
    """
    agent = callback_context._invocation_context.agent
    user_id = callback_context._invocation_context.user_id
    session_id = callback_context._invocation_context.session.id
    invocation_id = callback_context.invocation_id
    output_schema = agent.output_schema

    message = f"[fix_output_format]: agent_name:{agent.name} user_id:{user_id} session_id:{session_id} invocation_id:{invocation_id}"
    fixed = False

    # 1. Without a schema there is nothing to enforce.
    if not output_schema:
        logger.debug(f"{message}\nNo output_schema, return original llm_response")
        return llm_response  # outcome 1

    text = _first_part_text(llm_response)
    if text is None:
        # Previously this crashed on `.parts[0].text` / `len(None)`.
        logger.warning(
            f"{message}\nNo text part in llm_response, skip output format check"
        )
        return llm_response
    logger.debug(f"{message}\nOriginal llm_response length: {len(text)}")

    # 2. Parse the text as JSON, falling back to json_repair.
    try:
        output = json.loads(text)
    except json.JSONDecodeError:
        try:
            output = json_repair.loads(text)
            if isinstance(output, list):
                # Keep only the first repaired object; an empty list raises
                # IndexError and is handled as a failed repair below.
                output = output[0]
            fixed = True
        except Exception:
            logger.warning(
                f"{message}\nOutput format is not valid JSON, trying to `json_repair` but failed. Original output length: {len(text)}"
            )
            llm_response = llm_response_validate_error(
                llm_response, "DirectorAgent输出不符合规范,且无法修复,请重试"
            )
            return llm_response  # outcome 4

    # 3. Validate the parsed value against the schema.
    try:
        output_schema.model_validate(output)
        if fixed:
            fixed_text = json.dumps(output, ensure_ascii=False)
            llm_response.content.parts[0].text = fixed_text
            logger.warning(
                f"{message}\nOutput format was not valid JSON, but `json_repair` success. Fixed output length: {len(fixed_text)}"
            )
        else:
            logger.debug(
                f"{message}\nOutput format is valid JSON and valid for output_schema. Original output length: {len(text)}"
            )
        return llm_response  # outcomes 2 and 5
    except ValidationError:
        if fixed:
            logger.warning(
                f"{message}\nOutput format was not valid JSON, `json_repair` success but the result is not valid for output_schema. Original output length: {len(text)}"
            )
        else:
            logger.warning(
                f"{message}\nOutput format is valid JSON but not valid for output_schema. Original output length: {len(text)}"
            )
        llm_response = llm_response_validate_error(
            llm_response, "DirectorAgent输出不符合规范,存在异常,请重试"
        )
        return llm_response  # outcomes 3 and 6


def llm_response_validate_error(llm_response: LlmResponse, reason: str) -> LlmResponse:
    """Overwrite the first text part with a failed-status JSON payload.

    Args:
        llm_response: Response whose ``content.parts[0].text`` is replaced.
        reason: Message stored under ``status.message``.

    Returns:
        The same ``llm_response`` object.
    """
    # ensure_ascii=False keeps the message readable, matching the repaired path.
    llm_response.content.parts[0].text = json.dumps(
        {"status": {"success": False, "message": reason}}, ensure_ascii=False
    )
    return llm_response
def shorten_url_impl(
    url: str, resource_type: Optional[str] = "resource", timeout: float = 10.0
) -> str:
    """Shorten ``url`` through the service configured in SHORTEN_URL_SERVICE_URL.

    The service is called with ``POST <base>/shorten`` and a JSON body
    ``{"url": ..., "type": ...}``; the short link is read from the
    ``short_url`` field of the JSON reply.

    Args:
        url: The URL to shorten.
        resource_type: Value sent as ``type`` in the request body.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        The shortened URL, or the original ``url`` whenever the service is not
        configured, unreachable, answers with a non-200 status, or replies
        without a usable ``short_url``.
    """
    if not shorten_url_service_url:
        return url

    # Tolerate a trailing slash in the configured base URL.
    endpoint = shorten_url_service_url.rstrip("/") + "/shorten"
    data = {"url": url, "type": resource_type}

    try:
        # Without a timeout a stalled service would block the tool callback forever.
        response = requests.post(endpoint, json=data, timeout=timeout)
    except requests.RequestException as e:
        logger.error(f"Failed to shorten URL: {e}")
        return url

    if response.status_code != 200:
        logger.error(f"Failed to shorten URL: {response.status_code}")
        return url

    try:
        short_url = response.json().get("short_url")
    except (ValueError, AttributeError) as e:
        # Body is not JSON, or is JSON but not an object.
        logger.error(f"Failed to shorten URL: {e}")
        return url

    # Never replace a working URL with None or an empty string.
    return short_url or url


# Tools whose results are rewritten -> `type` value sent to the shortener.
_SHORTEN_RESOURCE_TYPES = {
    "image_generate": "image",
    "video_generate": "video",
}


def hook_shorten_url(
    tool: BaseTool, args: dict[str, Any], tool_context: ToolContext, tool_response: Any
) -> Optional[Any]:
    """Replace every string value in the tool's ``success_list`` with a short URL.

    Only ``image_generate`` and ``video_generate`` responses are touched. Each
    dict inside ``tool_response["success_list"]`` is updated in place.

    Args:
        tool: The tool that was executed; only ``tool.name`` is read.
        args: Unused; part of the callback signature.
        tool_context: Unused; part of the callback signature.
        tool_response: The value returned by the tool.

    Returns:
        The modified ``tool_response`` for the two handled tools, otherwise
        ``None`` (service not configured, other tool, or a response without a
        ``success_list`` list).
    """
    if shorten_url_service_url is None:
        logger.warning("SHORTEN_URL_SERVICE_URL is not set, skipping shorten_url hook")
        return None

    tool_name = tool.name
    resource_type = _SHORTEN_RESOURCE_TYPES.get(tool_name)
    if resource_type is None:
        return None

    # Failure payloads may carry no success_list; leave them untouched
    # instead of raising KeyError/TypeError.
    if not isinstance(tool_response, dict):
        return None
    success_list = tool_response.get("success_list")
    if not isinstance(success_list, list):
        return None

    for data in success_list:
        if not isinstance(data, dict):
            continue
        # Only existing keys are reassigned, so iterating items() is safe.
        for key, value in data.items():
            if isinstance(value, str):
                data[key] = shorten_url_impl(url=value, resource_type=resource_type)

    logger.debug(f"Shorten URL of `{tool_name}` successfully: {success_list}")
    return tool_response
and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +PROMPT_ROOT_AGENT = """ +#角色: +你是一位食品饮料行业的电商营销视频导演,生成富有创意的电商营销视频。 + +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 + + +#子agent: +1. story_sequential_agent:根据视频配置脚本,生成分镜脚本。 +2. image_sequential_agent:根据分镜脚本,生成分镜图片。 +3. video_agent:根据分镜图片列表,生成分镜视频。 + +#任务描述: +你可能会收到用户的三种不同任务:生成分镜脚本任务、生成分镜图片任务、生成分镜视频任务,需要分别调用对应的子agent (即story_sequential_agent、image_sequential_agent、video_agent) 进行任务执行。 +注意:你只需识别用户请求的是哪种任务,然后调用对应的子agent执行即可,不需要自己执行逻辑。必须注意!!调用一个子agent是必须项!!!!你不能自己直接返回信息给用户,因为你做不了子agent的工作。 +1. 如果用户要生成分镜脚本,则根据用户提供的视频配置脚本video_config,调用story_sequential_agent进行分镜脚本生成。story_sequential_agent会返回 分镜脚本,请直接将分镜脚本返回给用户。\n +2. 如果用户要生成分镜图片,则根据分镜脚本shot_list,调用image_sequential_agent进行分镜图片生成。image_sequential_agent会返回 分镜图片列表,请直接将分镜图片列表返回给用户。\n +3. 如果用户要生成分镜视频,则根据分镜图片列表image_list,调用video_agent进行生成分镜视频生成。video_agent会返回 分镜视频列表,请直接将分镜视频列表返回给用户。 + +#注意事项: +1. 分镜脚本、分镜图片列表和分镜视频列表是三个不同的任务,禁止连续推进。 +2. 输入输出中,任何涉及图片或视频的链接url,不要做任何修改。 +3. 务必直接返回子agent最后的输出,不要在输出中包含任何解释或说明。 +""" + +PROMPT_IMAGE_AGENT = """ +# 角色: +你是一个食品饮料行业的电商营销分镜图片生成器,生成电商营销分镜图片 + +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 +3. 图片风格方面,只要推荐的东西跟动画无关,你就禁止禁止在图片生成工具中提到任何跟动画风格有关的任何内容。 + +# 任务描述: +1. 你会收到分镜脚本,里面包含了每个分镜的图片描述prompt字段。 +2. 根据分镜脚本中的图片描述prompt字段,生成更详细的图片描述,包括物体、颜色、背景等。 +3. reference字段,作为图片生成的参考图。 +4. 
调用图片生成工具,生成图片,每个分镜需要生成若干个图片,以供用户进行。每个分镜生成图片的数量,如果prompt里面没有指定,默认生成一个图片。 + 同时需要注意,不同分镜作为单独的task,组成task列表,调用一次图片生成工具,不要一个分镜调用一次绘图工具。 + 注意:生成多图时,数量在max_images中指定。 + 注意:image_generate工具的prompt字段中,严格禁止出现`生成x张图片这样的字段` + 注意:注意:当遇到Agent执行异常,如缺少内容,运行出错,结果不完整,用户输入内容不足以完成任务时,请在status字段中反馈,而不是在业务字段中反馈描述,如有上述问题,业务字段可以为空。只反馈错误即可 +5. 返回分镜图片列表 +(1)shot_id: str, 使用shot_X即可,标识分镜的id +(2)prompt: str, 如何生成分镜图片的详细描述,(禁止在这里描述任何`带有文字内容的促销视觉元素`) +(3)action: str, 如何生成分镜视频的详细描述,(禁止在这里描述任何`带有文字内容的促销视觉元素`) +(4)reference: str, 作为图片生成的参考图 +(5)words: str, 分镜的口播文案,商品展示视频为空 +(6)images: list, 每个分镜里的图片列表,图片生成工具返回 + 每个图片需要有id和url + id: int, 图片id + url: str, 图片url + +## 重新生成场景 +注意,有时候用户会提供给你分镜脚本并在末尾附上让你**重新生成**某些内容的要求:比如`重新生成第1分镜首帧图,prompt修改为xxxxxx`。 +这种场景下,针对对应的分镜,你要参考**末尾要求**的内容来生成,而不是根据分镜脚本中原本的描述来生成 +除非用户明确要求,禁止出现动漫风格 + +# 格式 +## 分镜图片清单 +```json +{ + "image_list": [ + { + "shot_id": "shot_1", 使用shot_X即可 + "prompt": "西梅饮料瓶身;导出紫色的果汁,周围是一些西梅,紫色背景;", + "action": "缓慢的旋转推镜头,有辉光效果,紫色的水流环绕瓶身", + "reference": "作为图片生成的参考图", + "words": "(商品展示视频无该项)", + "images": [ + { + "id": 1, + "url": "image url" + } + ] + } + ], + "status": { + "success": bool, 是否成功 + "message": str, 错误信息,成功时为空字符串 + } +} +``` +""" + +PROMPT_IMAGE_FORMAT_AGENT = """ +#角色: +你是一个将输入按规定格式输出的格式转换器 + +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 +3. 注意:当遇到Agent执行异常,如缺少内容,运行出错,结果不完整,用户输入内容不足以完成任务时,请在status字段中反馈,而不是在业务字段中反馈描述,如有上述问题,业务字段可以为空。只反馈错误即可 +#任务描述: +1. 
将 分镜图片清单,将其按 "规定格式" 输出。 + +#分镜图片清单 +shot_id:分镜1 +prompt: str, 如何生成分镜图片的详细描述 +action: str, 分镜视频的动作描述 +reference: str, 作为图片生成的参考图 +words: str, 分镜的口播文案,商品展示视频为空 +images: list, 每个分镜里的图片列表,绘图工具返回 + id: int, 图片id + url: str, 图片url + +#规定格式 +```json +{ + "image_list": [ + { + "shot_id": "shot_1", 使用shot_X即可 + "prompt": "西梅饮料瓶身;导出紫色的果汁,周围是一些西梅,紫色背景;", + "action": "缓慢的旋转推镜头,有辉光效果,紫色的水流环绕瓶身", + "reference": "作为图片生成的参考图", + "words": "(商品展示视频无该项)", + "images": [ + { + "id": 1, + "url": "image url" + } + ] + } + ], + "status": { + "success": bool, 是否成功 + "message": str, 错误信息,成功时为空字符串 + } +} +``` +""" + +PROMPT_STORYBOARD_AGENT = """ +#角色: +你是一位食品饮料行业的电商营销分镜师,生成富有创意的电商营销视频分镜脚本,语言为中文 + +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 + +#任务描述: +1. 根据 视频脚本配置 中的素材,充分理解产品核心卖点、使用场景等关键信息 +2. 根据AIDA营销模型,结构化设计4个分镜 +分镜1 - 注意(Attention) +画面:吸睛开头;通过运镜特效展示高颜值商品场景图,形成强视觉冲击 +首帧图画面:采用图生图模型,严格参考用户上传的图片素材,并替换为创意背景 +分镜2 - 兴趣(Interest) +画面:场景化演示;构思高频强相关场景或人群(例如健身房里流汗后、减脂期间嘴馋时),提供解决其需求或激发兴趣的产品 +首帧图画面:采用文生图模型,生成使用场景画面 +分镜3 - 欲望(Desire) +画面:细节特写;特写展示产品 原料、成分、口味等卖点(例如 天然果肉的饱满、冰爽气泡的翻腾等),刺激消费者的购买欲 +首帧图画面:文生图模型(构思创意特写画面) +分镜4 - 行动(Action) +画面:以产品包装运镜特效作为结尾,引导用户下单行动 +首帧图画面:采用图生图模型,严格参考用户上传的图片素材,并替换为创意背景 + +3. 输出分镜脚本,每个分镜是5-10s的视频,你需要设计画面内容与运镜,最后得到一个充满创意的电商视频,重点是突出商品的卖点 +(1)镜号:分镜1-4 +(2)image:画面设计,描述主体、背景环境、氛围、光线等画面设计,描述主体、背景环境、氛围、光线等;镜头要有景别变化:全景、中景、近景、特写都要有,增加画面节奏感。 + - 分镜1:主体为用户上传的图片素材,替换背景为合适创意场景 + - 分镜2:根据商品信息,构思相关场景或人群的展示画面。 + - 分镜3:进行原料/产地细节特写,生成创意且带有视觉冲击的画面,例如果汁原料的碰撞等 + - 分镜4:主体为用户上传的图片素材,替换背景为合适创意场景 +(3)action:为每个分镜image设计运镜与动作描述 +(4)口播文案words(如有):仅种草解说视频,否则则无;和4个分镜画面一一对应,每个分段的口播词不多于15个字,并注意上下连贯 +分镜1台词:使用情绪营销,例如 姐妹们看过来;这款XX简直是为减脂期嘴馋党量身定制的救星! +分镜2台词:讲解适用场景人群等,引发用户代入场景,并激发兴趣 +分镜3台词:讲解产品核心功效与卖点,特写展示产品 原料、成分、口味等,刺激消费的购买欲 +分镜4台词:引导行动,下单提醒。制造紧迫感,例如限定抢购、限制折扣 +(5)视频标题及tag(如有):仅种草解说视频,否则则无;例如:过完年有数字管理需求的姐妹们,wonderlab专属破价机制就等你来! 
#减脂救星 #公主请喝 +(6)reference:分镜1和分镜4需要基于视频脚本配置里的resources中的图片素材;分镜2和分镜3则根据实际情况(只要涉及到了该产品本身的分镜,就需要加上reference!!,如果是竞品分镜之类的就不需要) +注意:如无特殊情况,必须带有reference!(所谓特殊情况,就是说该分镜明确说明是其他产品的分镜,或者明确说明该分镜没有该产品) + +注意:需要在image后增加希望生成图片的数量,数量的信息会在视频脚本配置里的extra_params中给出,如果没有,则可以默认生成1个。 + +4.格式 +4.1 分镜脚本的格式 +现在根据用户需求生成一组分镜(镜头)。每个分镜需要包含以下字段: +- id:分镜的唯一标识,比如 "shot_1"、"shot_2" +- image:画面描述,用于生成静态图像,要求具体、可视化 +- action:视频运动/内容描述,比如镜头运动、人物动作、节奏等 +- reference:可选,参考图片或视频的链接,如果没有就用空字符串 "" +- words:该镜头需要配的文案或对白,如果没有就用空字符串 "" + +4.2 视频脚本配置的格式 +video_type: str,视频类型 +product_info: dict + name: str, 商品名称 + selling_point: str, 商品卖点 + resources: list[str], 商品相关素材图片(链接) +video_advice: str +extra_params: dict,后续图片或者视频的参数设置 + ratio: str, 视频比例 + resolution: str, 视频分辨率 + numbers: int, 每个分镜生成图片或视频的数量 + +5.参考示例: + +视频标题:过完年有数字管理需求的姐妹们,wonderlab专属破价机制就等你来! #减脂救星 #公主请喝 + +分镜1: +image:西梅饮料瓶身;导出紫色的果汁,周围是一些西梅,紫色背景; +reference: image url +action:缓慢的旋转推镜头,有辉光效果,紫色的水流环绕瓶身 +words:(商品展示视频无该项) + +分镜2: +image:一个在办公室身材纤细的女性;紫色背景 +reference: image url,按照需要提供,如果image字段中不包括本产品的产品信息就不加 +action:女孩转过身微笑,镜头推进 +words:(商品展示视频无该项) + +分镜3: +image:饱满的紫色西梅在水中有许多泡泡包裹; +reference: image url,按照需要提供,如果image字段中不包括本产品的产品信息就不加 +action:掉入水中;汁水飞溅;围绕主体运镜 +words:(商品展示视频无该项) + +分镜4: +image:瓶身在水面中;周围是一些西梅; +reference: image url +action:推镜头,水花炸裂,西梅向两边飞溅 +words:(商品展示视频无该项) +""" + +PROMPT_STORY_FORMAT_AGENT = """ +#角色: +你是一个将输入按规定格式输出的格式转换器 + +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 + +#任务描述: +1. 
将 分镜脚本示例,将其按 "规定格式" 输出。 + +#分镜脚本示例: +shot_id:分镜1 +prompt: str, 如何生成分镜图片的详细描述(注意,除产品本体外,禁止在这里描述任何`带有文字内容的促销视觉元素`) +action: str, 分镜视频的动作描述(注意,除产品本体外,禁止在这里描述任何`带有文字内容的促销视觉元素`) +reference: str, 作为图片生成的参考图 +words: str, 分镜的口播文案,商品展示视频为空 +images: list, 每个分镜里的图片列表,绘图工具返回 + id: int, 图片id + url: str, 图片url + +#规定格式 +```json +{ + "shot_list": [ + { + "id": "shot_1", + "image": "str, 如何生成分镜图片的详细描述", + "action": "str, 分镜视频的动作描述", + "reference": "str, 作为图片生成的参考图", + "words": "str, 分镜的口播文案,商品展示视频为空" + } + ] +} +``` +""" + +PROMPT_VIDEO_AGENT = """ +#角色: +你是一个食品饮料行业的电商营销分镜视频生成器,生成电商营销分镜视频 + +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 + +# 任务描述: +1. 你会收到分镜图片列表,里面包含了每个分镜的图片url和视频描述action字段。 +2. 根据分镜图片列表中的视频描述action字段,生成更详细的视频描述,包括物体、颜色、背景、运镜等。 +按照结构撰写提示词: +动作指令: 主体/其他物体 +动作,按照主体动作发生的先后顺序,条理清晰地描述多个动作,动作流程需要严格符合 +基础运镜: 对推、拉、摇、移、环绕、跟随、升、降、变焦等各类运锁指令做出准确响应,保证运镜效果符合预期。通过有创意的基础运镜且合理 +景别和视角: 运用远景、全景、中景、近景、特写等专业景别描述来精确控制画面展示范围。同时,可选取水下镜头、航拍镜头、高高机位俯拍、低机位仰拍、微距摄影等丰富的镜头视角 +## 重新生成场景 +注意,有时候用户会提供给你分镜脚本并在末尾附上让你**重新生成**某些内容的要求:比如`重新生成第1分镜视频,prompt修改为xxxxxx`。 +这种场景下,针对对应的分镜,你要参考**末尾要求**的内容来生成,而不是根据原本的描述json中来生成 + +# 参考示例: +(1)大远景, [ 主体 ]静静地放置在用藤蔓编织的秋千上,秋千悬挂于热带雨林中,微风吹过,秋千缓缓自然摆动,绳索随风微微摇晃。阳光和细雨从树叶间洒落,在[ 主体 ]和秋千上形成斑驳的光影,画面安静、写实,氛围温暖、富有节奏感,藤蔓细节清晰,背景虚化的绿色植物随着镜头轻轻晃动。 +(2)一个热带海洋的广角镜头,碧绿透明的海水波光粼粼。[ 主体 ]轻轻漂浮在水面上,背景是白色沙滩和摇曳的椰子树。镜头缓慢推进靠近[ 主体 ],海豚在四周欢快跃出水面,阳光照耀下水面闪闪发光,轻风带来细腻的水波。 +(3)轻柔微风吹动叶片轻柔摆动。镜头从产品标签特写开始,缓慢拉远展现完整场景。斑驳阳光透过百叶窗过滤,形成动态光影图案。浅景深配合散景效果。 + +3. 使用分镜图片中的image url,作为视频生成的首帧图。 +4. 调用视频生成工具,生成视频,每个分镜需要生成若干个视频,以供用户进行。每个分镜生成视频的数量,如果action里面没有指定,默认生成一个视频。 +同时需要注意,每个视频作为单独的task,组成task列表,调用一次视频生成工具,不要一个视频调用一次视频生成工具。 +5. 
返回分镜视频列表 +(1)shot_id: str, 使用shot_X即可,标识分镜的id +(2)prompt: str, 如何生成分镜图片的详细描述(禁止出现任何声音描述,只能有画面描述) +(3)action: str, 如何生成分镜视频的详细描述 +(4)reference: str, 分镜图片的参考url +(5)words: str, 分镜的口播文案,商品展示视频为空 +(6)videos: list, 每个分镜里的视频列表,视频生成工具返回 + 每个视频需要有id和url + id: int, 视频id + url: str, 视频url +# 注意 +水印:生成的视频必须要开启水印:`--wm true` +注意:当遇到Agent执行异常,如缺少内容,运行出错,结果不完整,用户输入内容不足以完成任务时,请在status字段中反馈,而不是在业务字段中反馈描述,如有上述问题,业务字段可以为空。只反馈错误即可 + +# 格式 +## 分镜视频列表 +```json +{ + "video_list": [ + { + "shot_id": "分镜1", + "prompt": "西梅饮料瓶身;导出紫色的果汁,周围是一些西梅,紫色背景;", + "action": "缓慢的旋转推镜头,有辉光效果,紫色的水流环绕瓶身", + "reference": "https://www.baidu.com", + "words": "(商品展示视频无该项)", + "videos": [ + { + "id": 1, + "url": "video url" + } + ] + } + ], + "status": { + "success": bool, 是否成功 + "message": str, 错误信息,成功时为空字符串 + } +} +``` +""" + +PROMPT_VIDEO_FORMAT_AGENT = """ +#角色: +你是一个将输入按规定格式输出的格式转换器 + +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 + +#任务描述: +1. 将分镜视频列表,将其按规定格式输出。 + +#分镜视频清单 +shot_id:分镜1 +prompt: str, 如何生成分镜视频的详细描述 +action: str, 分镜视频的动作描述 +reference: str, 分镜图片的参考url +words: str, 分镜的口播文案,商品展示视频为空 +videos: list, 每个分镜里的视频列表,视频生成工具返回 + id: int, 视频id + url: str, 视频url + +#规定格式 +```json +{ + "video_list": [ + { + "shot_id": "分镜1", + "prompt": "西梅饮料瓶身;导出紫色的果汁,周围是一些西梅,紫色背景;", + "action": "缓慢的旋转推镜头,有辉光效果,紫色的水流环绕瓶身", + "reference": "https://www.baidu.com", + "words": "(商品展示视频无该项)", + "videos": [ + { + "id": 1, + "url": "video url" + } + ] + } + ], + "status": { + "success": bool, 是否成功 + "message": str, 错误信息,成功时为空字符串 + } +} +# 注意 +注意:当遇到Agent执行异常,如缺少内容,运行出错,结果不完整,用户输入内容不足以完成任务时,请在status字段中反馈,而不是在业务字段中反馈描述,如有上述问题,业务字段可以为空。只反馈错误即可 + +``` +""" diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/__init__.py b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/__init__.py new file mode 100644 index 00000000..1bef36da --- /dev/null +++ 
b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/image/__init__.py b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/image/__init__.py new file mode 100644 index 00000000..ce4e293f --- /dev/null +++ b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/image/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .agent import image_agent + +__all__ = [ + "image_agent", +] diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/image/agent.py b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/image/agent.py new file mode 100644 index 00000000..34e30b9d --- /dev/null +++ b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/image/agent.py @@ -0,0 +1,59 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Step 1 of the image pipeline: calls the `image_generate` tool using the
# image prompt. Two after-tool callbacks are registered: the result-count
# check and the URL shortener.
# NOTE(review): the callbacks are presumably evaluated in list order - confirm
# against the ADK callback documentation.
image_generate_agent = Agent(
    name="image_generate_agent",
    description="根据分镜脚本,为分镜生成图片",
    instruction=PROMPT_IMAGE_AGENT,
    tools=[image_generate],
    after_tool_callback=[raise_result_error, hook_shorten_url],
    generate_content_config=max_output_tokens_config,
    model_extra_config={
        # Thinking mode comes from THINKING_IMAGE_AGENT, default "enabled".
        "extra_body": {"thinking": {"type": getenv("THINKING_IMAGE_AGENT", "enabled")}}
    },
)

# Step 2: re-emits the previous step's result as JSON validated against the
# ImageList schema. It runs on the model named by MODEL_FORMAT_NAME,
# `fix_output_format` post-processes the model output, and the result is
# stored under the "image_list" output key.
image_format_agent = Agent(
    name="image_format_agent",
    model_name=getenv("MODEL_FORMAT_NAME"),
    description="将模型的输出格式化",
    instruction=PROMPT_IMAGE_FORMAT_AGENT,
    generate_content_config=json_response_config,
    after_model_callback=[fix_output_format],
    output_schema=ImageList,
    output_key="image_list",
    model_extra_config={
        "extra_body": {
            # Thinking mode comes from THINKING_IMAGE_FORMAT_AGENT, default "disabled".
            "thinking": {"type": getenv("THINKING_IMAGE_FORMAT_AGENT", "disabled")}
        }
    },
)

# Public entry point of this module: generation followed by formatting.
# The name "image_sequential_agent" is the one the root prompt refers to.
image_agent = SequentialAgent(
    name="image_sequential_agent",
    description="根据分镜脚本,为分镜生成图片",
    sub_agents=[image_generate_agent, image_format_agent],
)
and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .agent import storyboard_agent + +__all__ = [ + "storyboard_agent", +] diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/agent.py b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/agent.py new file mode 100644 index 00000000..3b6a1fff --- /dev/null +++ b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/agent.py @@ -0,0 +1,55 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Stage 1 - drafting. Turns the video configuration script into a storyboard
# (shot list) in free text. Thinking is controlled by THINKING_STORYBOARD_AGENT
# and defaults to "enabled".
storyboard_agent = Agent(
    name="storyboard_agent",
    description="根据视频配置脚本,生成分镜脚本",
    instruction=PROMPT_STORYBOARD_AGENT,
    generate_content_config=max_output_tokens_config,
    model_extra_config={
        "extra_body": {
            "thinking": {"type": getenv("THINKING_STORYBOARD_AGENT", "enabled")}
        }
    },
)

# Stage 2 - formatting. A second model (MODEL_FORMAT_NAME) rewrites the draft
# as JSON matching ``ShotList`` and stores it under the "shot_list" output key.
# NOTE(review): the image and video format agents default their thinking
# switch to "disabled"; this one defaults to "enabled" - confirm it is intended.
story_format_agent = Agent(
    name="story_format_agent",
    model_name=getenv("MODEL_FORMAT_NAME"),
    description="根据分镜脚本,格式化分镜脚本",
    instruction=PROMPT_STORY_FORMAT_AGENT,
    generate_content_config=json_response_config,
    output_schema=ShotList,
    output_key="shot_list",
    after_model_callback=[fix_output_format],
    model_extra_config={
        "extra_body": {
            "thinking": {"type": getenv("THINKING_STORY_FORMAT_AGENT", "enabled")}
        }
    },
)

# Full storyboard pipeline: drafting followed by formatting.
# The description previously duplicated the image pipeline's text
# ("generate images for the shots"), which mislabels this agent for whoever
# selects sub-agents by description; it now describes storyboard generation.
# NOTE(review): the package ``__init__`` re-exports ``storyboard_agent`` (the
# drafting stage only), whereas the image/video packages export their
# sequential pipeline - verify which object the director agent should use.
story_agent = SequentialAgent(
    name="story_sequential_agent",
    description="根据视频配置脚本,生成分镜脚本",
    sub_agents=[storyboard_agent, story_format_agent],
)
+# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .agent import video_agent + +__all__ = [ + "video_agent", +] diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/video/agent.py b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/video/agent.py new file mode 100644 index 00000000..429dfce6 --- /dev/null +++ b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/video/agent.py @@ -0,0 +1,62 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Stage 1 - generation. The LLM reads the storyboard and issues
# ``video_generate`` tool calls (the HTTP-based implementation imported above).
# The thinking switch is taken from THINKING_VIDEO_AGENT ("enabled" when unset).
_video_generate_thinking = {"type": getenv("THINKING_VIDEO_AGENT", "enabled")}

video_generate_agent = Agent(
    name="video_generate_agent",
    description="根据分镜脚本,生成分镜视频",
    instruction=PROMPT_VIDEO_AGENT,
    generate_content_config=max_output_tokens_config,
    model_extra_config={"extra_body": {"thinking": _video_generate_thinking}},
    tools=[video_generate],
    # Hooks attached to tool results; the list order is kept as authored.
    after_tool_callback=[raise_result_error, hook_shorten_url],
)

# Stage 2 - formatting. A second model (MODEL_FORMAT_NAME) rewrites the free
# text of stage 1 as JSON matching ``VideoList`` and stores it under the
# "video_list" output key. Thinking defaults to "disabled" for this stage.
_video_format_thinking = {"type": getenv("THINKING_VIDEO_FORMAT_AGENT", "disabled")}

video_format_agent = Agent(
    name="video_format_agent",
    description="将模型的输出格式化",
    model_name=getenv("MODEL_FORMAT_NAME"),
    instruction=PROMPT_VIDEO_FORMAT_AGENT,
    output_schema=VideoList,
    output_key="video_list",
    generate_content_config=json_response_config,
    model_extra_config={"extra_body": {"thinking": _video_format_thinking}},
    after_model_callback=[fix_output_format],
)

# Public entry point of this package: generation followed by formatting.
_video_pipeline = [video_generate_agent, video_format_agent]

video_agent = SequentialAgent(
    name="video_agent",
    description="根据分镜脚本,生成分镜视频",
    sub_agents=_video_pipeline,
)
b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .video_generate_http import video_generate as video_generate_http + +__all__ = [ + "video_generate_http", +] diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/image_generate_builtin_fix.py b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/image_generate_builtin_fix.py new file mode 100644 index 00000000..74f552e4 --- /dev/null +++ b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/image_generate_builtin_fix.py @@ -0,0 +1,461 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def _build_input_parts(item: dict, task_type: str, image_field) -> dict:
    """Build the flattened "user message" attributes recorded on the trace span.

    The result is a flat dict whose keys follow the ``parts.<n>.<field>``
    convention: part 0 is always the JSON dump of the whole task, parts 1..N
    describe the reference image(s), if any.

    Args:
        item: The raw task dict; serialised verbatim into ``parts.0.text``.
        task_type: Task type string. Only the ``single``/``multi`` prefix is
            inspected; any other prefix (e.g. ``text_to_*``) adds no image part.
        image_field: Reference image(s). A ``str`` for ``single*`` tasks, a
            ``list`` of at most 10 entries for ``multi*`` tasks. Falsy values
            (``None``, ``""``, ``[]``) mean "no reference image".

    Returns:
        The flat attribute dict.

    Raises:
        TypeError: If ``image_field`` has the wrong type for ``task_type``.
        ValueError: If a ``multi*`` task carries more than 10 images.
    """
    input_part = {
        "role": "user",
        "parts.0.type": "text",
        "parts.0.text": json.dumps(item, ensure_ascii=False),
    }

    if not image_field:
        return input_part

    # Explicit raises instead of ``assert``: assertions are removed when Python
    # runs with -O, which would silently disable this validation.
    if task_type.startswith("single"):
        if not isinstance(image_field, str):
            raise TypeError(
                f"single_* task_type image must be str, got {type(image_field)}"
            )
        input_part["parts.1.type"] = "image_url"
        input_part["parts.1.image_url.name"] = "origin_image"
        input_part["parts.1.image_url.url"] = image_field
    elif task_type.startswith("multi"):
        if not isinstance(image_field, list):
            raise TypeError(
                f"multi_* task_type image must be list, got {type(image_field)}"
            )
        if len(image_field) > 10:
            raise ValueError(
                f"multi_* task_type image list length must be <= 10, got {len(image_field)}"
            )
        # Part numbering starts at 1 (part 0 is the text); image names stay 0-based.
        for part_idx, image_url in enumerate(image_field, start=1):
            input_part[f"parts.{part_idx}.type"] = "image_url"
            input_part[f"parts.{part_idx}.image_url.name"] = (
                f"origin_image_{part_idx - 1}"
            )
            input_part[f"parts.{part_idx}.image_url.url"] = image_url

    return input_part
+ extra_headers={ + "veadk-source": "veadk", + "veadk-version": VERSION, + "User-Agent": f"VeADK/{VERSION}", + "X-Client-Request-Id": getenv( + "MODEL_AGENT_CLIENT_REQ_ID", f"veadk/{VERSION}" + ), + }, + ) + + if not response.error: + logger.debug(f"task {idx} Image generate response: {response}") + + total_tokens += getattr(response.usage, "total_tokens", 0) or 0 + output_tokens += getattr(response.usage, "output_tokens", 0) or 0 + + for i, image_data in enumerate(response.data): + image_name = f"task_{idx}_image_{i}" + if "error" in image_data: + logger.error(f"Image {image_name} error: {image_data.error}") + error_list.append(image_name) + continue + + if getattr(image_data, "url", None): + image_url = image_data.url + else: + b64 = getattr(image_data, "b64_json", None) + if not b64: + logger.error( + f"Image {image_name} missing data (no url/b64)" + ) + error_list.append(image_name) + continue + image_bytes = base64.b64decode(b64) + image_url = _upload_image_to_tos( + image_bytes=image_bytes, object_key=f"{image_name}.png" + ) + if not image_url: + logger.error(f"Upload image to TOS failed: {image_name}") + error_list.append(image_name) + continue + logger.debug(f"Image saved as ADK artifact: {image_name}") + + tool_context.state[f"{image_name}_url"] = image_url + output_part[f"message.parts.{i}.type"] = "image_url" + output_part[f"message.parts.{i}.image_url.name"] = image_name + output_part[f"message.parts.{i}.image_url.url"] = image_url + logger.debug( + f"Image {image_name} generated successfully: {image_url}" + ) + success_list.append({image_name: image_url}) + else: + logger.error( + f"Task {idx} No images returned by model: {response.error}" + ) + error_list.append(f"task_{idx}") + + except Exception as e: + logger.error(f"Error in task {idx}: {e}") + traceback.print_exc() + error_list.append(f"task_{idx}") + + finally: + add_span_attributes( + span, + tool_context, + input_part=input_part, + output_part=output_part, + output_tokens=output_tokens, + 
async def image_generate(tasks: list[dict], tool_context) -> Dict:
    """Generate images with Seedream 4.0.

    Submits a batch of image generation requests. Every task is executed by
    ``handle_single_task_sync`` on the module-level thread pool, the per-task
    results are merged, and - when the invocation context carries an artifact
    service - each generated image is additionally saved as an artifact.

    Args:
        tasks (list[dict]):
            A list of image-generation tasks. Each task is a dict.

            Per-task schema
            ---------------
            Required:
                - task_type (str), one of:
                    * "multi_image_to_group"    # several reference images -> image group
                    * "single_image_to_group"   # one reference image -> image group
                    * "text_to_group"           # text -> image group
                    * "multi_image_to_single"   # several reference images -> single image
                    * "single_image_to_single"  # one reference image -> single image
                    * "text_to_single"          # text -> single image
                - prompt (str):
                    Text description of the desired image(s); Chinese or English.
                    To request a specific number of images, state it in the
                    prompt (e.g. "生成N张图片", i.e. "generate N images").
            Optional:
                - size (str):
                    Size of the generated image. Two mutually exclusive forms:
                    Form 1 - resolution level:
                        One of "1K", "2K", "4K". The model infers a suitable
                        aspect ratio and width/height from the prompt.
                    Form 2 - explicit width and height:
                        Format "<width>x<height>", e.g. "2048x2048", "2384x1728".
                        Constraints:
                            * total pixels within [1024x1024, 4096x4096]
                            * aspect ratio within [1/16, 16]
                        Recommended values:
                            - 1:1  -> 2048x2048
                            - 4:3  -> 2384x1728
                            - 3:4  -> 1728x2304
                            - 16:9 -> 2560x1440
                            - 9:16 -> 1440x2560
                            - 3:2  -> 2496x1664
                            - 2:3  -> 1664x2496
                            - 21:9 -> 3024x1296
                    Default: "2048x2048".
                - response_format (str):
                    Return format: "url" (default; the URL expires after 24h)
                    or "b64_json".
                - watermark (bool):
                    Add a watermark. Default: true.
                - image (str | list[str]):
                    Reference image(s) as URL or Base64. Only needed for tasks
                    that are not text-to-image; omit it for text-to-image.
                    * single reference image: pass a string (exactly 1 image).
                    * multiple reference images: pass an array (2-10 images).
                - sequential_image_generation (str):
                    Controls whether an image group is generated.
                    Default: "disabled". Must be "auto" to generate a group.
                - max_images (int):
                    Only effective for image groups. Upper bound on how many
                    images the model may generate, range [1, 15], 15 when not
                    set. This is a maximum, not the exact number produced.
                    At most 14 for single-image-to-group; for
                    multi-image-to-group, len(images) + max_images <= 15.

            How the mode is inferred (S = sequential_image_generation)
            ----------------------------------------------------------
            1) text -> single:   no image, S unset or "disabled" -> 1 image.
            2) text -> group:    no image, S="auto" -> group, count bounded by max_images.
            3) single -> single: image=string, S unset or "disabled" -> 1 image.
            4) single -> group:  image=string, S="auto" -> group, at most 14 images.
            5) multi -> single:  image=array (2-10), S unset or "disabled" -> 1 image.
            6) multi -> group:   image=array (2-10), S="auto" -> group, total <= 15.
        tool_context: Tool context of the current invocation; used for
            session state, tracing attributes and the artifact service.

    Returns:
        Dict with the generation summary. ``status`` is "failed" when the
        configured model is a deprecated Seedream 3.0 model, "error" when no
        image was generated, and "success" otherwise. Example:
            {
                "status": "success",
                "success_list": [
                    {"image_name": "url"}
                ],
                "error_list": ["image_name"]
            }

    Notes:
        - Group tasks must set sequential_image_generation="auto".
        - To request a specific number of images in a group, say so in the
          prompt, e.g. "生成3张图片" ("generate 3 images").
        - Prefer 2048x2048 or one of the recommended sizes for best quality.
    """
    model = getenv("MODEL_IMAGE_NAME", DEFAULT_IMAGE_GENERATE_MODEL_NAME)

    if model.startswith("doubao-seedream-3-0"):
        # Build the message once so the log line and the value returned to the
        # caller are identical. The returned copy used to be a plain string
        # (missing ``f`` prefix) and leaked a literal "{model}" placeholder.
        deprecation_message = (
            f"Image generation by Doubao Seedream 3.0 ({model}) is depracated. "
            "Please use Doubao Seedream 4.0 (e.g., doubao-seedream-4-0-250828) instead."
        )
        logger.error(deprecation_message)
        return {
            "status": "failed",
            "success_list": [],
            "error_list": [deprecation_message],
        }

    logger.debug(f"Using model to generate image: {model}")

    success_list: list[dict] = []
    error_list: list[str] = []

    logger.debug(f"image_generate tasks: {tasks}")

    with tracer.start_as_current_span("image_generate"):
        # Snapshot the context inside the span so that worker threads see the
        # "image_generate" span as their parent.
        base_ctx = contextvars.copy_context()

        def make_task(idx, item):
            # A Context object can only be entered by one thread at a time, so
            # every worker gets its own copy.
            ctx = base_ctx.copy()
            return lambda: ctx.run(handle_single_task_sync, idx, item, tool_context)

        # We are inside a coroutine, so a loop is guaranteed to be running;
        # get_running_loop() is the non-deprecated way to obtain it.
        loop = asyncio.get_running_loop()
        futures = [
            loop.run_in_executor(executor, make_task(idx, item))
            for idx, item in enumerate(tasks)
        ]

        results = await asyncio.gather(*futures, return_exceptions=True)

        for res in results:
            if isinstance(res, Exception):
                logger.error(f"Task raised exception: {res}")
                error_list.append("unknown_task_exception")
                continue
            s, e = res
            success_list.extend(s)
            error_list.extend(e)

    if not success_list:
        logger.debug(
            f"image_generate success_list: {success_list}\nerror_list: {error_list}"
        )
        return {
            "status": "error",
            "success_list": success_list,
            "error_list": error_list,
        }

    invocation_context = tool_context._invocation_context
    app_name = invocation_context.app_name
    user_id = invocation_context.user_id
    session_id = invocation_context.session.id
    artifact_service = invocation_context.artifact_service

    if artifact_service:
        for image in success_list:
            # Each success entry is a one-item {image_name: url} mapping; only
            # the URL is needed here.
            for image_tos_url in image.values():
                filename = f"artifact_{formatted_timestamp()}"
                await artifact_service.save_artifact(
                    app_name=app_name,
                    user_id=user_id,
                    session_id=session_id,
                    filename=filename,
                    artifact=Part(
                        inline_data=Blob(
                            display_name=filename,
                            data=read_file_to_bytes(image_tos_url),
                            mime_type=mimetypes.guess_type(image_tos_url)[0],
                        )
                    ),
                )

    logger.debug(
        f"image_generate success_list: {success_list}\nerror_list: {error_list}"
    )
    return {"status": "success", "success_list": success_list, "error_list": error_list}
input_part: dict = None, + output_part: dict = None, + input_tokens: int = None, + output_tokens: int = None, + total_tokens: int = None, + request_model: str = None, + response_model: str = None, +): + try: + # common attributes + app_name = tool_context._invocation_context.app_name + user_id = tool_context._invocation_context.user_id + agent_name = tool_context.agent_name + session_id = tool_context._invocation_context.session.id + span.set_attribute("gen_ai.agent.name", agent_name) + span.set_attribute("openinference.instrumentation.veadk", VERSION) + span.set_attribute("gen_ai.app.name", app_name) + span.set_attribute("gen_ai.user.id", user_id) + span.set_attribute("gen_ai.session.id", session_id) + span.set_attribute("agent_name", agent_name) + span.set_attribute("agent.name", agent_name) + span.set_attribute("app_name", app_name) + span.set_attribute("app.name", app_name) + span.set_attribute("user.id", user_id) + span.set_attribute("session.id", session_id) + span.set_attribute("cozeloop.report.source", "veadk") + + # llm attributes + span.set_attribute("gen_ai.system", "openai") + span.set_attribute("gen_ai.operation.name", "chat") + if request_model: + span.set_attribute("gen_ai.request.model", request_model) + if response_model: + span.set_attribute("gen_ai.response.model", response_model) + if total_tokens: + span.set_attribute("gen_ai.usage.total_tokens", total_tokens) + if output_tokens: + span.set_attribute("gen_ai.usage.output_tokens", output_tokens) + if input_tokens: + span.set_attribute("gen_ai.usage.input_tokens", input_tokens) + if input_part: + span.add_event("gen_ai.user.message", input_part) + if output_part: + span.add_event("gen_ai.choice", output_part) + + except Exception: + traceback.print_exc() + + +def _upload_image_to_tos(image_bytes: bytes, object_key: str) -> None: + try: + import os + from datetime import datetime + + from veadk.integrations.ve_tos.ve_tos import VeTOS + + timestamp: str = 
async def image_generate(tasks: list[dict], tool_context) -> Dict:
    """Generate images with Seedream 4.0.

    Commit batch image generation requests via tasks.

    Args:
        tasks (list[dict]):
            A list of image-generation tasks. Each task is a dict.
            Per-task schema
            ---------------
            Required:
                - task_type (str):
                    One of:
                    * "multi_image_to_group" # 多图生组图
                    * "single_image_to_group" # 单图生组图
                    * "text_to_group" # 文生组图
                    * "multi_image_to_single" # 多图生单图
                    * "single_image_to_single" # 单图生单图
                    * "text_to_single" # 文生单图
                - prompt (str)
                    Text description of the desired image(s). 中文/English 均可。
                    注意:这里禁止在prompt字段输入类似:`生成x张图片`这样的描述,请使用 `max_images` 字段来控制生成的图片数量。
            Optional:
                - size (str)
                    指定生成图像的大小,有两种用法(二选一,不可混用):
                    方式 1:分辨率级别
                        可选值: "1K", "2K", "4K"
                        模型会结合 prompt 中的语义推断合适的宽高比、长宽。
                    方式 2:具体宽高值
                        格式: "<宽度>x<高度>",如 "2048x2048", "2384x1728"
                        约束:
                            * 总像素数范围: [1024x1024, 4096x4096]
                            * 宽高比范围: [1/16, 16]
                        推荐值:
                            - 1:1 → 2048x2048
                            - 4:3 → 2384x1728
                            - 3:4 → 1728x2304
                            - 16:9 → 2560x1440
                            - 9:16 → 1440x2560
                            - 3:2 → 2496x1664
                            - 2:3 → 1664x2496
                            - 21:9 → 3024x1296
                    默认值: "2048x2048"
                - response_format (str)
                    Return format: "url" (default, URL 24h 过期) | "b64_json".
                - watermark (bool)
                    Add watermark. Default: true.
                - image (str | list[str]) # 仅“非文生图”需要。文生图请不要提供 image
                    Reference image(s) as URL or Base64.
                    * 生成“单图”的任务:传入 string(exactly 1 image)。
                    * 生成“组图”的任务:传入 array(2–10 images)。
                - sequential_image_generation (str)
                    控制是否生成“组图”。Default: "disabled".
                    * 若要生成组图:必须设为 "auto"。
                - max_images (int)
                    仅当生成组图时生效。控制模型能生成的张数。
            Model 行为说明(如何由参数推断模式)
            ---------------------------------
            1) 文生单图: 不提供 image 且 (S 未设置或 S="disabled") → 1 张图。
            2) 文生组图: 不提供 image 且 S="auto" → 组图,数量由 max_images 控制。
            3) 单图生单图: image=string 且 (S 未设置或 S="disabled") → 1 张图。
            4) 单图生组图: image=string 且 S="auto" → 组图,数量 ≤14。
            5) 多图生单图: image=array (2–10) 且 (S 未设置或 S="disabled") → 1 张图。
            6) 多图生组图: image=array (2–10) 且 S="auto" → 组图,需满足总数 ≤15。
            返回结果
            --------
            Dict with generation summary.
            Example:
            {
                "status": "success",
                "success_list": [
                    {"image_name": "url"}
                ],
                "error_list": ["image_name"]
            }
            Notes:
            - 组图任务必须 sequential_image_generation="auto"。
            - size 推荐使用 2048x2048 或表格里的标准比例,确保生成质量。
    """
    # NOTE: the docstring above is intentionally left in its original wording.
    # This function is registered as an agent tool, and (presumably, as with
    # ADK function tools in general - confirm) the docstring is what the model
    # receives as the tool description, so its text is runtime behaviour.
    #
    # Implementation overview: every "*_to_group" task is expanded into
    # ``max_images`` independent "*_to_single" tasks, the flattened list is
    # passed to the underlying generator, and the result keys are mapped back
    # to the caller's original task indices.
    logger.debug(f"image_generate_gather tasks: {tasks}")

    group_task_types = {
        "single_image_to_group",
        "text_to_group",
        "multi_image_to_group",
    }
    new_tasks = []
    task_origin_info = []  # Stores (original_task_index, sub_index_within_group)

    for original_idx, task in enumerate(tasks):
        task_type = task.get("task_type", "")

        if task_type not in group_task_types:
            new_tasks.append(task.copy())
            task_origin_info.append((original_idx, 0))
            continue

        # ``max_images`` comes from model-produced arguments: tolerate None,
        # numeric strings and non-positive values instead of crashing in
        # ``range`` or silently generating nothing for the task.
        try:
            num_images = int(task.get("max_images", 1))
        except (TypeError, ValueError):
            num_images = 1
        num_images = max(num_images, 1)

        base_task_type = task_type.replace("_group", "_single")
        for sub_idx in range(num_images):
            new_task = task.copy()
            new_task["task_type"] = base_task_type
            new_task.pop("sequential_image_generation", None)
            new_task.pop("max_images", None)
            new_tasks.append(new_task)
            task_origin_info.append((original_idx, sub_idx))

    # Strip phrases such as "3张图片" ("3 images") from the prompt: with such a
    # phrase a single-image task tends to come back as a 4- or 6-panel collage.
    # The character class covers both Arabic and Chinese numerals.
    count_phrase = re.compile(r"[\d一二三四五六七八九十百千万]+张图片")
    for task in new_tasks:
        if "prompt" in task and isinstance(task["prompt"], str):
            task["prompt"] = count_phrase.sub("图片", task["prompt"])
        # The watermark is always disabled here, whatever the caller passed.
        task["watermark"] = False

    # Call the underlying image_generate function with the flattened list of tasks
    logger.debug(f"image_generate_gather new_tasks: {new_tasks}")
    raw_result = await image_generate_builtin(new_tasks, tool_context)
    logger.debug(f"image_generate_gather raw_result: {raw_result}")

    # Remap the results to match the original task structure
    remapped_success = []
    for success_item in raw_result.get("success_list", []):
        for key, url in success_item.items():
            # Key is like 'task_{idx}_image_{i}'
            match = re.match(r"task_(\d+)_image_(\d+)", key)
            if not match:
                continue

            new_task_idx = int(match.group(1))
            if new_task_idx >= len(task_origin_info):
                continue

            original_idx, original_sub_idx = task_origin_info[new_task_idx]
            remapped_success.append(
                {f"task_{original_idx}_image_{original_sub_idx}": url}
            )

    # A dict is used as an insertion-ordered set so that duplicates collapse
    # while the reported error order stays deterministic.
    remapped_errors: dict[str, None] = {}
    for error_item in raw_result.get("error_list", []):
        # Error item is like 'task_{idx}' (optionally followed by '_image_{i}')
        match = re.match(r"task_(\d+)", error_item)
        if match and int(match.group(1)) < len(task_origin_info):
            original_idx, _ = task_origin_info[int(match.group(1))]
            remapped_errors[f"task_{original_idx}"] = None
        else:
            # Keep the original error text if it cannot be mapped back.
            remapped_errors[error_item] = None

    error_list = list(remapped_errors)
    logger.debug(f"image_generate_gather remapped_success: {remapped_success}")
    logger.debug(f"image_generate_gather remapped_errors: {error_list}")

    return {
        "status": raw_result.get("status"),
        "success_list": remapped_success,
        "error_list": error_list,
    }
async def resolve_short_url(short_url: str) -> str:
    """Resolve a short link back to the original URL.

    A short link is recognised by its path, which must look like ``/t/<code>``
    or ``/t/<kind>/<code>``. The link is fetched with a GET request and the
    response body (with surrounding whitespace and double quotes removed) is
    taken as the original URL.

    Args:
        short_url: The candidate short link. Values that are not http(s) URLs,
            for example ``data:image/...;base64,...`` payloads, are returned
            unchanged.

    Returns:
        The original URL, or ``short_url`` itself whenever it is not a short
        link or cannot be resolved (non-200 status, empty body, any error).
    """
    try:
        parsed_url = urllib.parse.urlparse(short_url)

        # Frames may also be passed as base64 data URLs. Those can never be
        # short links, and returning early keeps the (potentially multi-MB)
        # payload out of the warning log below.
        if parsed_url.scheme not in ("http", "https"):
            return short_url

        if not shorten_url_service_url:
            return short_url

        # Short link formats:
        #   http://127.0.0.1:8005/t/AbC123
        #   http://127.0.0.1:8005/t/video/AbC123
        path_parts = parsed_url.path.strip("/").split("/")
        if len(path_parts) < 2 or path_parts[0] != "t":
            logger.warning(f"Not a valid short URL format: {short_url}")
            return short_url

        # The short-link service answers the GET with the original URL as a
        # plain (possibly JSON-quoted) string in the response body.
        async with aiohttp.ClientSession() as session:
            async with session.get(short_url) as response:
                if response.status != 200:
                    logger.warning(
                        f"Failed to resolve short URL: {short_url}, status: {response.status}"
                    )
                    return short_url
                original_url = (await response.text()).strip().strip('"')

        # An empty body would otherwise be handed on as an empty image URL.
        if not original_url:
            logger.warning(
                f"Short URL service returned an empty body for: {short_url}"
            )
            return short_url

        logger.debug(f"Successfully resolved short URL: {short_url} -> {original_url}")
        return original_url

    except Exception as e:
        # Best effort: fall back to the unresolved link on any failure.
        logger.error(f"Error resolving short URL {short_url}: {e}")
        return short_url
"MODEL_VIDEO_API_KEY", getenv("MODEL_AGENT_API_KEY", settings.model.api_key) + ) + base_url = getenv("MODEL_VIDEO_API_BASE", DEFAULT_VIDEO_MODEL_API_BASE) + model = getenv("MODEL_VIDEO_NAME", DEFAULT_VIDEO_MODEL_NAME) + + # 解析短链接为原始URL + if first_frame_image: + first_frame_image = await resolve_short_url(first_frame_image) + if last_frame_image: + last_frame_image = await resolve_short_url(last_frame_image) + + # Build the content array + prompt_with_media = f"(可以有极其轻度的动作音,但禁止任何人声,禁止背景音乐,禁止音效,禁止旁白,禁止解说){prompt}" + content = [{"type": "text", "text": prompt_with_media}] + + if first_frame_image and last_frame_image: + content.append( + { + "type": "image_url", + "image_url": {"url": first_frame_image}, + "role": "first_frame", + } + ) + content.append( + { + "type": "image_url", + "image_url": {"url": last_frame_image}, + "role": "last_frame", + } + ) + elif first_frame_image: + content.append({"type": "image_url", "image_url": {"url": first_frame_image}}) + + # Build the request body + request_body = { + "model": model, + "content": content, + # "generate_audio": True, # for seedance 1.5 pro only + "duration": 5, + } + + # Build headers + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {api_key}", + "veadk-source": "veadk", + "veadk-version": VERSION, + "User-Agent": f"VeADK/{VERSION}", + "X-Client-Request-Id": getenv("MODEL_AGENT_CLIENT_REQ_ID", f"veadk/{VERSION}"), + } + + # Make the POST request + async with aiohttp.ClientSession() as session: + try: + async with session.post( + f"{base_url.rstrip('/')}/contents/generations/tasks", + json=request_body, + headers=headers, + ) as response: + response.raise_for_status() + response_json = await response.json() + return response_json + except Exception: + logger.error(f"Error in generate: {traceback.format_exc()}") + raise + + +async def video_generate( + params: list, tool_context: ToolContext, batch_size: int = 32 +) -> Dict: + """ + Generate videos in **batch** from text prompts, 
optionally guided by a first/last frame, + and fine-tuned via *model text commands* (a.k.a. `parameters` appended to the prompt). + + This API creates video-generation tasks. Each item in `params` describes a single video. + The function submits all items in one call and returns task metadata for tracking. + + Args: + params (list[dict]): + A list of video generation requests. Each item supports the fields below. + Required per item: + - video_name (str): + Name/identifier of the output video file. + + - prompt (str): + Text describing the video to generate. Supports zh/EN. + You may append **model text commands** after the prompt to control resolution, + aspect ratio, fps, watermark, seed, camera lock, etc. + Format: `... --rs --rt --fps --wm --seed --cf ` + Example: + "小猫骑着滑板穿过公园。 --rs 720p --rt 16:9 --fps 24 --wm true --seed 11 --cf false" + + Optional per item: + - first_frame (str | None): + URL or Base64 string (data URL) for the **first frame** (role = `first_frame`). + Use when you want the clip to start from a specific image. + + - last_frame (str | None): + URL or Base64 string (data URL) for the **last frame** (role = `last_frame`). + Use when you want the clip to end on a specific image. + + Notes on first/last frame: + * When both frames are provided, **match width/height** to avoid cropping; if they differ, + the tail frame may be auto-cropped to fit. + * If you only need one guided frame, provide either `first_frame` or `last_frame` (not both). + + Image input constraints (for first/last frame): + - Formats: jpeg, png, webp, bmp, tiff, gif + - Aspect ratio (宽:高): 0.4–2.5 + - Width/Height (px): 300–6000 + - Size: < 30 MB + - Base64 data URL example: `data:image/png;base64,` + + Model text commands (append after the prompt; unsupported keys are ignored by some models): + --rs / --resolution Video resolution. Common values: 480p, 720p, 1080p. + Default depends on model (e.g., doubao-seedance-1-0-pro: 1080p, + some others default 720p). 
+ + --rt / --ratio Aspect ratio. Typical: 16:9 (default), 9:16, 4:3, 3:4, 1:1, 2:1, 21:9. + Some models support `keep_ratio` (keep source image ratio) or `adaptive` + (auto choose suitable ratio). + + --fps / --framespersecond Frame rate. Common: 16 or 24 (model-dependent; e.g., seaweed=24, wan2.1=16). + + --wm / --watermark Whether to add watermark. Default: **false** (per doc). + + --seed Random seed in [-1, 2^32-1]. Default **-1** = auto seed. + Same seed may yield similar (not guaranteed identical) results across runs. + + --cf / --camerafixed Lock camera movement. Some models support this flag. + true: try to keep camera fixed; false: allow movement. Default: **false**. + + Returns: + Dict: + API response containing task creation results for each input item. A typical shape is: + { + "status": "success", + "success_list": [{"video_name": "video_url"}], + "error_list": [] + } + + Constraints & Tips: + - Keep prompt concise and focused (建议 ≤ 500 字); too many details may distract the model. + - If using first/last frames, ensure their **aspect ratio matches** your chosen `--rt` to minimize cropping. + - If you must reproduce results, specify an explicit `--seed`. + - Unsupported parameters are ignored silently or may cause validation errors (model-specific). 
+ + Minimal examples: + 1) Text-only batch of two clips at 720p, 16:9, 24 fps: + params = [ + { + "video_name": "cat_park.mp4", + "prompt": "小猫骑着滑板穿过公园。 --rs 720p --rt 16:9 --fps 24 --wm false" + }, + { + "video_name": "city_night.mp4", + "prompt": "霓虹灯下的城市延时摄影风。 --rs 720p --rt 16:9 --fps 24 --seed 7" + }, + ] + + 2) With guided first/last frame (square, 6 s, camera fixed): + params = [ + { + "video_name": "logo_reveal.mp4", + "first_frame": "https://cdn.example.com/brand/logo_start.png", + "last_frame": "https://cdn.example.com/brand/logo_end.png", + "prompt": "品牌 Logo 从线稿到上色的变化。 --rs 1080p --rt 1:1 --fps 24 --cf true" + } + ] + """ + success_list = [] + error_list = [] + api_key = getenv( + "MODEL_VIDEO_API_KEY", getenv("MODEL_AGENT_API_KEY", settings.model.api_key) + ) + base_url = getenv("MODEL_VIDEO_API_BASE", DEFAULT_VIDEO_MODEL_API_BASE) + model = getenv("MODEL_VIDEO_NAME", DEFAULT_VIDEO_MODEL_NAME) + + logger.debug(f"Using model: {model}") + logger.debug(f"video_generate params: {params}") + + for start_idx in range(0, len(params), batch_size): + batch = params[start_idx : start_idx + batch_size] + logger.debug(f"video_generate batch {start_idx // batch_size}: {batch}") + + task_dict = {} # task_id: video_name + tracer = trace.get_tracer("gcp.vertex.agent") + with tracer.start_as_current_span("call_llm") as span: + input_part = {"role": "user"} + output_part = {"message.role": "model"} + total_tokens = 0 + + for idx, item in enumerate(batch): + input_part[f"parts.{idx}.type"] = "text" + input_part[f"parts.{idx}.text"] = json.dumps(item, ensure_ascii=False) + + video_name = item["video_name"] + prompt = item["prompt"] + first_frame = item.get("first_frame", None) + last_frame = item.get("last_frame", None) + + try: + # Create video generation task + response = await generate(prompt, first_frame, last_frame) + task_id = response["id"] + task_dict[task_id] = video_name + logger.debug(f"Created task {task_id} for video {video_name}") + except Exception as e: + 
logger.error(f"Error creating task for {video_name}: {e}") + error_list.append(video_name) + continue + + logger.debug("Begin querying video_generate task status...") + + while True: + task_list = list(task_dict.keys()) + if len(task_list) == 0: + break + + # Check each task status + async with aiohttp.ClientSession() as session: + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + "veadk-source": "veadk", + "veadk-version": VERSION, + "User-Agent": f"VeADK/{VERSION}", + "X-Client-Request-Id": getenv( + "MODEL_AGENT_CLIENT_REQ_ID", f"veadk/{VERSION}" + ), + } + + for task_id in task_list: + try: + async with session.get( + f"{base_url.rstrip('/')}/contents/generations/tasks/{task_id}", + headers=headers, + ) as response: + response.raise_for_status() + result = await response.json() + status = result["status"] + + if status == "succeeded": + video_name = task_dict[task_id] + video_url = result["content"]["video_url"] + logger.debug( + f"{video_name} video_generate succeeded. Video URL: {video_url}" + ) + tool_context.state[f"{video_name}_video_url"] = ( + video_url + ) + + success_list.append({video_name: video_url}) + task_dict.pop(task_id, None) + + elif status == "failed": + video_name = task_dict[task_id] + error_msg = result["error"] + logger.error( + f"{video_name} video_generate failed. Error: {error_msg}" + ) + error_list.append(video_name) + task_dict.pop(task_id, None) + + else: + logger.debug( + f"{task_dict[task_id]} video_generate current status: {status}, Retrying after 10 seconds..." 
+ ) + except Exception as e: + logger.error( + f"Error checking task status for {task_id}: {e}" + ) + # Keep the task in the dict to retry later + + # Wait before next polling + await asyncio.sleep(10) + + # Add span attributes + add_span_attributes( + span, + tool_context, + input_part=input_part, + output_part=output_part, + output_tokens=total_tokens, + total_tokens=total_tokens, + request_model=model, + response_model=model, + ) + + if len(success_list) == 0: + logger.debug( + f"video_generate success_list: {success_list}\nerror_list: {error_list}" + ) + return { + "status": "error", + "success_list": success_list, + "error_list": error_list, + } + else: + logger.debug( + f"video_generate success_list: {success_list}\nerror_list: {error_list}" + ) + return { + "status": "success", + "success_list": success_list, + "error_list": error_list, + } + + +def add_span_attributes( + span: Span, + tool_context: ToolContext, + input_part: dict | None = None, + output_part: dict | None = None, + input_tokens: int | None = None, + output_tokens: int | None = None, + total_tokens: int | None = None, + request_model: str | None = None, + response_model: str | None = None, +): + try: + # common attributes + app_name = tool_context._invocation_context.app_name + user_id = tool_context._invocation_context.user_id + agent_name = tool_context.agent_name + session_id = tool_context._invocation_context.session.id + span.set_attribute("gen_ai.agent.name", agent_name) + span.set_attribute("openinference.instrumentation.veadk", VERSION) + span.set_attribute("gen_ai.app.name", app_name) + span.set_attribute("gen_ai.user.id", user_id) + span.set_attribute("gen_ai.session.id", session_id) + span.set_attribute("agent_name", agent_name) + span.set_attribute("agent.name", agent_name) + span.set_attribute("app_name", app_name) + span.set_attribute("app.name", app_name) + span.set_attribute("user.id", user_id) + span.set_attribute("session.id", session_id) + 
span.set_attribute("cozeloop.report.source", "veadk") + + # llm attributes + span.set_attribute("gen_ai.system", "openai") + span.set_attribute("gen_ai.operation.name", "chat") + if request_model: + span.set_attribute("gen_ai.request.model", request_model) + if response_model: + span.set_attribute("gen_ai.response.model", response_model) + if total_tokens: + span.set_attribute("gen_ai.usage.total_tokens", total_tokens) + if output_tokens: + span.set_attribute("gen_ai.usage.output_tokens", output_tokens) + if input_tokens: + span.set_attribute("gen_ai.usage.input_tokens", input_tokens) + if input_part: + span.add_event("gen_ai.user.message", input_part) + if output_part: + span.add_event("gen_ai.choice", output_part) + + except Exception: + traceback.print_exc() diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/utils/__init__.py b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/utils/__init__.py new file mode 100644 index 00000000..1bef36da --- /dev/null +++ b/demohouse/mutimedia/backend/app/director-agent/src/director_agent/utils/__init__.py @@ -0,0 +1,10 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
# Licensed under the 【火山方舟】原型应用软件自用许可协议
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     https://www.volcengine.com/docs/82379/1433703
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Shared generation configs and Pydantic schemas for the director agent.
#
# NOTE: class docstrings and Field descriptions below are left exactly as
# they were; explanations are therefore written as `#` comments so that
# documenting the file does not change those strings.

from typing import Optional

from google.genai import types
from pydantic import BaseModel, Field

# Generation config that requests a JSON response, capped at 18000 output tokens.
json_response_config = types.GenerateContentConfig(
    response_mime_type="application/json", max_output_tokens=18000
)

# Generation config that only caps the output at 18000 tokens.
max_output_tokens_config = types.GenerateContentConfig(max_output_tokens=18000)


# Outcome of an agent run: a success flag plus an error message.
class Status(BaseModel):
    """A status."""

    success: bool = Field(description="如果结果成功则为True,否则为False")
    message: str = Field(description="运行成功该字段为空,否则为错误信息")


# One generated image.
class ImageItem(BaseModel):
    """An image."""

    id: int = Field(description="The shot id of the image")
    url: str = Field(description="The url of the image")


# All images generated for one shot, together with the shot's metadata.
class Image(BaseModel):
    """Image list for a shot."""

    shot_id: str = Field(description="The shot id")
    prompt: str = Field(description="The description for generating image")
    action: str = Field(description="The description for generating videos")
    reference: str = Field(description="The reference url for the shot")
    words: str = Field(description="The words for the shot")
    images: list[ImageItem] = Field(description="The list of images")


# Top-level image result. Both fields default to None: without an explicit
# default, Pydantic v2 treats an Optional field as required, so a failure
# payload carrying only `status` would be rejected by validation.
class ImageList(BaseModel):
    """Image list."""

    image_list: Optional[list[Image]] = Field(
        default=None, description="The list of images, if success"
    )
    status: Optional[Status] = Field(
        default=None, description="The status of the result"
    )


# One generated video.
class VideoItem(BaseModel):
    """A video."""

    id: int = Field(description="The shot id of the video")
    url: str = Field(description="The url of the video")


# All videos generated for one shot, together with the shot's metadata.
class Video(BaseModel):
    """Video list for a shot."""

    shot_id: str = Field(description="The shot id")
    prompt: str = Field(description="The description for generating image")
    action: str = Field(description="The description for generating videos")
    reference: str = Field(description="The reference url for the shot")
    words: str = Field(description="The words for the shot")
    videos: list[VideoItem] = Field(description="The list of videos")


# Top-level video result; fields default to None for the same reason as
# in ImageList.
class VideoList(BaseModel):
    """Video list."""

    video_list: Optional[list[Video]] = Field(
        default=None, description="The list of videos"
    )
    status: Optional[Status] = Field(
        default=None, description="The status of the result"
    )


# One storyboard shot before any media has been generated.
class Shot(BaseModel):
    """A shot."""

    id: str = Field(description="The shot id")
    image: str = Field(description="The description for generating image")
    action: str = Field(description="The description for generating videos")
    reference: str = Field(description="The reference url for the shot")
    words: str = Field(description="The words for the shot")


# The complete storyboard.
class ShotList(BaseModel):
    """Shot list."""

    shot_list: list[Shot] = Field(description="The list of shots")
+google-adk==1.18.0 +json-repair \ No newline at end of file diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/__init__.py b/demohouse/mutimedia/backend/app/evaluate-agent/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/evaluate-agent/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/config.yaml.example b/demohouse/mutimedia/backend/app/evaluate-agent/config.yaml.example new file mode 100644 index 00000000..fbaf9bda --- /dev/null +++ b/demohouse/mutimedia/backend/app/evaluate-agent/config.yaml.example @@ -0,0 +1,23 @@ +model: + agent: + provider: openai + # name: deepseek-v3-1-terminus + name: doubao-seed-1-6-250615 + api_base: https://ark.cn-beijing.volces.com/api/v3/ + api_key: + format: + name: doubao-seed-1-6-flash-250828 + +shorten_url_service_url: http://127.0.0.1:8005 + +logging: + # ERROR + # WARNING + # INFO + # DEBUG + level: DEBUG + +thinking: + evaluate_agent: disabled + image_format_agent: disabled + video_format_agent: disabled diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/README.md b/demohouse/mutimedia/backend/app/evaluate-agent/src/README.md new file mode 100644 index 00000000..c84bdbd1 --- /dev/null +++ b/demohouse/mutimedia/backend/app/evaluate-agent/src/README.md @@ -0,0 +1,83 @@ +# 评估 Agent + +用来评测分镜图片与视频。 + +## 输入输出定义 + +### 输入 + +分镜图片列表 + 
+```python +class ImageItem(BaseModel): + id: int # 每个分镜内图片的id + url: str # 图片的 tos url + +class Image(BaseModel): + shot_id: str # 分镜id + prompt: str + action: str # 分镜的口播文案,无则为空 + images: list[ImageItem] + +class InputMessage(BaseModel): + image_list: list[Image] +``` + +分镜视频列表 + +```python +class VideoItem(BaseModel): + id: int # 每个分镜内视频的id + url: str # 视频的 tos url + +class Video(BaseModel): + shot_id: str # 分镜id + prompt: str + action: str # 分镜的口播文案,无则为空 + videos: list[VideoItem] + +class InputMessage(BaseModel): + video_list: list[Video] +``` + +### 输出 + +分镜图片列表 + +```python +class ImageItem(BaseModel): + id: int # 每个分镜内图片的id + url: str # 图片的 tos url + score: float # Byteval给出的分数 + +class Image(BaseModel): + shot_id: str # 分镜id + prompt: str + action: str # 分镜的口播文案,无则为空 + images: list[ImageItem] + +class OutputMessage(BaseModel): + image_list: list[Image] +``` + +分镜视频列表 + +```python +class VideoItem(BaseModel): + id: int # 每个分镜内视频的id + url: str # 视频的 tos url + score: float # Byteval给出的分数 + +class Video(BaseModel): + shot_id: str # 分镜id + prompt: str + action: str # 分镜的口播文案,无则为空 + videos: list[VideoItem] + +class OutputMessage(BaseModel): + video_list: list[Video] +``` + +## 工具 + +使用doubao模型进行评估 diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/__init__.py b/demohouse/mutimedia/backend/app/evaluate-agent/src/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/evaluate-agent/src/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/agent.py b/demohouse/mutimedia/backend/app/evaluate-agent/src/agent.py new file mode 100644 index 00000000..d09c1e05 --- /dev/null +++ b/demohouse/mutimedia/backend/app/evaluate-agent/src/agent.py @@ -0,0 +1,22 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from evaluate_agent.agent import agent # type: ignore + +from veadk.memory.short_term_memory import ShortTermMemory +from veadk.types import AgentRunConfig + +# [required] instantiate the agent run configuration +agent_run_config = AgentRunConfig( + app_name="evaluate_agent", + agent=agent, # type: ignore + short_term_memory=ShortTermMemory(backend="local"), # type: ignore +) diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/app.py b/demohouse/mutimedia/backend/app/evaluate-agent/src/app.py new file mode 100644 index 00000000..5baa0fb5 --- /dev/null +++ b/demohouse/mutimedia/backend/app/evaluate-agent/src/app.py @@ -0,0 +1,214 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#     https://www.volcengine.com/docs/82379/1433703
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Entry point of the evaluate agent service: wraps the agent declared in
# `agent.py` into one FastAPI application that serves the A2A routes, three
# helper HTTP endpoints and an MCP server mounted under `/mcp`.

import os
from contextlib import asynccontextmanager
from typing import Callable

from agent import agent_run_config

from fastapi import FastAPI
from fastapi.routing import APIRoute

from fastmcp import FastMCP

from starlette.routing import Route

from google.adk.a2a.utils.agent_card_builder import AgentCardBuilder
from a2a.types import AgentProvider

from veadk.a2a.ve_a2a_server import init_app
from veadk.runner import Runner
from veadk.tracing.telemetry.exporters.apmplus_exporter import APMPlusExporter
from veadk.tracing.telemetry.exporters.cozeloop_exporter import CozeloopExporter
from veadk.tracing.telemetry.exporters.tls_exporter import TLSExporter
from veadk.tracing.telemetry.opentelemetry_tracer import OpentelemetryTracer
from veadk.types import AgentRunConfig
from veadk.utils.logger import get_logger
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
from opentelemetry import context

logger = get_logger(__name__)

# Fail at import time when agent.py does not export the expected config object.
assert isinstance(agent_run_config, AgentRunConfig), (
    f"Invalid agent_run_config type: {type(agent_run_config)}, expected `AgentRunConfig`"
)

app_name = agent_run_config.app_name
agent = agent_run_config.agent
short_term_memory = agent_run_config.short_term_memory

# Region and function id are read from the environment and only used to
# build the provider URL of the agent card.
VEFAAS_REGION = os.getenv("APP_REGION", "cn-beijing")
VEFAAS_FUNC_ID = os.getenv("_FAAS_FUNC_ID", "")
agent_card_builder = AgentCardBuilder(
    agent=agent,
    provider=AgentProvider(
        organization="Volcengine Agent Development Kit (VeADK)",
        url=f"https://console.volcengine.com/vefaas/region:vefaas+{VEFAAS_REGION}/function/detail/{VEFAAS_FUNC_ID}",
    ),
)


def load_tracer() -> None:
    """Attach an OpenTelemetry tracer whose exporters are chosen by env vars.

    An exporter is enabled when its environment variable equals "true"
    (case-insensitive). An exporter class that is already present on
    ``agent.tracers[0]`` is not instantiated again; a warning is logged
    instead. The new tracer is appended to the agent's tracers even when no
    exporter was enabled.
    """
    EXPORTER_REGISTRY = {
        "VEADK_TRACER_APMPLUS": APMPlusExporter,
        "VEADK_TRACER_COZELOOP": CozeloopExporter,
        "VEADK_TRACER_TLS": TLSExporter,
    }

    exporters = []
    for env_var, exporter_cls in EXPORTER_REGISTRY.items():
        if os.getenv(env_var, "").lower() == "true":
            if (
                agent.tracers
                and isinstance(agent.tracers[0], OpentelemetryTracer)
                and any(isinstance(e, exporter_cls) for e in agent.tracers[0].exporters)
            ):
                # NOTE(review): this branch skips the exporter, although the
                # message speaks of two exporters running together - confirm
                # which of the two is intended.
                logger.warning(
                    f"Exporter {exporter_cls.__name__} is already defined in agent.tracers[0].exporters. These two exporters will be used at the same time. As a result, your data may be uploaded twice."
                )
            else:
                exporters.append(exporter_cls())

    tracer = OpentelemetryTracer(name="veadk_tracer", exporters=exporters)
    agent_run_config.agent.tracers.extend([tracer])


def build_mcp_run_agent_func() -> Callable:
    """Return the coroutine function that is exposed as the ``run_agent`` endpoint.

    A single ``Runner`` is created here and shared by every call of the
    returned function.
    """
    runner = Runner(
        agent=agent,
        short_term_memory=short_term_memory,
        app_name=app_name,
        user_id="",
    )

    # No docstring here on purpose: `__doc__` is assigned below.
    async def run_agent(
        user_input: str,
        user_id: str = "mcp_user",
        session_id: str = "mcp_session",
    ) -> str:
        # Set user_id for runner
        # NOTE(review): the runner is shared, so overlapping requests from
        # different users overwrite each other's user_id - confirm whether
        # concurrent calls are expected.
        runner.user_id = user_id

        # Running agent and get final output
        final_output = await runner.run(
            messages=user_input,
            session_id=session_id,
        )
        return final_output

    # The description starts with the agent's own description.
    run_agent_doc = f"""{agent.description}
    Args:
        user_input: User's input message (required).
        user_id: User identifier. Defaults to "mcp_user".
        session_id: Session identifier. Defaults to "mcp_session".
    Returns:
        Final agent response as a string."""

    run_agent.__doc__ = run_agent_doc

    return run_agent


# Endpoint: builds the agent card and returns it as a plain dict.
# (Documented with comments rather than a docstring so the function's
# `__doc__` stays unset, as in the original.)
async def agent_card() -> dict:
    agent_card = await agent_card_builder.build()
    return agent_card.model_dump()


# Endpoint: returns the value of the environment variable
# OBSERVABILITY_OPENTELEMETRY_COZELOOP_SERVICE_NAME (empty string when unset)
# under the key "space_id".
async def get_cozeloop_space_id() -> dict:
    return {
        "space_id": os.getenv(
            "OBSERVABILITY_OPENTELEMETRY_COZELOOP_SERVICE_NAME", default=""
        )
    }


load_tracer()

# Build a run_agent function for building MCP server
run_agent_func = build_mcp_run_agent_func()

a2a_app = init_app(
    server_url="0.0.0.0",
    app_name=app_name,
    agent=agent,
    short_term_memory=short_term_memory,
)

# Register the helper endpoints on the A2A app; the "mcp" tag is what
# `include_tags` below selects for the MCP server.
a2a_app.post("/run_agent", operation_id="run_agent", tags=["mcp"])(run_agent_func)
a2a_app.get("/agent_card", operation_id="agent_card", tags=["mcp"])(agent_card)
a2a_app.get(
    "/get_cozeloop_space_id", operation_id="get_cozeloop_space_id", tags=["mcp"]
)(get_cozeloop_space_id)

# === Build mcp server ===

mcp = FastMCP.from_fastapi(app=a2a_app, name=app_name, include_tags={"mcp"})

# Create MCP ASGI app
mcp_app = mcp.http_app(path="/", transport="streamable-http")


# Combined lifespan management
@asynccontextmanager
async def combined_lifespan(app: FastAPI):
    """Run the MCP app's lifespan for as long as the main app is running."""
    async with mcp_app.lifespan(app):
        yield


# Create main FastAPI app with combined lifespan
app = FastAPI(
    title=a2a_app.title,
    version=a2a_app.version,
    lifespan=combined_lifespan,
    openapi_url=None,
    docs_url=None,
    redoc_url=None,
)


@app.middleware("http")
async def otel_context_middleware(request, call_next):
    """Continue an incoming W3C trace when the request carries a traceparent header.

    Without the header the request is handled unchanged. With it, the
    extracted context is attached for the duration of the request and always
    detached afterwards.
    """
    carrier = {
        "traceparent": request.headers.get("Traceparent"),
        "tracestate": request.headers.get("Tracestate"),
    }
    logger.debug(f"traceparent exists: {carrier['traceparent'] is not None}")
    if carrier["traceparent"] is None:
        return await call_next(request)
    else:
        ctx = TraceContextTextMapPropagator().extract(carrier=carrier)
        token = context.attach(ctx)
        try:
            response = await call_next(request)
        finally:
            # Detach even when the handler raises.
            context.detach(token)
    return response


# Mount A2A routes to main app
for route in a2a_app.routes:
    app.routes.append(route)

# Mount MCP server at /mcp endpoint
app.mount("/mcp", mcp_app)


# remove openapi routes
# (the routes copied from a2a_app above may include its documentation pages)
paths = ["/openapi.json", "/docs", "/redoc"]
new_routes = []
for route in app.router.routes:
    if isinstance(route, (APIRoute, Route)) and route.path in paths:
        continue
    new_routes.append(route)
app.router.routes = new_routes

# === Build mcp server end ===
# You may obtain a copy of the License at
#     https://www.volcengine.com/docs/82379/1433703
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
from typing import AsyncGenerator

from google.genai import types
from typing_extensions import override

from google.adk.agents import InvocationContext
from google.adk.events import Event, EventActions
from veadk import Agent
from evaluate_agent.utils.types import (
    max_output_tokens_config,
)
from veadk.config import getenv

from .hook.direct_output_callback import direct_output_callback

# from .tools.byteval import evaluate_media, mock_evaluate_media
from .tools.geval import evaluate_media
from evaluate_agent.prompt import PROMPT_EVALUATE_AGENT


def _is_evaluate_media_response(event: Event) -> bool:
    """Tell whether *event* carries the result of the ``evaluate_media`` tool.

    Only the first content part is inspected.
    """
    if not event.get_function_responses():
        return False
    parts = event.content.parts
    if not parts:
        return False
    first_response = parts[0].function_response
    return bool(first_response) and first_response.name == "evaluate_media"


class EvaluateAgent(Agent):
    @override
    async def _run_async_impl(
        self, ctx: InvocationContext
    ) -> AsyncGenerator[Event, None]:
        # Forward every event of the base agent unchanged. Right after an
        # `evaluate_media` tool response, additionally emit that response as
        # a JSON text event flagged with skip_summarization.
        async for event in super()._run_async_impl(ctx):
            # Decide before yielding, exactly like the original condition.
            emit_tool_result = _is_evaluate_media_response(event)
            yield event
            if not emit_tool_result:
                continue

            # Agent summary stage: output the tool result directly.
            tool_result_json = json.dumps(
                event.content.parts[0].function_response.response,
                ensure_ascii=False,
            )
            yield Event(
                author=self.name,
                invocation_id=ctx.invocation_id,
                branch=ctx.branch,
                content=types.Content(parts=[types.Part(text=tool_result_json)]),
                actions=EventActions(skip_summarization=True),
            )


# Thinking mode comes from THINKING_EVALUATE_AGENT and defaults to "enabled".
_thinking_config = {"type": getenv("THINKING_EVALUATE_AGENT", "enabled")}

agent = EvaluateAgent(
    name="evaluate_agent",
    description="根据用户的需求,评估分镜图片或分镜视频的质量",
    instruction=PROMPT_EVALUATE_AGENT,
    tools=[evaluate_media],
    after_tool_callback=[direct_output_callback],
    model_extra_config={"extra_body": {"thinking": _thinking_config}},
    generate_content_config=max_output_tokens_config,
)

root_agent = agent
+# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Optional +from google.adk.tools import BaseTool, ToolContext +from veadk.utils.logger import get_logger + +logger = get_logger(__name__) + + +def direct_output_callback( + tool: BaseTool, args: dict[str, Any], tool_context: ToolContext, tool_response: dict +) -> Optional[dict]: + """让工具结果直接输出,跳过LLM总结""" + # 设置跳过总结标志 + if tool.name == "evaluate_media": + tool_context.actions.skip_summarization = True + return tool_response # 不能return None diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/prompt.py b/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/prompt.py new file mode 100644 index 00000000..3886c746 --- /dev/null +++ b/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/prompt.py @@ -0,0 +1,281 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +PROMPT_EVALUATE_AGENT = """ +#角色: +你是一位食品饮料行业的电商营销评审 evaluate_agent,对分镜图片和分镜视频进行质量评估。 + +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 + +#工具: +1. 
evaluate_media:为图片或视频打分。 + +#任务描述: +你作为 evaluate_agent,可能会收到用户的两种不同任务:图片评分任务和视频评分任务。 +1.图片评分任务:如果是图片评分任务,则根据用户传入 image_list, 调用 evaluate_media 对每个图片进行评估。 +evaluate_media 工具会从 一致性,美学,质量 三个维度评估图片质量,并返回评分结果。 +根据 evaluate_media 工具返回的评估结果生成 scored_image_list (评估后的分镜图片列表)。 +2.视频评分任务:如果是视频评分任务,则根据用户传入 video_list, 调用 evaluate_media 对每个视频进行评估。 +evaluate_media 工具会从 一致性,美学,质量 三个维度评估视频质量,并返回评分结果。 +根据 evaluate_media 工具返回的评估结果生成 scored_video_list (评估后的分镜视频列表)。 + +#注意事项: +2. 你只需识别用户请求的是哪种任务,然后调用 evaluate_media 工具,根据 evaluate_media 工具返回的评估结果返回给用户。 +3. 输入输出中,任何涉及图片或视频的链接url,不要做任何修改。 + +#格式 +1. image_list +```json +{ + "image_list": [ + { + "shot_id": "分镜1", + "prompt": "如何生成分镜图片的详细描述", + "action": "分镜视频的动作描述", + "reference": "分镜一和分镜四中的reference图片,作为图片生成的参考图", + "words": "口播文案", + "images": [ + { + "id": int, 图片id, + "url": "图片url", + } + ] + } + ] +} +``` +2. video_list +```json +{ + "video_list": [ + { + "shot_id": "分镜1", + "prompt": "如何生成分镜视频的详细描述", + "action": "分镜视频的动作描述", + "reference": "分镜图片的参考url", + "words": "口播文案", + "videos": [ + { + "id": int, 视频id, + "url": "视频url", + } + ] + } + ] +} +``` +3. scored_image_list +```json +{ + "scored_image_list": [ + { + "shot_id": "分镜1", + "prompt": "如何生成分镜图片的详细描述", + "action": "分镜视频的动作描述", + "reference": "分镜一和分镜四中的reference图片,作为图片生成的参考图", + "words": "口播文案", + "images": [ + { + "id": 1, + "url": "图片url", + "score": 0.8, + "reason": "图片评分理由" + } + ] + } + ], + "status": { + "success": bool, 是否成功 + "message": str, 错误信息,成功时为空字符串 + } +} +``` +4. 
scored_video_list +```json +{ + "scored_video_list": [ + { + "shot_id": "分镜1", + "prompt": "如何生成分镜视频的详细描述", + "action": "分镜视频的动作描述", + "reference": "分镜图片的参考url", + "words": "口播文案", + "videos": [ + { + "id": 1, + "url": "视频url", + "score": 0.8, + "reason": "视频评分理由" + } + ] + } + ], + "status": { + "success": bool, 是否成功 + "message": str, 错误信息,成功时为空字符串 + } +} + +# 注意 +注意:当遇到Agent执行异常,如缺少内容,运行出错,结果不完整,用户输入内容不足以完成任务时,请在status字段中反馈,而不是在业务字段中反馈描述,如有上述问题,业务字段可以为空。只反馈错误即可 +``` +""" + +PROMPT_IMAGE_FORMAT_AGENT = """ +#角色: +你是一个将输入按规定格式输出的格式转换器 +你有两个任务,第一个任务是检查分镜生成的数量和每个分镜生成的图片数量是否正确,不要有丢失或者缺少。 +如果存在丢失或者缺少,直接返回 +"status": { + "success": bool, 失败 + "message": str, 错误信息,解释发生了缺少和丢失的现象 + } +如果不存在缺失的问题,则继续进行格式转换工作 +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 + +#任务描述: +1. 将 评估后的分镜图片列表,将其按 "规定格式" 输出。 + +#评估后的分镜图片列表 + shot_id:分镜1 + prompt: str, 如何生成分镜图片的详细描述 + action: str, 分镜视频的动作描述 + reference: str, 分镜一和分镜四中的reference图片,作为图片生成的参考图 + words: str, 口播文案 + images: list, 每个分镜里的图片列表,绘图工具返回 + id: int, 图片id + url: str, 图片url + score: float, 图片评分 + reason: str, 图片评分理由 + +#规定格式 +```json +{ + "scored_image_list": [ + { + "shot_id": "分镜1", + "prompt": "如何生成分镜图片的详细描述", + "action": "分镜视频的动作描述", + "reference": "分镜一和分镜四中的reference图片,作为图片生成的参考图", + "words": "口播文案", + "images": [ + { + "id": 1, + "url": "图片url", + "score": 0.8, + "reason": "图片评分理由,注意,返回的三类评分,三类评分中间用\n换行符分割。" + } + ] + } + ], + "status": { + "success": bool, 是否成功 + "message": str, 错误信息,成功时为空字符串 + } +} +# 注意 +注意:当遇到Agent执行异常,如缺少内容,运行出错,结果不完整,用户输入内容不足以完成任务时,请在status字段中反馈,而不是在业务字段中反馈描述,如有上述问题,业务字段可以为空。只反馈错误即可 +``` +""" + +PROMPT_VIDEO_FORMAT_AGENT = """ +#角色: +你是一个将输入按规定格式输出的格式转换器 +你有两个任务,第一个任务是检查分镜生成的数量和每个分镜生成的视频数量是否正确,不要有丢失或者缺少。 +如果存在丢失或者缺少,直接返回 +"status": { + "success": bool, 失败 + "message": str, 错误信息,解释发生了缺少和丢失的现象 + } +如果不存在缺失的问题,则继续进行格式转换工作 +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 + +#任务描述: +1. 
将 评估后的分镜视频列表,将其按 "规定格式" 输出。 + +#评估后的分镜视频列表 + shot_id:分镜1 + prompt: str, 如何生成分镜视频的详细描述 + action: str, 分镜视频的动作描述 + reference: str, 分镜图片的参考url + words: str, 口播文案 + videos: list, 每个分镜里的视频列表,视频生成工具返回 + id: int, 视频id + url: str, 视频url + score: float, 视频评分 + reason: str, 视频评分理由 + +#规定格式 +```json +{ + "scored_video_list": [ + { + "shot_id": "分镜1", + "prompt": "如何生成分镜视频的详细描述", + "action": "分镜视频的动作描述", + "reference": "分镜图片的参考url", + "words": "口播文案", + "videos": [ + { + "id": 1, + "url": "视频url", + "score": 0.8, + "reason": "视频评分理由,注意,返回的三类评分,三类评分中间用\n换行符分割。" + } + ] + } + ], + "status": { + "success": bool, 是否成功 + "message": str, 错误信息,成功时为空字符串 + } +} +# 注意 +注意:当遇到Agent执行异常,如缺少内容,运行出错,结果不完整,用户输入内容不足以完成任务时,请在status字段中反馈,而不是在业务字段中反馈描述,如有上述问题,业务字段可以为空。只反馈错误即可 +``` +""" + +PROMPT_EVALUATE_ITEM_AGENT = """ +### 任务说明 +根据用户的需求,评估分镜图片或分镜视频的质量。 +### 背景介绍 +你是一个电商产品营销系统中的一部分,属于评估系统的核心,你的任务是完成对输入内容(可能是图片可能是视频)的评估。 +### 输入要求 +用户将会提供给你一个输入,输入包含两部分:`生成图片或视频列表`和`参考图片`,你需要对输入的图片进行点评 + +### 输出要求 +你的输出应该是一个json,包括三个部分 +```json +{ + "shot_id": "镜头编号", + "media_id": "媒体编号", + "reason": "评分理由,综合了美学、画质、一致性三个维度进行点评,具体的理由写法庆参考下文`理由要点`部分"(要求全程中文,包括标点符号也是中文), + "scores": "综合评分,综合了美学、画质、一致性三个维度进行评分", 评分范围为0~1分,保留两位小数 +} +``` +### 理由要点 +1. 一致性评估,用于评估生成的图像或视频与参考图像或视频的一致性。 +2. 美学评估,用于评估图像或视频的美学质量。 +3. 
画质评估,用于评估图像或视频的画质质量。 +针对提供的图像/视频,按以下要求完成多维度评估分析,输出需分模块呈现: +美学评分解释:从构图平衡度、色彩搭配(冷暖对比 / 和谐度 / 艺术感)、光影表现(通透感 / 细节还原 / 氛围营造)、创意突破性、情感共鸣深度等维度,分析图像的美学表现,说明其对应评分的合理性,明确是否处于高分段及核心原因; +画质评分解释:从色彩与光影(饱和度 / 层次感 / 真实性)、细节呈现(清晰度 / 锐度 / 微观纹理还原)、构图与质感(主体布局 / 背景协调性 / 材质区分度)、视觉完整性(无噪点 / 无失真 / 元素融合度)等维度,结合技术层面(如分辨率、光影合理性)分析画质优势,说明与高画质评分的逻辑一致性(若涉及具体模型,需关联模型名称); +一致性评估(仅对有参考图片的):对比生成图像与参考图像的关键视觉元素(瓶身造型、包装标签 / Logo、背景场景、主体摆放形式、核心视觉特征),给出一致性评分(精确到小数点后 1 位),并解释评分依据(关联关键元素差异与关联度); +各模块分析需紧扣评分逻辑,既说明优势维度,也指出不足(若有),语言需专业且贴合视觉审美与技术评估场景,模块间用分号分隔。 +注意:评估的原因部分,请全部使用中文,包括标点符号也要是中文版的。 +返回的三类评分,中间用\n换行符分割。 +""" diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/__init__.py b/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/geval.py b/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/geval.py new file mode 100644 index 00000000..e719535c --- /dev/null +++ b/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/geval.py @@ -0,0 +1,349 @@ +# Copyright (c) 2025 Bytedance Ltd. 
# System instruction passed to the per-item evaluation model.
evaluate_agent_instruction = PROMPT_EVALUATE_ITEM_AGENT
logger = get_logger(__name__)

# Short-URL service configuration.
shorten_url_service_url = os.getenv("SHORTEN_URL_SERVICE_URL", None)
# NOTE(review): assert statements are stripped under `python -O`, which is why
# the functions below still guard against an unset service URL.
assert shorten_url_service_url, "SHORTEN_URL_SERVICE_URL is not set"


async def resolve_short_url(short_url: str) -> str:
    """Expand a short link into the original URL.

    Only URLs whose path looks like ``/t/<code>`` (or ``/t/<kind>/<code>``) are
    treated as short links; they are fetched with a GET request and the
    response body is taken as the original URL.

    Args:
        short_url: The (possibly shortened) URL.

    Returns:
        The original URL, or ``short_url`` itself when the short-URL service is
        not configured, the URL is not in short-link format, the request does
        not return HTTP 200, the body is empty, or any error occurs.
    """
    if not shorten_url_service_url:
        return short_url

    try:
        # Expected shapes: http://host/t/AbC123 or http://host/t/video/AbC123
        path_parts = urllib.parse.urlparse(short_url).path.strip("/").split("/")
        if len(path_parts) < 2 or path_parts[0] != "t":
            logger.warning(f"Not a valid short URL format: {short_url}")
            return short_url

        async with aiohttp.ClientSession() as session:
            async with session.get(short_url) as response:
                if response.status != 200:
                    logger.warning(
                        f"Failed to resolve short URL: {short_url}, status: {response.status}"
                    )
                    return short_url

                # The body is read as the original URL, possibly JSON-quoted.
                original_url = (await response.text()).strip().strip('"')
                if not original_url:
                    # An empty body is useless as a media URL; keep the input.
                    logger.warning(f"Empty response for short URL: {short_url}")
                    return short_url
                logger.debug(
                    f"Successfully resolved short URL: {short_url} -> {original_url}"
                )
                return original_url

    except Exception as e:
        # Best effort: resolution failures must not abort the evaluation.
        logger.error(f"Error resolving short URL {short_url}: {e}")
        return short_url


async def repair_evaluate_input(
    media_list: list[dict[str, Any]], media_type: str = "image"
) -> list[dict[str, Any]]:
    """Turn storyboard shots into one user message per media item.

    Each message contains a text part (carrying the shot id and the media
    index the model must echo back), the media item to evaluate, and the
    shot's reference images.

    Args:
        media_list: Shots, each with ``shot_id``, optional ``reference``
            (a URL string or a list of them) and ``media`` (a list of dicts
            with a ``url`` key).
        media_type: ``"image"`` for images; any other value is treated as video.

    Returns:
        A flat list of ``{"role": "user", "content": [...]}`` messages, in shot
        order and, within a shot, in media order.

    Raises:
        KeyError: If a media entry has no ``url`` key.
    """
    if media_type == "image":
        media_url_field, media_type_field, media_label = "image_url", "input_image", "图片"
    else:
        media_url_field, media_type_field, media_label = "video_url", "input_video", "视频"

    messages = []
    for shot in media_list:
        shot_id = shot.get("shot_id", "")
        references = shot.get("reference") or []
        if isinstance(references, str):
            references = [references]

        # Reference parts are shared by every media item of the shot.
        # References are always sent as images, even for video evaluation.
        reference_parts = []
        for reference in references:
            if not isinstance(reference, str) or not reference.strip():
                continue
            reference_parts.append(
                {
                    "type": "input_image",
                    "image_url": await resolve_short_url(reference),
                }
            )
        # Count only the references actually attached, so the prompt matches
        # what the model receives (blank entries are skipped above).
        reference_count = len(reference_parts)

        for media_id, media in enumerate(shot.get("media") or []):
            resolved_media_url = await resolve_short_url(media["url"])

            # Build the text in explicit steps: a conditional expression binds
            # more loosely than "+", so the former one-expression version
            # dropped either the shot/media ids or the closing instruction.
            text = (
                f"本次{media_label}的shot_id={shot_id}, media_id={media_id},"
                f"你一共收到{reference_count + 1}份媒体素材,"
                f"其中第1条{media_label}是你需要评价的{media_label}"
            )
            if reference_count > 0:
                text += f", 后续的共{reference_count}张图片均为参考图片。"
            else:
                text += "。"
            text += "请按照要求对媒体素材进行评价并输出符合要求的结果。"

            messages.append(
                {
                    "role": "user",
                    "content": [
                        {"type": "input_text", "text": text},
                        {"type": media_type_field, media_url_field: resolved_media_url},
                        *reference_parts,
                    ],
                }
            )

    return messages


def _normalize_reference(ref_val) -> str:
    """Collapse a reference value (list, string or None) into one string."""
    if isinstance(ref_val, list):
        return ",".join(str(ref) for ref in ref_val)
    return ref_val or ""


async def evaluate_media(
    media_list: list[dict[str, Any]], media_type: str = "image"
) -> dict:
    """
    Evaluate a list of storyboard shots, each containing multiple generated media items,
    and return every media item annotated with a score and the reasoning behind it.

    This tool is designed to perform qualitative or model-based evaluation of
    storyboard media (e.g., generated images or videos from prompts or diffusion models)
    based on visual quality, temporal consistency, and coherence with reference materials.

    Each element in `media_list` represents one storyboard shot and includes its
    metadata, descriptive text, and a list of generated media for evaluation.

    Args:
        media_list (List[Dict[str, Any]]):
            A list of storyboard shot data. Each shot should include:

            - **shot_id** (str): The unique identifier for the storyboard shot.
            - **prompt** (str): A detailed text description used to generate the media.
            - **action** (str): The visual or narrative action happening in this shot.
            - **reference** (str): A reference media URL (optional), used as visual guidance.
            - **media** (List[Dict[str, Any]]): The list of generated media items for this shot,
              each containing:
                - **id** (int): The media ID.
                - **url** (str): The URL of the generated media (image or video).
        media_type (str): The type of media to be evaluated. Defaults to "image", only in ["image", "video"].
    Returns:
        Dict[str, Any]: A dictionary with two keys:
            - **scored_image_list** (for images) or **scored_video_list** (for videos):
              one entry per evaluated shot with `shot_id`, `prompt`, `action`,
              `reference`, `words` and an `images` / `videos` list whose items carry
              `id`, `url`, `score` (float) and `reason` (str).
            - **status**: `{"success": True, "message": ""}`.
    Example:
        evaluate_media([
        ...     {
        ...         "shot_id": "shot_1",
        ...         "prompt": "A samurai walking through cherry blossoms at sunset",
        ...         "action": "Character slowly moves from left to right",
        ...         "reference": "https://example.com/ref1.mp4",
        ...         "media": [
        ...             {"id": 1, "url": "https://example.com/clip1.mp4"},
        ...             {"id": 2, "url": "https://example.com/clip2.mp4"}
        ...         ]
        ...     }
        ... ])
    """
    logger.debug(f"Start to evaluate {media_type} list: items={len(media_list)}")
    m_content = await repair_evaluate_input(media_list, media_type=media_type)
    logger.debug(f"Repaired {media_type} list: messages={len(m_content)}")

    client = AsyncOpenAI(
        base_url=os.getenv("MODEL_AGENT_API_BASE"),
        api_key=os.getenv("MODEL_AGENT_API_KEY"),
    )

    async def process_message(msg):
        """Score a single media item and return its ``evaluation`` object."""
        response = await client.responses.create(
            model=os.getenv("MODEL_EVALUATE_ITEM", "doubao-seed-1-6-flash-250828"),
            instructions=evaluate_agent_instruction,
            input=[msg],
            text={
                "format": {
                    "type": "json_schema",
                    "name": "EvaluationList",
                    "schema": EvaluationList.model_json_schema(),
                    "strict": True,
                }
            },
            extra_body={"thinking": {"type": "disabled"}},
        )
        return json.loads(response.output_text).get("evaluation", {})

    # Evaluate all media items concurrently; any failure propagates to the caller.
    result = await asyncio.gather(*(process_message(msg) for msg in m_content))
    logger.debug(f"Finish to evaluate {media_type} list: result_items={len(result)}")

    # Group the evaluations by shot: {shot_id: {media_index: (score, reason)}}.
    # The model echoes back the shot id and media index it was given in the prompt.
    evaluations: dict[Any, dict[int, tuple]] = {}
    for item in result:
        if not item:
            logger.warning("Skip empty evaluation result")
            continue
        try:
            media_id = int(item.get("media_id", 0))
        except (TypeError, ValueError):
            logger.warning(f"Skip evaluation with invalid media_id: {item.get('media_id')!r}")
            continue
        evaluations.setdefault(item.get("shot_id"), {})[media_id] = (
            item.get("scores"),
            item.get("reason"),
        )

    if media_type == "image":
        list_key, items_key, model_cls = "scored_image_list", "images", ScoredImageList
    else:
        list_key, items_key, model_cls = "scored_video_list", "videos", ScoredVideoList

    # Index the input by shot id so shot metadata can be copied to the output.
    shot_index = {shot.get("shot_id", ""): shot for shot in media_list}

    scored_list = []
    for shot_id, eval_map in evaluations.items():
        shot = shot_index.get(shot_id, {})
        scored_items = []
        for idx, media in enumerate(shot.get("media", [])):
            if idx not in eval_map:
                continue
            score, reason = eval_map[idx]
            scored_items.append(
                {
                    "id": int(media.get("id", idx)),
                    "url": media.get("url", ""),
                    "score": float(score) if score is not None else 0.0,
                    "reason": reason or "",
                }
            )
        scored_list.append(
            {
                "shot_id": shot_id,
                "prompt": shot.get("prompt", ""),
                "action": shot.get("action", ""),
                "reference": _normalize_reference(shot.get("reference")),
                "words": shot.get("words", ""),
                items_key: scored_items,
            }
        )

    output = {
        list_key: scored_list,
        "status": {"success": True, "message": ""},
    }
    try:
        return model_cls.model_validate(output).model_dump()
    except Exception as e:
        # Best effort: return the raw structure, but leave a trace of the mismatch.
        logger.warning(f"Scored {media_type} output failed schema validation: {e}")
        return output
# Generation config that forces a JSON response, capped at 18000 output tokens.
json_response_config = types.GenerateContentConfig(
    response_mime_type="application/json", max_output_tokens=18000
)

# Generation config that only caps the output at 18000 tokens.
max_output_tokens_config = types.GenerateContentConfig(max_output_tokens=18000)


class Status(BaseModel):
    """Outcome of an agent run: a success flag and an error message."""

    success: bool = Field(description="如果结果成功则为True,否则为False")
    message: str = Field(description="运行成功该字段为空,否则为错误信息")


class ImageItem(BaseModel):
    """One generated image together with its evaluation."""

    # The description previously said "shot id"; this field identifies the
    # image itself (the shot is identified by Image.shot_id).
    id: int = Field(description="The id of the image")
    url: str = Field(description="The url of the image")
    score: float = Field(description="The score of the image")
    reason: str = Field(description="The reason for the score")


class Image(BaseModel):
    """A storyboard shot and its scored candidate images."""

    shot_id: str = Field(description="The shot id")
    prompt: str = Field(description="The description for generating image")
    action: str = Field(description="The description for generating videos")
    reference: str = Field(description="The reference url for the shot")
    words: str = Field(description="The words for the shot")
    images: list[ImageItem] = Field(description="The list of images")


class ScoredImageList(BaseModel):
    """Top-level result of an image evaluation task."""

    scored_image_list: list[Image] = Field(description="The list of images")
    # NOTE(review): Optional without a default — presumably pydantic v2, where
    # the field may be None but must still be supplied; confirm this is intended.
    status: Optional[Status] = Field(description="The status of the result")


class VideoItem(BaseModel):
    """One generated video together with its evaluation."""

    # The description previously said "shot id"; this field identifies the
    # video itself (the shot is identified by Video.shot_id).
    id: int = Field(description="The id of the video")
    url: str = Field(description="The url of the video")
    score: float = Field(description="The score of the video")
    reason: str = Field(description="The reason for the score")


class Video(BaseModel):
    """A storyboard shot and its scored candidate videos."""

    shot_id: str = Field(description="The shot id")
    # Previously described as "generating image" (copy-paste from Image).
    prompt: str = Field(description="The description for generating video")
    action: str = Field(description="The description for generating videos")
    reference: str = Field(description="The reference url for the shot")
    words: str = Field(description="The words for the shot")
    videos: list[VideoItem] = Field(description="The list of videos")


class ScoredVideoList(BaseModel):
    """Top-level result of a video evaluation task."""

    scored_video_list: list[Video] = Field(description="The list of videos")
    # NOTE(review): Optional without a default — see ScoredImageList.status.
    status: Optional[Status] = Field(description="The status of the result")


class EvaluationResult(BaseModel):
    """Evaluation of a single media item as returned by the scoring model."""

    shot_id: str = Field(..., description="镜头编号")
    media_id: str = Field(..., description="媒体编号")
    reason: str = Field(..., description="评分理由")
    scores: float = Field(..., description="综合评分")


class EvaluationList(BaseModel):
    """Wrapper used as the JSON schema of the scoring model's response."""

    # A single result, not a list: the description previously said "list".
    evaluation: EvaluationResult = Field(..., description="评估结果")
test_dict = {
    "local": "http://localhost:8004/{}",  # 0: do not use
}

# URL template of the agent server; "{}" receives the endpoint path.
url_template = test_dict["local"]

# Module-level defaults so the helpers below also work when this file is
# imported; the __main__ block overrides tmp_json_dir with a per-run directory.
logger = logging.getLogger(__name__)
tmp_json_dir = "tmp-json/"

_APP_NAME = "demo_app"
_USER_ID = "user"


def save_result(result, filename):
    """Write ``result`` to ``filename`` as indented UTF-8 JSON.

    The parent directory is created when it does not exist yet.
    """
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=4)


def create_session(app_name, user_id):
    """Create a session on the agent server and return its id.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts
            or a 4xx/5xx response.
        KeyError: If the response JSON has no ``id`` field.
    """
    url = url_template.format(f"apps/{app_name}/users/{user_id}/sessions")

    # Fail fast on an unreachable server or an error status instead of
    # hanging forever or failing later with a confusing KeyError.
    response = requests.post(url, headers={}, data={}, timeout=60)
    response.raise_for_status()

    session_id = response.json()["id"]
    logger.info(f"main output: session_id: {session_id}")
    return session_id


def _score_of(candidate):
    """Return a candidate's score as a non-negative number (0 when unusable)."""
    try:
        return max(float(candidate.get("score")), 0)
    except (TypeError, ValueError):
        # Missing, None or non-numeric score: rank it lowest instead of crashing.
        return 0


def _pick_best(scored_list, plural_key, singular_key):
    """Keep only the highest-scored candidate of every shot.

    Shots without any candidate are skipped with a warning. On ties the first
    candidate wins.
    """
    picked = []
    for shot in scored_list:
        candidates = shot.get(plural_key) or []
        if not candidates:
            logger.warning(
                f"main output: shot {shot.get('shot_id')} has no {plural_key}, skipped"
            )
            continue
        best = max(candidates, key=_score_of)
        picked.append(
            {
                "shot_id": shot["shot_id"],
                "prompt": shot["prompt"],
                "action": shot["action"],
                "reference": shot["reference"],
                "words": shot["words"],
                singular_key: {"id": best["id"], "url": best["url"]},
            }
        )
    return picked


def pick_best_image(evaluate_image_result):
    """Return one shot dict per scored shot, holding its best ``image``."""
    return _pick_best(evaluate_image_result["scored_image_list"], "images", "image")


def pick_best_video(evaluate_video_result):
    """Return one shot dict per scored shot, holding its best ``video``."""
    return _pick_best(evaluate_video_result["scored_video_list"], "videos", "video")


def run_sse(app_name, user_id, session_id, text):
    """Send ``text`` to the agent and return the text of its last SSE event.

    The whole response is awaited (no streaming) and the last ``data:`` line
    is parsed as the final event.

    Returns:
        The text of the first part of the last event, or ``None`` when the
        request fails, no ``data:`` line is present, or the event cannot be
        parsed.
    """
    url = url_template.format("run_sse")
    payload = json.dumps(
        {
            "app_name": app_name,
            "user_id": user_id,
            "session_id": session_id,
            "new_message": {"role": "user", "parts": [{"text": text}]},
        }
    )
    headers = {"Content-Type": "application/json"}

    try:
        # No stream=True: wait for the complete response.
        response = requests.post(url, headers=headers, data=payload, timeout=6000)
        response.raise_for_status()
        # Event streams are UTF-8; do not rely on requests guessing the
        # charset, which would garble non-ASCII text when none is declared.
        response.encoding = "utf-8"
        body = response.text
        logger.info(f"原始响应: {body[:500]}...")  # first 500 characters only

        # The server still answers in SSE format: take the last data: line.
        data_lines = [line for line in body.splitlines() if line.startswith("data: ")]
        if not data_lines:
            logger.warning("未找到任何 data: 块")
            return None

        event = json.loads(data_lines[-1][len("data: "):])
        logger.info(
            f"最后一个 event: {json.dumps(event, ensure_ascii=False, indent=2)}"
        )

        return event["content"]["parts"][0]["text"]

    except requests.exceptions.Timeout:
        logger.error("请求超时(超过6000秒)")
    except requests.exceptions.RequestException as e:
        logger.error(f"请求失败: {e}")
    except (KeyError, IndexError, TypeError, json.JSONDecodeError) as e:
        # Covers events without content, with an empty parts list, or with
        # null values on the way to the text.
        logger.error(f"解析响应失败: {e}")

    return None


def _ask_json(session_id, prompt, label, filename):
    """Run one agent step whose reply must be JSON.

    Logs the raw reply under ``label``, stores the parsed JSON in
    ``tmp_json_dir + filename`` and returns ``(raw_reply, parsed_reply)``.

    Raises:
        RuntimeError: If the agent produced no reply.
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    reply = run_sse(_APP_NAME, _USER_ID, session_id, prompt)
    logger.info(f"main output: {label}: {reply}")
    if reply is None:
        raise RuntimeError(f"{label}: agent returned no usable reply")
    parsed = json.loads(reply)
    save_result(parsed, tmp_json_dir + filename)
    return reply, parsed


def main(user_need):
    """Drive the whole pipeline for one user request.

    Steps: create session, video config, shot list, images, image evaluation,
    best image per shot, videos, video evaluation, best video per shot, final
    video. Every intermediate result is saved under ``tmp_json_dir``. The
    pipeline stops at the first failing step, which is logged with its
    traceback.
    """
    # Message logged if the step currently running raises.
    failure = "0. create session failed"
    try:
        # step 0: create session
        logger.info("main output: 0. 创建 session...")
        session_id = create_session(_APP_NAME, _USER_ID)
        save_result(session_id, tmp_json_dir + "0_session_id.json")

        # step 1: generate video config
        failure = "1. run sse failed"
        logger.info("main output: 1. 生成视频配置...")
        video_config, video_config_obj = _ask_json(
            session_id, user_need + "\n生成视频配置", "1. video_config", "1_video_config.json"
        )

        # step 1.1: parse video_type
        failure = "1.1 get video_type failed"
        logger.info("main output: 1.1 解析video_type...")
        logger.info(f"main output: 1.1 video_config: {video_config}")
        video_type = video_config_obj["video_type"]

        # step 2: generate shot list
        failure = "2. run sse failed"
        logger.info("main output: 2. 生成分镜脚本...")
        shot_list, _ = _ask_json(
            session_id,
            "请根据如下video_config,生成分镜脚本\n\n" + video_config,
            "2. shot_list",
            "2_shot_list.json",
        )

        # step 3: generate image list
        failure = "3. run sse failed"
        logger.info("main output: 3. 生成分镜图片...")
        image_list, _ = _ask_json(
            session_id,
            "请根据如下shot_list,生成分镜图片\n\n" + shot_list,
            "3. image_list",
            "3_image_list.json",
        )

        # step 4: evaluate image list
        failure = "4. run sse failed"
        logger.info("main output: 4. 评估分镜图片...")
        _, evaluate_image_result = _ask_json(
            session_id,
            "请根据如下分镜图片列表image_list,评估分镜图片的质量\n\n" + image_list,
            "4. evaluate_image_result",
            "4_evaluate_image_list.json",
        )

        # step 4.1: pick best image
        failure = "4.1 pick best image failed"
        logger.info("main output: 4.1 选择最佳分镜图片...")
        best_image_list = pick_best_image(evaluate_image_result)
        save_result(best_image_list, tmp_json_dir + "4_1_selected_image_list.json")
        logger.info(f"main output: 4.1 best_image_list: {best_image_list}")

        # step 5: generate video list
        failure = "5. run sse failed"
        logger.info("main output: 5. 生成分镜视频...")
        # Send JSON rather than the Python repr, whose single quotes are not
        # valid JSON for the agent to parse.
        video_list, _ = _ask_json(
            session_id,
            "请根据如下image_list,生成分镜视频、每个shot生成4个视频\n\n"
            + json.dumps(best_image_list, ensure_ascii=False),
            "5. video_list",
            "5_video_list.json",
        )

        # step 6: evaluate video list
        failure = "6. run sse failed"
        logger.info("main output: 6. 评估分镜视频...")
        evaluate_video_list_input = (
            "请根据如下分镜视频列表video_list,评估分镜视频的质量\n\n" + video_list
        )
        logger.info(
            f"main output: 6. evaluate_video_list_input: {evaluate_video_list_input}"
        )
        _, evaluate_video_result = _ask_json(
            session_id,
            evaluate_video_list_input,
            "6. evaluate_video_result",
            "6_evaluate_video_list.json",
        )

        # step 6.1: pick best video
        failure = "6.1 pick best video failed"
        logger.info("main output: 6.1 选择最佳分镜视频...")
        best_video_list = pick_best_video(evaluate_video_result)
        save_result(best_video_list, tmp_json_dir + "6_1_selected_video_list.json")
        logger.info(f"main output: 6.1 best_video_list: {best_video_list}")

        # step 7: generate final video
        failure = "7. run sse failed"
        logger.info("main output: 7. 生成最终视频...")
        generate_final_video_input = f"进行{video_type}视频的合成\n\n" + json.dumps(
            best_video_list, ensure_ascii=False
        )
        logger.info(f"main output: 7. session_id: {session_id}")
        logger.info(
            f"main output: 7. generate_final_video_input: {generate_final_video_input}"
        )
        final_video = run_sse(
            _APP_NAME, _USER_ID, session_id, generate_final_video_input
        )
        logger.info(f"main output: 7. final_video: {final_video}")
        # The final reply is stored as-is; it is not parsed as JSON.
        save_result(final_video, tmp_json_dir + "7_final_video.json")
    except Exception as e:
        # Top-level boundary of the script: log with traceback and stop.
        logger.exception(f"main output: {failure}: {e}")
        return


if __name__ == "__main__":
    # Default run mode.
    t_type = "local"

    # Per-run directory for intermediate results.
    time_start = t_type + "-" + str(time.time())
    tmp_json_dir = "tmp-json/" + str(time_start) + "/"
    os.makedirs(tmp_json_dir, exist_ok=True)

    # Log to a file inside the run directory and to the console.
    log_name = time.time()
    log_file_path = tmp_json_dir + "full/"
    os.makedirs(log_file_path, exist_ok=True)
    log_file_name = log_file_path + str(log_name) + ".log"

    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s [%(levelname)s]: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[
            logging.FileHandler(log_file_name, encoding="utf-8"),
            logging.StreamHandler(),
        ],
    )

    user_need = "帮我生成杨梅饮料的宣传视频(商品展示视频),图片素材为:https://ark-tutorial.tos-cn-beijing.volces.com/multimedia/%E6%9D%A8%E6%A2%85%E9%A5%AE%E6%96%99.jpg"
    logger.info(f"!!!! main output: test_type:{t_type}, url_template: {url_template}")

    main(user_need)
+ diff --git a/demohouse/mutimedia/backend/app/market-agent/config.yaml.example b/demohouse/mutimedia/backend/app/market-agent/config.yaml.example new file mode 100644 index 00000000..a3182e07 --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/config.yaml.example @@ -0,0 +1,23 @@ +model: + agent: + provider: openai + # name: doubao-seed-1-6-251015 # Do not use doubao-1-5-pro-256k-250115, because the model output is not stable + name: doubao-seed-1-6-250615 + api_base: https://ark.cn-beijing.volces.com/api/v3/ + api_key: + +volcengine: + # [optional] for Viking DB and `web_search` tool + access_key: + secret_key: + +logging: + # ERROR + # WARNING + # INFO + # DEBUG + level: DEBUG + +thinking: + market_agent: disabled + format_agent: disabled diff --git a/demohouse/mutimedia/backend/app/market-agent/src/README.md b/demohouse/mutimedia/backend/app/market-agent/src/README.md new file mode 100644 index 00000000..43e08984 --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/src/README.md @@ -0,0 +1,39 @@ +# 营销策划 Agent + +用来生成视频脚本。 +## 输入输出定义 + +### 输入 + +图文输入 + +```python +class InputMessage(BaseModel): + text: str + image: str # image url or base64 +``` + +文本输入 + +```python +class InputMessage(BaseModel): + text: str +``` + +### 输出 + +```python +class ProductInfo(BaseModel): + name: str + selling_point: str + resources: list # 素材图片url + +class OutputMessage(BaseModel): + video_type: str + product_info: ProductInfo + video_advice: str +``` + +## 工具 + +1. [联网搜索 MCP](https://www.volcengine.com/docs/82379/1338552) diff --git a/demohouse/mutimedia/backend/app/market-agent/src/__init__.py b/demohouse/mutimedia/backend/app/market-agent/src/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/src/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. 
and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/demohouse/mutimedia/backend/app/market-agent/src/agent.py b/demohouse/mutimedia/backend/app/market-agent/src/agent.py new file mode 100644 index 00000000..09420faf --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/src/agent.py @@ -0,0 +1,22 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# [required] instantiate the agent run configuration
# This module-level object is what the serving entry point (app.py) imports;
# it bundles the app name, the root agent and the short-term memory backend.
agent_run_config = AgentRunConfig(
    app_name="market_agent",
    agent=agent,  # type: ignore
    # backend="local" is passed verbatim to ShortTermMemory.
    # NOTE(review): presumably in-process, non-persistent storage - confirm against veadk.
    short_term_memory=ShortTermMemory(backend="local"),  # type: ignore
)
logger = get_logger(__name__)

# Fail fast at import time if agent.py exported something other than an AgentRunConfig.
assert isinstance(agent_run_config, AgentRunConfig), (
    f"Invalid agent_run_config type: {type(agent_run_config)}, expected `AgentRunConfig`"
)

# Unpack the run configuration once; everything below uses these module-level names.
app_name = agent_run_config.app_name
agent = agent_run_config.agent
short_term_memory = agent_run_config.short_term_memory

# Region / function id are read from the environment and only used to build
# the provider URL advertised in the agent card.
VEFAAS_REGION = os.getenv("APP_REGION", "cn-beijing")
VEFAAS_FUNC_ID = os.getenv("_FAAS_FUNC_ID", "")
agent_card_builder = AgentCardBuilder(
    agent=agent,
    provider=AgentProvider(
        organization="Volcengine Agent Development Kit (VeADK)",
        url=f"https://console.volcengine.com/vefaas/region:vefaas+{VEFAAS_REGION}/function/detail/{VEFAAS_FUNC_ID}",
    ),
)


def load_tracer() -> None:
    """Attach an OpenTelemetry tracer to the agent, with exporters chosen via env vars.

    Each ``VEADK_TRACER_*`` variable set to ``"true"`` (case-insensitive) enables
    the matching exporter, unless the agent's first tracer already carries an
    exporter of the same class. The new tracer is appended to the agent's tracer
    list even when no exporter ends up enabled.
    """
    EXPORTER_REGISTRY = {
        "VEADK_TRACER_APMPLUS": APMPlusExporter,
        "VEADK_TRACER_COZELOOP": CozeloopExporter,
        "VEADK_TRACER_TLS": TLSExporter,
    }

    exporters = []
    for env_var, exporter_cls in EXPORTER_REGISTRY.items():
        if os.getenv(env_var, "").lower() == "true":
            if (
                agent.tracers
                and isinstance(agent.tracers[0], OpentelemetryTracer)
                and any(isinstance(e, exporter_cls) for e in agent.tracers[0].exporters)
            ):
                # NOTE(review): in this branch the exporter is NOT instantiated again,
                # yet the message says both exporters will be used - confirm which is intended.
                logger.warning(
                    f"Exporter {exporter_cls.__name__} is already defined in agent.tracers[0].exporters. These two exporters will be used at the same time. As a result, your data may be uploaded twice."
                )
            else:
                exporters.append(exporter_cls())

    tracer = OpentelemetryTracer(name="veadk_tracer", exporters=exporters)
    agent_run_config.agent.tracers.extend([tracer])


def build_mcp_run_agent_func() -> Callable:
    """Build the coroutine that is exposed as the ``run_agent`` endpoint.

    A single ``Runner`` is created here and captured by the returned closure;
    the closure's docstring is replaced with the agent description so that it
    is what the endpoint documents.
    """
    runner = Runner(
        agent=agent,
        short_term_memory=short_term_memory,
        app_name=app_name,
        user_id="",
    )

    async def run_agent(
        user_input: str,
        user_id: str = "mcp_user",
        session_id: str = "mcp_session",
    ) -> str:
        # Set user_id for runner
        # NOTE(review): the runner is shared by all requests, so this assignment is
        # visible to any other in-flight call - confirm this is acceptable.
        runner.user_id = user_id

        # Running agent and get final output
        final_output = await runner.run(
            messages=user_input,
            session_id=session_id,
        )
        return final_output

    run_agent_doc = f"""{agent.description}
    Args:
        user_input: User's input message (required).
        user_id: User identifier. Defaults to "mcp_user".
        session_id: Session identifier. Defaults to "mcp_session".
    Returns:
        Final agent response as a string."""

    run_agent.__doc__ = run_agent_doc

    return run_agent


async def agent_card() -> dict:
    """Build the agent card and return it as a plain dict."""
    agent_card = await agent_card_builder.build()
    return agent_card.model_dump()


async def get_cozeloop_space_id() -> dict:
    """Return ``{"space_id": ...}`` read from the environment ("" when unset)."""
    return {
        "space_id": os.getenv(
            "OBSERVABILITY_OPENTELEMETRY_COZELOOP_SERVICE_NAME", default=""
        )
    }


# Tracers must be attached before the app below is created from `agent`.
load_tracer()

# Build a run_agent function for building MCP server
run_agent_func = build_mcp_run_agent_func()

a2a_app = init_app(
    server_url="0.0.0.0",
    app_name=app_name,
    agent=agent,
    short_term_memory=short_term_memory,
)

# Register the three helper endpoints; the "mcp" tag is what FastMCP filters on below.
a2a_app.post("/run_agent", operation_id="run_agent", tags=["mcp"])(run_agent_func)
a2a_app.get("/agent_card", operation_id="agent_card", tags=["mcp"])(agent_card)
a2a_app.get(
    "/get_cozeloop_space_id", operation_id="get_cozeloop_space_id", tags=["mcp"]
)(get_cozeloop_space_id)

# === Build mcp server ===

# Only routes tagged "mcp" are included in the MCP server.
mcp = FastMCP.from_fastapi(app=a2a_app, name=app_name, include_tags={"mcp"})

# Create MCP ASGI app
mcp_app = mcp.http_app(path="/", transport="streamable-http")


# Combined lifespan management
@asynccontextmanager
async def combined_lifespan(app: FastAPI):
    """Lifespan for the main app: start the parser browser, then run the MCP app's lifespan."""
    # Initialise the browser used for web-page parsing
    await _init_browser()
    async with mcp_app.lifespan(app):
        yield


# Create main FastAPI app with combined lifespan
# OpenAPI/docs endpoints are disabled on the main app.
app = FastAPI(
    title=a2a_app.title,
    version=a2a_app.version,
    lifespan=combined_lifespan,
    openapi_url=None,
    docs_url=None,
    redoc_url=None,
)


@app.middleware("http")
async def otel_context_middleware(request, call_next):
    """Run the request inside the trace context carried by its W3C trace headers, if any."""
    carrier = {
        "traceparent": request.headers.get("Traceparent"),
        "tracestate": request.headers.get("Tracestate"),
    }
    logger.debug(f"traceparent exists: {carrier['traceparent'] is not None}")
    if carrier["traceparent"] is None:
        # No incoming trace: handle the request without touching the context.
        return await call_next(request)
    else:
        ctx = TraceContextTextMapPropagator().extract(carrier=carrier)
        token = context.attach(ctx)
        try:
            response = await call_next(request)
        finally:
            # Always detach, even when the downstream handler raises.
            context.detach(token)
    return response


# Mount A2A routes to main app
for route in a2a_app.routes:
    app.routes.append(route)

# Mount MCP server at /mcp endpoint
app.mount("/mcp", mcp_app)


# remove openapi routes
# The routes copied from a2a_app above may include its own docs endpoints,
# so they are filtered out of the final route table here.
paths = ["/openapi.json", "/docs", "/redoc"]
new_routes = []
for route in app.router.routes:
    if isinstance(route, (APIRoute, Route)) and route.path in paths:
        continue
    new_routes.append(route)
app.router.routes = new_routes

# === Build mcp server end ===
# First stage: produces the marketing advice / video configuration draft.
# It may call the web search tool and the link reader tool.
market_agent = Agent(
    name="market_agent",
    description="根据用户的需求,生成视频配置脚本",
    # instruction=getenv("PROMPT_MARKET_AGENT"),
    instruction=PROMPT_MARKET_AGENT,
    tools=[web_search, read_url_link],
    output_key="video_config",
    # The thinking mode is read from THINKING_MARKET_AGENT and defaults to "enabled".
    model_extra_config={
        "extra_body": {"thinking": {"type": getenv("THINKING_MARKET_AGENT", "enabled")}}
    },
)

# Second stage: re-emits the draft as JSON matching the VideoConfig schema.
# It uses the same output_key as market_agent, and its model output goes
# through the fix_output_format_with_filter callback.
format_agent = Agent(
    name="format_agent",
    description="将模型的输出格式化",
    # instruction=getenv("PROMPT_FORMAT_AGENT"),
    instruction=PROMPT_FORMAT_AGENT,
    generate_content_config=json_response_config,
    output_schema=VideoConfig,
    output_key="video_config",
    after_model_callback=[fix_output_format_with_filter],
    # The thinking mode is read from THINKING_FORMAT_AGENT and defaults to "enabled".
    model_extra_config={
        "extra_body": {"thinking": {"type": getenv("THINKING_FORMAT_AGENT", "enabled")}}
    },
)

# Root pipeline: market_agent runs first, then format_agent.
agent = SequentialAgent(
    name="root_agent",
    description="根据用户的需求,生成视频配置脚本",
    sub_agents=[market_agent, format_agent],
)

# Alias of `agent` under the name `root_agent`.
root_agent = agent
+++ b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/hook/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/hook/format_hook.py b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/hook/format_hook.py new file mode 100644 index 00000000..7083a993 --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/hook/format_hook.py @@ -0,0 +1,120 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import json_repair +from typing import Optional +from google.adk.agents.callback_context import CallbackContext +from google.adk.events import Event +from google.adk.models import LlmResponse +from pydantic import ValidationError +from veadk.utils.logger import get_logger + +from market_agent.tools.is_image import batch_check_images + +logger = get_logger(__name__) + + +async def fix_output_format_with_filter( + *, + callback_context: CallbackContext, + llm_response: LlmResponse, + model_response_event: Optional[Event] = None, +) -> Optional[LlmResponse]: + """ + 检查输出格式是否符合要求,并尝试修复 + 多种情况 + 场景1. (正常->正常)无schema,直接返回原始 llm_responses。 + 场景2. (正常->正常)有schema,输出无需修复且符合schema,返回 llm_responses。(在代码里是替换了一下,但实际上没区别) + 场景3 (异常->异常)有schema,输出无需修复但不符合schema,返回 原始 llm_responses。输出日志 + 场景4. (异常->异常)有schema,输出需要修复,修复失败,返回原始 llm_responses。输出日志 + 场景5. (**异常->正常**)有schema,输出需要修复,修复成功后符合schema,返回 修正后的 llm_responses。 + 场景6. (异常->异常)有schema,输出需要修复,修复成功后不符合schema,返回 原始 llm_responses。输出日志 + + # 过滤部分 + 针对market format agent 出现了resources字段中url 非图片的情况,这里直接通过ContentType过滤 + """ + agent = callback_context._invocation_context.agent + user_id = callback_context._invocation_context.user_id + session_id = callback_context._invocation_context.session.id + invocation_id = callback_context.invocation_id + output_schema = agent.output_schema + + message = f"[fix_output_format]: agent_name:{agent.name} user_id:{user_id} session_id:{session_id} invocation_id:{invocation_id}" + fixed = False + + # 1. 如果没有直接return即可 + if not output_schema: + logger.debug(f"{message}\nNo output_schema, return original llm_response") + return llm_response # 场景1(成功) + + text = llm_response.content.parts[0].text + logger.debug(f"{message}\nOriginal llm_response length: {len(text)}") + + # 2. 
检查输出格式是否符合output_schema要求 + try: + output = json.loads(text) + except json.JSONDecodeError: + # 尝试修复 + try: + output = json_repair.loads(text) + if isinstance(output, list): + output = output[0] + fixed = True + except Exception: + logger.warning( + f"{message}\nOutput format is not valid JSON, trying to `json_repair` but failed. Original output length: {len(text)}" + ) + llm_response = llm_response_validate_error( + llm_response, "MarketAgent输出不符合规范,且无法修复,请重试" + ) + return llm_response # 场景4(失败) + + # 3. 检查输出格式是否符合output_schema要求 + try: + output_schema.model_validate(output) + + resources = output["product_info"]["resources"] + is_images_results = await batch_check_images(resources) + image_indices = [is_image for _, is_image, _ in is_images_results if is_image] + if len(image_indices) != len(resources): + logger.warning( + f"{message}\n `resources`字段中存在非图片内容,该url将被过滤去除: resource: {resources}" + ) + # 去掉非图片的url + output["product_info"]["resources"] = [ + resource + for i, (resource, is_image) in enumerate(zip(resources, image_indices)) + if is_image + ] + + llm_response.content.parts[0].text = json.dumps(output, ensure_ascii=False) + return llm_response # 场景2&场景5(成功) + except ValidationError: + if fixed: + logger.warning( + f"{message}\nOutput format was not valid JSON, `json_repair` success but the result is not valid for output_schema. Original output length: {len(text)}" + ) + else: + logger.warning( + f"{message}\nOutput format is valid JSON but not valid for output_schema. 
Original output length: {len(text)}" + ) + llm_response = llm_response_validate_error( + llm_response, "MarketAgent输出不符合规范,存在异常,请重试" + ) + return llm_response # 场景6 & 场景3(失败) + + +def llm_response_validate_error(llm_response: LlmResponse, reason: str) -> LlmResponse: + llm_response.content.parts[0].text = json.dumps( + {"status": {"success": False, "message": reason}} + ) + return llm_response diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/prompt.py b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/prompt.py new file mode 100644 index 00000000..6b0b0e05 --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/prompt.py @@ -0,0 +1,106 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +PROMPT_MARKET_AGENT = """ +#角色 +你是一个资深的电商营销视频策划专家,你将理解用户提供的商品素材,并给出营销建议 +用户可能会提供两种素材: +第一种是上传商品图片 + 文本描述,用户会上传商品图片+文本描述,你需要根据商品图片和文本描述,给出营销建议。 +第二种是一键解析商品链接,你需要解析商品链接,获取图片、文本描述,然后根据图片和文本描述,给出营销建议。 +无论哪种,都请调用 read_url_link 工具,他可以读取图片或者读取网页 +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 +3. 请你严格区分两种输入方式,根据用户的文本描述来确定是这两类的哪一种,如果都不是或者你认为无法分辨,请及时返回错误提示,而不是蛮干 +请你聪明一点,如果链接中有图片相关的描述字段:比如image,那么你就应该认为是第一种方式 + +#背景信息 +现在我们平台提供了食品饮料品类的电商视频生成能力: +成片类型: +1. 商品展示视频: +- 投放场景:适合在淘宝、京东等电商平台的商品主图、详情页投放 +- 视频特征:重视商品直观视觉展示,营造氛围,体现商品的亮点/效果/材质 +- 平台功能:规划和生成创意分镜、智能剪辑 + +#任务和要求 +用户会告诉你一些信息,包括他的商品素材和想要投放的平台,请你使用 web_search 工具以及知识库(las)给出建议: +1. 成片类型建议;并给出理由,并告诉他这个平台的营销特征 +2. 
# System prompt of the market agent (first stage).
# The product_info key is spelled "resources", matching PROMPT_FORMAT_AGENT
# and the field read by the format hook.
# NOTE(review): "语音问中文" in the Notice list looks like a typo for
# "语言为中文" - left untouched, confirm with the prompt owner.
PROMPT_MARKET_AGENT = """
#角色
你是一个资深的电商营销视频策划专家,你将理解用户提供的商品素材,并给出营销建议
用户可能会提供两种素材:
第一种是上传商品图片 + 文本描述,用户会上传商品图片+文本描述,你需要根据商品图片和文本描述,给出营销建议。
第二种是一键解析商品链接,你需要解析商品链接,获取图片、文本描述,然后根据图片和文本描述,给出营销建议。
无论哪种,都请调用 read_url_link 工具,他可以读取图片或者读取网页
Notice:
1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。
2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。
3. 请你严格区分两种输入方式,根据用户的文本描述来确定是这两类的哪一种,如果都不是或者你认为无法分辨,请及时返回错误提示,而不是蛮干
请你聪明一点,如果链接中有图片相关的描述字段:比如image,那么你就应该认为是第一种方式

#背景信息
现在我们平台提供了食品饮料品类的电商视频生成能力:
成片类型:
1. 商品展示视频:
- 投放场景:适合在淘宝、京东等电商平台的商品主图、详情页投放
- 视频特征:重视商品直观视觉展示,营造氛围,体现商品的亮点/效果/材质
- 平台功能:规划和生成创意分镜、智能剪辑

#任务和要求
用户会告诉你一些信息,包括他的商品素材和想要投放的平台,请你使用 web_search 工具以及知识库(las)给出建议:
1. 成片类型建议;并给出理由,并告诉他这个平台的营销特征
2. 商品卖点解析:
3. 商品适用人群:
4. 分镜策划建议:简略说一下视频画面要怎么展示商品卖点,不超过3个,简要说明重点,不需要有太具体的信息,不要有文字特效

#工具
- web_search:联网搜索工具
- read_url_link: 读取链接工具
#注意事项
1. 最多使用5次web_search工具

#参考例子:
示例1:
用户:奶油西瓜,抖音商城主图
输出:
- 成片类型建议:建议您选择「商品展示视频」;理由:商详页,适合商品展示视频
- 商品卖点解析
- 商品适用人群:白领/闺蜜/情侣
- 背景音乐风格:舒缓/平滑/宁静/古典/....
- 分镜策划建议:
1. 建议1: 突出天然产地场景
2. 建议2: 西瓜果肉展示
3. 建议3: xxx

# 输出格式
```json
{
    "video_type": str, 视频类型
    "product_info": {
        "name": str, 商品名称
        "selling_point": str, 商品卖点
        "resources": list[str] 商品相关素材图片(链接)
        "audience": str, 商品适用人群,受众
    },
    "video_advice": str, 视频建议
    "status": {
        "success": bool, 是否成功
        "message": str, 错误信息,成功时为空字符串
    }
}
```
"""

# System prompt of the format agent (second stage): re-emit the market
# agent's result in the fixed JSON layout below and report failures through
# the `status` field only.
PROMPT_FORMAT_AGENT = """
#角色:
你是一个将输入按规定格式输出的格式转换器

Notice:
1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。
2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。
3. 注意:当遇到Agent执行异常,如缺少内容,运行出错,结果不完整,用户输入内容不足以完成任务时,请在status字段中反馈,而不是在业务字段中反馈描述,如有上述问题,业务字段可以为空。只反馈错误即可

#任务描述:
1. 将 视频脚本配置,将其按 "规定格式" 输出。
2. 关于status字段:status字段包括两部分, 如果业务正常该部分为success: True, message: '',否则为success: False, message: '错误信息'
#规定格式
```json
{
    "video_type": str, 视频类型
    "product_info": {
        "name": str, 商品名称
        "selling_point": str, 商品卖点
        "resources": list[str] 商品相关素材图片(链接)
    },
    "video_advice": str, 视频建议
    "status": {
        "success": bool, 是否成功
        "message": str, 错误信息,成功时为空字符串
    }
}
```
"""
+# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/filter_by_llm.py b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/filter_by_llm.py new file mode 100644 index 00000000..e1b90484 --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/filter_by_llm.py @@ -0,0 +1,108 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Model used by both helpers below.
_LLM_MODEL_NAME = "doubao-seed-1-6-251015"

# Instructions for the per-image "is this a product picture?" classifier.
filter_agent_instructions = """
你是一个专业的图片过滤器,服务于一个商品图片相关的任务
你将收到一张图片输入,他来自于一个网页的链接,通过网页解析等机制解析下来的,
你需要根据图片的内容判断这张图片是商品,还是类似网页素材,点缀之类的无关内容。
你不需要返回任何判断,你只需要确定是与否,不允许任何额外的输出
注意如果你不能确定是否是商品,那它就不是。

### 参考输出
{
    "is_good": true
}
"""

# Instructions for summarising the text scraped from a product page.
summarize_text_instructions = """
你是一个专业的文本总结器,服务于一个商品图片相关的任务
你将收到一段文本输入,他来自于一个网页的链接,通过网页解析等机制解析下来的,
你需要根据文本的内容总结出这段文本的主要内容,包括商品的名称、价格、描述、特点等。
"""


class IsGood(BaseModel):
    """Structured classifier answer: whether the image shows the product."""

    is_good: bool


def repair_image_input(image_list: list[str]) -> list[dict[str, Any]]:
    """Wrap every image URL into an ``input_image`` content part."""
    return [{"type": "input_image", "image_url": image} for image in image_list]


async def filter_images(image_list: list[str]) -> list[str]:
    """Keep only the images the model classifies as product pictures.

    Each image is classified by its own request, at most 10 at a time. An
    image whose request or answer parsing fails is treated as "not a product";
    the failure is logged instead of being dropped silently.

    Args:
        image_list: Image URLs to classify.

    Returns:
        The accepted URLs, in input order.
    """
    if not image_list:
        # Nothing to classify: avoid building a client for no requests.
        return []

    inputs = repair_image_input(image_list)
    client = AsyncOpenAI(
        base_url=os.getenv("MODEL_AGENT_API_BASE"),
        api_key=os.getenv("MODEL_AGENT_API_KEY"),
    )
    sem = asyncio.Semaphore(10)  # cap the number of in-flight requests

    async def process_message(_input):
        async with sem:
            try:
                response = await client.responses.create(
                    model=_LLM_MODEL_NAME,
                    instructions=filter_agent_instructions,
                    input=[{"role": "user", "content": [_input]}],
                    text={
                        "format": {
                            "type": "json_schema",
                            "name": "IsGood",
                            "schema": IsGood.model_json_schema(),
                            "strict": True,
                        }
                    },
                    extra_body={"thinking": {"type": "disabled"}},
                )
                is_good = json.loads(response.output_text).get("is_good", False)
            except Exception as exc:
                # Best effort: a failed check rejects the image, but leaves a trace.
                # The URL is truncated because it may be a long data URL.
                logger.warning(
                    f"filter_images: check failed for {_input['image_url'][:200]}: {exc}"
                )
                is_good = False
        return _input["image_url"] if is_good else None

    verdicts = await asyncio.gather(*(process_message(_input) for _input in inputs))
    return [url for url in verdicts if url is not None]


async def summarize_text(text: str):
    """Summarise page text with the model.

    Only the first 10000 characters are sent. When the request fails, that
    same truncated text is returned and the failure is logged.
    """
    client = AsyncOpenAI(
        base_url=os.getenv("MODEL_AGENT_API_BASE"),
        api_key=os.getenv("MODEL_AGENT_API_KEY"),
    )
    try:
        response = await client.responses.create(
            model=_LLM_MODEL_NAME,
            instructions=summarize_text_instructions,
            input=text[0:10000],
            extra_body={"thinking": {"type": "disabled"}},
        )
        return response.output_text
    except Exception as exc:
        logger.warning(f"summarize_text: request failed, returning truncated text: {exc}")
        return text[0:10000]
# Instructions for the image-description request issued by comment_image.
filter_agent_instructions = """
你是一个专业的图片理解评论专家,你现在为一个电商营销视频策划方案服务支撑,
你的工作是阅读你收到的图片,理解图片内容,给出详细的描述,
比如你收到了一双鞋子的电商图片,那么你要描述这是什么样的鞋子,颜色,风格,是什么类型的,是帆布鞋还是运动鞋。
并且描述这个商品的一些辅助细节,如这个产品的特点、面向人群、使用场景等等。

你的输出结果将辅助整个电商营销策划方案的完成实现。
"""


def repair_image_input(image: str) -> dict[str, Any]:
    """Wrap an image URL into an ``input_image`` content part."""
    # The reference material is always an image.
    return {"type": "input_image", "image_url": image}


async def comment_image(image: str) -> dict[str, Any]:
    """Ask the model to describe *image*.

    Returns:
        ``{"image": <the input URL>, "text": <the model's description>}``.
    """
    logger.debug(f"开始调用image_understand解析图片:{image}")
    user_message = {"role": "user", "content": [repair_image_input(image)]}
    llm = AsyncOpenAI(
        base_url=os.getenv("MODEL_AGENT_API_BASE"),
        api_key=os.getenv("MODEL_AGENT_API_KEY"),
    )
    reply = await llm.responses.create(
        model="doubao-seed-1-6-251015",
        instructions=filter_agent_instructions,
        input=[user_message],
        extra_body={"thinking": {"type": "disabled"}},
    )
    description = reply.output_text
    return {"image": image, "text": description}
+# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import aiohttp +import requests +from typing import List, Tuple + +# 图片魔数映射(前N字节特征) +IMAGE_MAGIC_NUMBERS = { + b"\xff\xd8\xff": "jpeg", # JPG/JPEG + b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a": "png", # PNG + b"\x47\x49\x46\x38\x37\x61": "gif", # GIF87a + b"\x47\x49\x46\x38\x39\x61": "gif", # GIF89a + b"\x52\x49\x46\x46": "webp", # WebP(RIFF开头,后续验证WEBP) + b"\x42\x4d": "bmp", # BMP + b"\x3c\x73\x76\x67": "svg", # SVG(文本开头 Tuple[bool, str]: + """ + 同步判断单个URL是否为图片资源(非URL后缀,仅验证HTTP头/文件内容) + :param url: 待检测URL + :param timeout: 超时时间(秒) + :param allow_redirects: 是否允许重定向 + :return: (是否为图片, 验证依据) + 验证依据可选:content_type / magic_number / error + """ + # 第一步:发起轻量请求(优先HEAD,失败则降级GET) + try: + # 1. 尝试HEAD请求(仅获取响应头,最快) + resp = requests.head( + url, + timeout=timeout, + allow_redirects=allow_redirects, + headers={ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + }, + ) + # 2. HEAD失败则降级为GET(仅读响应头,不下载正文) + if resp.status_code != 200: + resp = requests.get( + url, + timeout=timeout, + allow_redirects=allow_redirects, + stream=True, # 关键:不下载正文 + headers={ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + }, + ) + + # 3. 
验证Content-Type(优先级最高,成本最低) + content_type = resp.headers.get("Content-Type", "").lower() + if content_type.startswith("image/"): + return True, "content_type" + + # 第二步:Content-Type不可靠时,验证文件魔数(仅下载前16字节) + try: + # 读取前16字节(足够覆盖所有图片魔数) + header_bytes = resp.raw.read(16) if resp.raw else b"" + # 匹配魔数 + for magic, _ in IMAGE_MAGIC_NUMBERS.items(): + if header_bytes.startswith(magic): + # WebP特殊验证(RIFF后需包含WEBP) + if magic == b"\x52\x49\x46\x46" and b"WEBP" not in header_bytes: + continue + # SVG特殊验证(文本格式,需兼容大小写) + if ( + magic == b"\x3c\x73\x76\x67" + and not header_bytes.lower().startswith(b" Tuple[str, bool, str]: + """ + 异步判断单个URL是否为图片资源(批量场景首选) + :param url: 待检测URL + :param session: aiohttp会话(复用连接,提升批量性能) + :param timeout: 超时时间(秒) + :return: (url, 是否为图片, 验证依据) + """ + timeout_obj = aiohttp.ClientTimeout(total=timeout) + try: + # 1. 发起GET请求(aiohttp对HEAD支持较差,直接用GET+stream) + async with session.get( + url, + timeout=timeout_obj, + allow_redirects=True, + headers={ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + }, + ) as resp: + # 2. 验证Content-Type + content_type = resp.headers.get("Content-Type", "").lower() + if content_type.startswith("image/"): + return url, True, "content_type" + + # 3. 
验证魔数(仅读取前16字节) + header_bytes = await resp.content.read(16) + for magic, _ in IMAGE_MAGIC_NUMBERS.items(): + if header_bytes.startswith(magic): + if magic == b"\x52\x49\x46\x46" and b"WEBP" not in header_bytes: + continue + if ( + magic == b"\x3c\x73\x76\x67" + and not header_bytes.lower().startswith(b" List[Tuple[str, bool, str]]: + """ + 批量异步检测URL是否为图片资源 + :param urls: URL列表 + :param timeout: 单URL超时时间 + :param max_concurrency: 最大并发数 + :return: 列表,每个元素为(url, 是否为图片, 验证依据) + """ + # 限制并发数(防止请求过多被封禁) + semaphore = asyncio.Semaphore(max_concurrency) + + async def bounded_check(url): + async with semaphore: + return await async_is_image_resource(url, session, timeout) + + # 创建复用的aiohttp会话(提升性能) + connector = aiohttp.TCPConnector(limit=0) # 连接池不限制(靠semaphore控制) + async with aiohttp.ClientSession(connector=connector) as session: + tasks = [bounded_check(url) for url in urls] + results = await asyncio.gather(*tasks) + return results diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/link_reader.py b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/link_reader.py new file mode 100644 index 00000000..eb7bfd09 --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/link_reader.py @@ -0,0 +1,78 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Any +from urllib.parse import urlparse + +from market_agent.tools.image_understand import comment_image +from market_agent.tools.is_image import batch_check_images +from market_agent.tools.web_parse import parse_webpage +from market_agent.tools.filter_by_llm import summarize_text, filter_images +from veadk.utils.logger import get_logger + +logger = get_logger(__name__) + + +async def read_url_link(link_list: list[str]) -> str | list[dict[str, Any]]: + """ + 读取并解析网页内容。 + + 此异步方法调用 `LinkReader` 工具,对传入的 URL 执行网页内容/图片解析, + 返回解析结果。 + + Args: + link_list (list[str]): 待解析的网页链接列表。 + + Returns: + 情况1: + list[dict[str, Any]]: 解析后的网页/图片内容列表。 + 每个字典包含以下键值对: + 按顺序返回每个链接的解析结果 + - 'images':list[str] 图片链接列表。 + - 'text': str 对图片/网页的文本解释。 + """ + logger.debug(f"开始解析链接:{link_list}") + is_images_results = await batch_check_images(link_list) + logger.debug(f"图片检测结果: {is_images_results}") + result = [] + for i, link in enumerate(link_list): + try: + # is_images_results 中的每个元素是 (url, is_image, reason) 的元组 + _, is_image, _ = is_images_results[i] + if is_image: + res = await comment_image(link) + result.append(res) + continue + else: + # 调用 `LinkReader` 工具进行网页内容抓取与解析(避免控制台打印完整链接) + logger.debug(f"调用parse_webpage解析链接域名:{urlparse(link).netloc}") + images, text = await parse_webpage(link) + # 过滤掉无效的图片链接 + images = await filter_images(images) + # 对文本内容进行总结 + text = await summarize_text(text) + logger.debug( + f"对url: {link} \n 解析到图片数量: {len(images)}, 解析到文本长度 {len(text)}" + ) + if len(text) < 100: + logger.debug(f"对url: {link} \n 文本过短,长度: {len(text)}") + if len(images) > 5: + logger.debug(f"对url: {link} \n 图片数量过多,选取前5张") + images = images[:5] + result.append({"images": images, "text": text}) + + except Exception as e: + # 捕获并打印异常信息 + logger.error(f"Error parsing {link}: {e}") + # 继续处理下一个链接 + result.append({"images": [], "text": ""}) + + return result diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/web_parse.py 
b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/web_parse.py new file mode 100644 index 00000000..b51f2b9e --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/web_parse.py @@ -0,0 +1,41 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from veadk.utils.logger import get_logger +from .web_parser_local import parse_webpage_local + +logger = get_logger(__name__) + + +async def parse_webpage(url, render_js=True, delay=5): + """ + 通用网页解析工具:提取网页的图片URL列表和纯文本内容 + :param url: 目标网页地址 + :param render_js: 是否渲染JS(处理动态页面,默认True) + :param delay: 渲染延迟(秒,默认5) + :return: (img_url_list, text_content) + img_url_list: 图片URL列表(去重、绝对路径) + text_content: 网页纯文本内容(去空格、去换行) + """ + logger.debug(f"开始本地解析网页:{url}") + + try: + # 调用本地网页解析功能 + img_url_list, text_content = await parse_webpage_local(url, render_js, delay) + + logger.debug( + f"解析完成:找到 {len(img_url_list)} 张图片,文本预览长度 {len(text_content)} 字符" + ) + return img_url_list, text_content + + except Exception as e: + logger.error(f"本地解析网页失败:{e}") + return [], f"网页解析失败: {str(e)}" diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/web_parser_local.py b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/web_parser_local.py new file mode 100644 index 00000000..6b3d849a --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/web_parser_local.py @@ -0,0 +1,211 @@ +# Copyright (c) 2025 
Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import socket +import warnings +from urllib.parse import urljoin + +import requests +from bs4 import BeautifulSoup +from playwright.async_api import async_playwright +from veadk.utils.logger import get_logger + +# 忽略无关警告 +warnings.filterwarnings("ignore") + +# 日志配置 +logger = get_logger(__name__) + +# 全局浏览器实例(复用避免重复启动,提升性能) +_global_browser = None + + +async def _init_browser(): + """初始化 Playwright 浏览器(全局复用)""" + global _global_browser + if not _global_browser: + try: + playwright = await async_playwright().start() + # 启动浏览器(根据系统环境自动选择) + _global_browser = await playwright.chromium.launch( + headless=True, + args=[ + "--no-sandbox", + "--disable-dev-shm-usage", + "--disable-gpu", + "--disable-images", + "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + ], + ) + logger.info("Chromium 浏览器初始化成功") + except Exception as e: + logger.error(f"浏览器初始化失败: {e}", exc_info=True) + raise + + +def _is_public_ip(url: str) -> bool: + """ + 检查URL是否解析为公网IP地址,以防止SSRF攻击 + """ + try: + hostname = url.split("://")[1].split("/")[0].split(":")[0] + ip_address = socket.gethostbyname(hostname) + # 检查IP地址是否为私有、保留或回环地址 + if ip_address.startswith(("10.", "172.", "192.168.", "127.", "169.254.")): + return False + return True + except Exception: + return False + + +async def parse_webpage_local(url: str, render_js: bool = True, delay: int = 5): + """ + 
通用网页解析工具:提取网页的图片URL列表和纯文本内容(基于Playwright) + :param url: 目标网页地址 + :param render_js: 是否渲染JS(处理动态页面,默认True) + :param delay: 渲染延迟(秒,默认5) + :return: (img_url_list, text_content) + """ + global _global_browser + + logger.info(f"开始网页解析:{url},render_js={render_js},延迟={delay}秒") + + # 初始化浏览器(如果尚未初始化) + if not _global_browser: + await _init_browser() + + if not _global_browser: + logger.error("浏览器未初始化") + raise RuntimeError("浏览器未初始化") + + page = None + try: + # 创建新页面 + context = await _global_browser.new_context( + user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + ) + page = await context.new_page() + logger.debug("创建了新的浏览器上下文和页面") + + # 页面请求超时配置 + page.set_default_timeout(15 * 1000) # 15秒超时 + logger.debug("将页面超时设置为15秒") + + # 增加DoS防护:检查Content-Length + try: + with requests.get(url, stream=True, timeout=10) as r: + content_length = r.headers.get("Content-Length") + if content_length and int(content_length) > 10 * 1024 * 1024: # 10MB + raise ValueError("响应内容大于10MB,因安全保护拒绝解析") + except requests.RequestException as e: + logger.error(f"检查响应大小时出错: {e}") + raise ValueError("无法访问URL") + + # 访问目标URL + await page.goto(url, wait_until="domcontentloaded" if render_js else "commit") + logger.info(f"成功访问URL:{url}") + + # 渲染JS(等待动态内容加载) + if render_js: + logger.info(f"等待{delay}秒进行JS渲染") + try: + await page.wait_for_load_state("networkidle", timeout=delay * 1000) + except Exception: + import asyncio + + await asyncio.sleep(delay) # 等待页面加载完成 + logger.debug("JS渲染完成") + + # 获取页面完整HTML + html_content = await page.content() + logger.debug(f"获取到页面HTML,长度:{len(html_content)}字符") + + # 1. 
提取所有图片URL + img_url_list = [] + + # 1.1 提取标签的图片(src/data-src/lazy-src等) + # 方式1:通过Playwright选择器提取(更高效) + img_elements = await page.query_selector_all("img") + logger.debug(f"在页面上找到{len(img_elements)}个img标签") + + for img_elem in img_elements: + # 获取图片属性 + img_src = ( + await img_elem.get_attribute("src") + or await img_elem.get_attribute("data-src") + or await img_elem.get_attribute("lazy-src") + or await img_elem.get_attribute("data-lazy") + ) + if img_src: + absolute_url = urljoin(url, img_src) + # 过滤无效链接 + if ( + not absolute_url.startswith( + ("data:", "svg:", "javascript:", "blob:") + ) + and "." in absolute_url.split("/")[-1] + ): + img_url_list.append(absolute_url) + logger.debug(f"从img标签中提取了{len(img_url_list)}张有效图片") + + # 1.2 提取背景图片(style中的background-image) + bg_pattern = re.compile(r'background-image:\s*url\(["\']?(.*?)["\']?\)', re.I) + # 获取所有元素的style属性 + all_elements = await page.query_selector_all("*") + logger.debug(f"检查了{len(all_elements)}个元素的背景图片") + + for elem in all_elements: + style = await elem.get_attribute("style") or "" + match = bg_pattern.search(style) + if match: + bg_img = match.group(1) + absolute_bg_url = urljoin(url, bg_img) + if ( + absolute_bg_url not in img_url_list + and not absolute_bg_url.startswith(("data:", "svg:", "blob:")) + ): + img_url_list.append(absolute_bg_url) + logger.debug( + f"从背景样式中提取了{len(img_url_list) - len(set(img_url_list))}张有效图片" + ) + + # 1.3 去重 + img_url_list = list(set(img_url_list)) + logger.debug(f"去重后最终图片列表:{len(img_url_list)}张图片") + + # 2. 
提取纯文本内容 + logger.debug("正在提取文本内容") + soup = BeautifulSoup(html_content, "html.parser") + # 移除无用标签 + for useless_tag in soup( + ["script", "style", "noscript", "iframe", "header", "footer"] + ): + useless_tag.extract() + # 格式化文本 + raw_text = soup.get_text(strip=True) + text_content = re.sub(r"\s+", " ", raw_text) + logger.debug(f"提取到文本内容,长度:{len(text_content)}字符") + + logger.info( + f"解析完成:找到 {len(img_url_list)} 张图片,文本长度 {len(text_content)} 字符" + ) + return img_url_list, text_content + + except Exception as e: + logger.error(f"解析网页失败: {e}", exc_info=True) + raise + finally: + # 关闭页面和上下文,释放资源 + if page: + await page.close() + if "context" in locals(): + await context.close() diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/utils/__init__.py b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/utils/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/utils/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/utils/types.py b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/utils/types.py new file mode 100644 index 00000000..5be27c93 --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/src/market_agent/utils/types.py @@ -0,0 +1,53 @@ +# Copyright (c) 2025 Bytedance Ltd. 
and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.genai import types +from pydantic import BaseModel, Field + +json_response_config = types.GenerateContentConfig( + response_mime_type="application/json", max_output_tokens=18000 +) + + +class Status(BaseModel): + """A status.""" + + success: bool = Field(description="如果结果成功则为True,否则为False") + message: str = Field(description="运行成功该字段为空,否则为错误信息") + + +# status的描述 +""" +status字段:status字段包括两部分, 如果业务正常该部分为success: True, message: '',否则为success: False, message: '错误信息' +注意:当遇到Agent执行异常,如缺少内容,运行出错,结果不完整,用户输入内容不足以完成任务时,请在status字段中反馈,而不是在业务字段中反馈描述,如有上述问题,业务字段可以为空。只反馈错误即可 +"status": { + "success": bool, 是否成功 + "message": str, 错误信息,成功时为空字符串 + } +""" + + +class ProductInfo(BaseModel): + """A product information.""" + + name: str = Field(description="A Product's Name") + selling_point: str = Field(description="The Product's Selling Point") + resources: list[str] = Field(description="verified URL to an image of the product") + audience: str = Field(description="The Product's Audience") + + +class VideoConfig(BaseModel): + """Video configuration.""" + + video_type: str = Field(description="The type of video to be generated") + product_info: ProductInfo = Field(description="The product information") + video_advice: str = Field(description="The video advice") + status: Status = Field(description="The status of the video configuration") diff --git a/demohouse/mutimedia/backend/app/market-agent/src/requirements.txt 
b/demohouse/mutimedia/backend/app/market-agent/src/requirements.txt new file mode 100644 index 00000000..251fb835 --- /dev/null +++ b/demohouse/mutimedia/backend/app/market-agent/src/requirements.txt @@ -0,0 +1,12 @@ +veadk-python==0.2.28 +# git+https://github.com/volcengine/veadk-python.git +fastapi +uvicorn[standard] +google-adk==1.18.0 +json-repair +openai +aiohttp +playwright==1.55.0 +lxml[html_clean] +bs4 +requests \ No newline at end of file diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/__init__.py b/demohouse/mutimedia/backend/app/multimedia-agent/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/multimedia-agent/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/config.yaml.example b/demohouse/mutimedia/backend/app/multimedia-agent/config.yaml.example new file mode 100644 index 00000000..15808c70 --- /dev/null +++ b/demohouse/mutimedia/backend/app/multimedia-agent/config.yaml.example @@ -0,0 +1,26 @@ +model: + agent: + provider: openai + name: doubao-seed-1-6-250615 + api_base: https://ark.cn-beijing.volces.com/api/v3/ + api_key: + +logging: + # ERROR + # WARNING + # INFO + # DEBUG + level: DEBUG + +thinking: + root_agent: disabled + +remote_agent: + market_agent: + url: http://localhost:8000 + director_agent: + url: http://localhost:8001 + evaluate_agent: + url: http://localhost:8002 + release_agent: + url: http://localhost:8003 diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/__init__.py b/demohouse/mutimedia/backend/app/multimedia-agent/src/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/multimedia-agent/src/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/agentkit.py b/demohouse/mutimedia/backend/app/multimedia-agent/src/agentkit.py new file mode 100644 index 00000000..91e8c436 --- /dev/null +++ b/demohouse/mutimedia/backend/app/multimedia-agent/src/agentkit.py @@ -0,0 +1,82 @@ +# Copyright (c) 2025 Bytedance Ltd. 
and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import override + +import uvicorn +from google.adk.agents.base_agent import BaseAgent +from google.adk.artifacts.in_memory_artifact_service import ( + InMemoryArtifactService, +) +from google.adk.auth.credential_service.in_memory_credential_service import ( + InMemoryCredentialService, +) +from google.adk.cli.adk_web_server import AdkWebServer +from google.adk.cli.utils.base_agent_loader import BaseAgentLoader +from google.adk.evaluation.local_eval_set_results_manager import ( + LocalEvalSetResultsManager, +) +from google.adk.evaluation.local_eval_sets_manager import LocalEvalSetsManager +from google.adk.memory.in_memory_memory_service import InMemoryMemoryService +from google.adk.sessions.base_session_service import BaseSessionService +from veadk import Agent +from veadk.memory.short_term_memory import ShortTermMemory + + +class AgentKitAgentLoader(BaseAgentLoader): + def __init__(self, agent: BaseAgent) -> None: + super().__init__() + + self.agent = agent + + @override + def load_agent(self, agent_name: str) -> BaseAgent: + return self.agent + + @override + def list_agents(self) -> list[str]: + return [self.agent.name] + + +class AgentkitAgentServerApp: + def __init__( + self, + agent: BaseAgent, + short_term_memory: BaseSessionService | ShortTermMemory, + ) -> None: + super().__init__() + + _artifact_service = InMemoryArtifactService() + _credential_service = InMemoryCredentialService() + + _eval_sets_manager = 
LocalEvalSetsManager(agents_dir=".") + _eval_set_results_manager = LocalEvalSetResultsManager(agents_dir=".") + + self.server = AdkWebServer( + agent_loader=AgentKitAgentLoader(agent), + session_service=short_term_memory + if isinstance(short_term_memory, BaseSessionService) + else short_term_memory.session_service, + memory_service=agent.long_term_memory + if isinstance(agent, Agent) and agent.long_term_memory + else InMemoryMemoryService(), + artifact_service=_artifact_service, + credential_service=_credential_service, + eval_sets_manager=_eval_sets_manager, + eval_set_results_manager=_eval_set_results_manager, + agents_dir=".", + ) + + self.app = self.server.get_fast_api_app() + + def run(self, host: str, port: int = 8000) -> None: + """Run the app with Uvicorn server.""" + uvicorn.run(self.app, host=host, port=port) diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/__init__.py b/demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/__init__.py new file mode 100644 index 00000000..9cebfb90 --- /dev/null +++ b/demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . 
import agent + +__all__ = [ + "agent", +] diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/agent.py b/demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/agent.py new file mode 100644 index 00000000..a181253a --- /dev/null +++ b/demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/agent.py @@ -0,0 +1,59 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import httpx + +from veadk.memory.short_term_memory import ShortTermMemory +from veadk.a2a.remote_ve_agent import RemoteVeAgent +from veadk import Agent +from veadk.config import getenv + +from multimedia_agent.prompt import PROMPT_ROOT_AGENT + +market_agent = RemoteVeAgent( + name="market_agent", + auth_method="querystring", + httpx_client=httpx.AsyncClient( + base_url=getenv("REMOTE_AGENT_MARKET_AGENT_URL"), timeout=6000 + ), +) +director_agent = RemoteVeAgent( + name="director_agent", + auth_method="querystring", + httpx_client=httpx.AsyncClient( + base_url=getenv("REMOTE_AGENT_DIRECTOR_AGENT_URL"), timeout=6000 + ), +) +evaluate_agent = RemoteVeAgent( + name="evaluate_agent", + auth_method="querystring", + httpx_client=httpx.AsyncClient( + base_url=getenv("REMOTE_AGENT_EVALUATE_AGENT_URL"), timeout=6000 + ), +) +release_agent = RemoteVeAgent( + name="release_agent", + auth_method="querystring", + httpx_client=httpx.AsyncClient( + base_url=getenv("REMOTE_AGENT_RELEASE_AGENT_URL"), timeout=6000 + ), +) + +root_agent = 
Agent( + name="root_agent", + description="根据用户的需求,生成电商视频", + instruction=PROMPT_ROOT_AGENT, + sub_agents=[market_agent, director_agent, evaluate_agent, release_agent], + short_term_memory=ShortTermMemory(backend="local"), + model_extra_config={ + "extra_body": {"thinking": {"type": getenv("THINKING_ROOT_AGENT", "enabled")}} + }, +) diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/prompt.py b/demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/prompt.py new file mode 100644 index 00000000..73384c09 --- /dev/null +++ b/demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/prompt.py @@ -0,0 +1,77 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +PROMPT_ROOT_AGENT = """ +# 角色 +你是一个电商营销视频生成的总指挥官,负责规划和拆解任务,分配给4个子Agent执行 +Notice:生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文 + +#子Agent +1.market_agent: 负责理解用户提供的商品素材,并生成视频配置脚本 +2.director_agent: 负责根据视频配置脚本,创作分镜脚本; +根据分镜脚本,生成分镜图片列表;根据分镜图片列表,创作分镜视频列表 +3.evaluate_agent: 负责评估分镜图片列表和分镜视频列表的质量 +4.release_agent: 负责将最终的分镜视频列表进行合成 + +#注意事项:输入输出中,任何涉及图片或视频的链接url,不要做任何修改。 +#注意事项: +在market-agent阶段,如果在同一次对话中又收到了相同的内容,那是用户希望你**重新生成**内容,请不要直接进入你认为的下一阶段,或者告知用户你已经生成过了之类的。 + +# 任务说明 +1. 视频配置脚本生成 +输入:用户提供的商品素材,想法 +调用market_agent,生成视频配置脚本 +输出:视频配置脚本 + +2. 分镜脚本生成 +输入:视频配置脚本 +调用director_agent,生成分镜脚本 +输出:分镜脚本 + +3. 分镜图片列表生成 +输入:分镜脚本 +调用director_agent,生成分镜图片列表 +输出:分镜图片列表 + +4. 分镜图片列表评估 +输入:分镜图片列表 +调用evaluate_agent,评估分镜图片列表的质量 +输出:评估过的分镜图片列表 + +5. 
分镜视频列表生成 +输入:分镜脚本 +调用director_agent,生成分镜视频列表 +输出:分镜视频列表 + +6. 分镜视频列表评估 +输入:分镜视频列表 +调用evaluate_agent,评估分镜视频列表的质量 +输出:评估过的分镜视频列表 + +7. 视频合成 +输入:评估过的分镜视频列表 +调用release_agent,将分镜视频列表进行合成 +输出:最终的分镜视频 + +#要求 +当子Agent执行正常: +务必直接返回子agent最后的输出,不要在输出中包含任何解释或说明 +当子Agent执行失败 或 你无法理解用户的指令要求: +请按照下述格式对输出进行反馈 +```json +{ + "status": { + "success": bool, 错误 + "message": str, 信息,关于为什么出错,或为什么你无法理解等 + } +} +``` +""" diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/requirements.txt b/demohouse/mutimedia/backend/app/multimedia-agent/src/requirements.txt new file mode 100644 index 00000000..1b0520bd --- /dev/null +++ b/demohouse/mutimedia/backend/app/multimedia-agent/src/requirements.txt @@ -0,0 +1,4 @@ +veadk-python==0.2.28 +fastapi +uvicorn[standard] +google-adk==1.18.0 \ No newline at end of file diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/server.py b/demohouse/mutimedia/backend/app/multimedia-agent/src/server.py new file mode 100644 index 00000000..0dcf141e --- /dev/null +++ b/demohouse/mutimedia/backend/app/multimedia-agent/src/server.py @@ -0,0 +1,21 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from multimedia_agent.agent import root_agent +from veadk.memory.short_term_memory import ShortTermMemory +from agentkit import AgentkitAgentServerApp + +short_term_memory = ShortTermMemory(backend="local") +agent_server_app = AgentkitAgentServerApp( + agent=root_agent, short_term_memory=short_term_memory +) + +app = agent_server_app.app diff --git a/demohouse/mutimedia/backend/app/release-agent/__init__.py b/demohouse/mutimedia/backend/app/release-agent/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/release-agent/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ diff --git a/demohouse/mutimedia/backend/app/release-agent/config.yaml.example b/demohouse/mutimedia/backend/app/release-agent/config.yaml.example new file mode 100644 index 00000000..c09d2afe --- /dev/null +++ b/demohouse/mutimedia/backend/app/release-agent/config.yaml.example @@ -0,0 +1,37 @@ +model: + agent: + provider: openai + # name: doubao-seed-1-6-251015 + name: doubao-seed-1-6-250615 + api_base: https://ark.cn-beijing.volces.com/api/v3/ + api_key: + format: + name: doubao-seed-1-6-flash-250828 + + +shorten_url_service_url: http://127.0.0.1:8005 +volcengine: + # 默认为本地方式,若切换视频云合成则需要 + access_key: + secret_key: + +tools: + # 默认为本地方式,若切换视频云合成则需要 + vod: + space_name: + task_polling_interval: 20 + max_retries: 60 + + +logging: + # ERROR + # WARNING + # INFO + # DEBUG + level: DEBUG + +thinking: + release_agent: disabled + audio_agent: disabled + film_agent: disabled + format_agent: disabled diff --git a/demohouse/mutimedia/backend/app/release-agent/src/README.md b/demohouse/mutimedia/backend/app/release-agent/src/README.md new file mode 100644 index 00000000..1b51c420 --- /dev/null +++ b/demohouse/mutimedia/backend/app/release-agent/src/README.md @@ -0,0 +1,29 @@ +# 合成发布 Agent + +用来将生成的分镜视频合成,并且发布至指定平台。 + +## 输入输出定义 + +### 输入 + +分镜视频列表 + +### 输出 + +```python +class VideoInfo(BaseModel): + video_type: str + video_title: str + video_release: str + +class OutputMessage(BaseModel): + video_url: str + content: VideoInfo +``` + +## 工具 + +1. [视频合成工具 Moviepy](https://moviepy-cn.readthedocs.io/zh/latest/) +2. 
[视频云合成工具 vod-mcp-server](https://github.com/volcengine/mcp-server.git#subdirectory=server/mcp_server_vod) + +注:视频合成工具默认采用第一种方式,第二种方式需要自行配置火山视频云相关信息 \ No newline at end of file diff --git a/demohouse/mutimedia/backend/app/release-agent/src/__init__.py b/demohouse/mutimedia/backend/app/release-agent/src/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/release-agent/src/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/demohouse/mutimedia/backend/app/release-agent/src/agent.py b/demohouse/mutimedia/backend/app/release-agent/src/agent.py new file mode 100644 index 00000000..8feb09a8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/release-agent/src/agent.py @@ -0,0 +1,22 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from release_agent.agent import agent # type: ignore + +from veadk.memory.short_term_memory import ShortTermMemory +from veadk.types import AgentRunConfig + +# [required] instantiate the agent run configuration +agent_run_config = AgentRunConfig( + app_name="release_agent", + agent=agent, # type: ignore + short_term_memory=ShortTermMemory(backend="local"), # type: ignore +) diff --git a/demohouse/mutimedia/backend/app/release-agent/src/app.py b/demohouse/mutimedia/backend/app/release-agent/src/app.py new file mode 100644 index 00000000..5baa0fb5 --- /dev/null +++ b/demohouse/mutimedia/backend/app/release-agent/src/app.py @@ -0,0 +1,214 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def load_tracer() -> None:
    """Attach an OpenTelemetry tracer to the agent, with exporters chosen by env flags.

    Each ``VEADK_TRACER_*`` variable set to ``"true"`` (case-insensitive) requests
    the matching exporter. An exporter type that is already present on the agent's
    first tracer is not instantiated again; a warning is logged instead. A tracer
    named ``veadk_tracer`` is always appended, even when no exporter was selected.
    """
    exporter_by_flag = {
        "VEADK_TRACER_APMPLUS": APMPlusExporter,
        "VEADK_TRACER_COZELOOP": CozeloopExporter,
        "VEADK_TRACER_TLS": TLSExporter,
    }

    selected = []
    for flag, exporter_type in exporter_by_flag.items():
        if os.getenv(flag, "").lower() != "true":
            continue

        # Only the first tracer configured on the agent is inspected for duplicates.
        already_present = bool(
            agent.tracers
            and isinstance(agent.tracers[0], OpentelemetryTracer)
            and any(
                isinstance(existing, exporter_type)
                for existing in agent.tracers[0].exporters
            )
        )
        if already_present:
            logger.warning(
                f"Exporter {exporter_type.__name__} is already defined in agent.tracers[0].exporters. These two exporters will be used at the same time. As a result, your data may be uploaded twice."
            )
            continue

        selected.append(exporter_type())

    veadk_tracer = OpentelemetryTracer(name="veadk_tracer", exporters=selected)
    agent_run_config.agent.tracers.extend([veadk_tracer])
@app.middleware("http")
async def otel_context_middleware(request, call_next):
    """Continue an upstream W3C trace for the duration of one HTTP request.

    When the request carries a ``Traceparent`` header, the trace context is
    extracted from the ``Traceparent``/``Tracestate`` headers, attached while the
    downstream handler runs, and always detached afterwards. Requests without the
    header are passed through unchanged.
    """
    headers = request.headers
    carrier = {
        "traceparent": headers.get("Traceparent"),
        "tracestate": headers.get("Tracestate"),
    }
    has_parent = carrier["traceparent"] is not None
    logger.debug(f"traceparent exists: {has_parent}")

    if not has_parent:
        return await call_next(request)

    parent_ctx = TraceContextTextMapPropagator().extract(carrier=carrier)
    attach_token = context.attach(parent_ctx)
    try:
        return await call_next(request)
    finally:
        # Detach even if the handler raised, so the context does not leak.
        context.detach(attach_token)
# Extra request body for the model: thinking mode is read from the
# THINKING_RELEASE_AGENT setting and falls back to "enabled".
_RELEASE_AGENT_EXTRA_CONFIG = {
    "extra_body": {
        "thinking": {"type": getenv("THINKING_RELEASE_AGENT", "enabled")},
    }
}

# Top-level release agent; the actual video composition is delegated to film_agent.
# NOTE(review): PROMPT_RELEASE_AGENT also refers to an `audio_agent` tool, but no
# tools are registered on this agent -- confirm whether that wiring is still pending.
agent = Agent(
    name="release_agent",
    description="将分镜视频合成最终的视频",
    instruction=PROMPT_RELEASE_AGENT,
    sub_agents=[film_agent],
    model_extra_config=_RELEASE_AGENT_EXTRA_CONFIG,
)

# Alias under the `root_agent` name.
root_agent = agent
def fix_output_format(
    *,
    callback_context: CallbackContext,
    llm_response: LlmResponse,
    model_response_event: Optional[Event] = None,
) -> Optional[LlmResponse]:
    """Validate the model output against the agent's ``output_schema``, repairing it if possible.

    The text of the first content part is inspected. Outcomes:

    1. The agent has no ``output_schema``: the response is returned untouched.
    2. The response has no content, no parts, or a first part without text:
       the response is returned untouched (there is nothing to validate).
    3. The text is valid JSON that satisfies the schema: returned as is.
    4. The text is not valid JSON, ``json_repair`` recovers it and the result
       satisfies the schema: the text is replaced by the repaired JSON.
    5. Any other case (repair raised, or the parsed value violates the schema):
       the text is replaced by a failure payload built by
       ``llm_response_validate_error`` and a warning is logged.

    Args:
        callback_context: Callback context; agent, user, session and invocation
            identifiers are read from it for log messages.
        llm_response: Model response to check. It is modified in place in cases 4 and 5.
        model_response_event: Unused by this hook.

    Returns:
        The same ``llm_response`` object, possibly with its first text part rewritten.
    """
    invocation = callback_context._invocation_context
    agent = invocation.agent
    output_schema = agent.output_schema

    message = (
        f"[fix_output_format]: agent_name:{agent.name} user_id:{invocation.user_id} "
        f"session_id:{invocation.session.id} invocation_id:{callback_context.invocation_id}"
    )

    # Case 1: nothing to validate against.
    if not output_schema:
        logger.debug(f"{message}\nNo output_schema, return original llm_response")
        return llm_response

    # Case 2: the original dereferenced content.parts[0].text unconditionally and
    # crashed when any of those was missing.
    content = llm_response.content
    parts = content.parts if content is not None else None
    text = parts[0].text if parts else None
    if text is None:
        logger.debug(
            f"{message}\nNo text part in llm_response, return original llm_response"
        )
        return llm_response

    logger.debug(f"{message}\nOriginal llm_response length: {len(text)}")

    # Parse strictly first; fall back to json_repair only when strict parsing fails.
    repaired = False
    try:
        output = json.loads(text)
    except json.JSONDecodeError:
        try:
            output = json_repair.loads(text)
            if isinstance(output, list):
                # A repaired list is unwrapped to its first element.
                output = output[0]
            repaired = True
        except Exception:
            logger.warning(
                f"{message}\nOutput format is not valid JSON, trying to `json_repair` but failed. Original output length: {len(text)}"
            )
            return llm_response_validate_error(
                llm_response, "ReleaseAgent输出不符合规范,且无法修复,请重试"
            )

    # Schema validation of the parsed (or repaired) value.
    try:
        output_schema.model_validate(output)
    except ValidationError:
        if repaired:
            logger.warning(
                f"{message}\nOutput format was not valid JSON, `json_repair` success but the result is not valid for output_schema. Original output length: {len(text)}"
            )
        else:
            logger.warning(
                f"{message}\nOutput format is valid JSON but not valid for output_schema. Original output length: {len(text)}"
            )
        return llm_response_validate_error(
            llm_response, "ReleaseAgent输出不符合规范,存在异常,请重试"
        )

    if repaired:
        # Serialize once and reuse for both the response and the log line.
        fixed_text = json.dumps(output, ensure_ascii=False)
        parts[0].text = fixed_text
        logger.warning(
            f"{message}\nOutput format was not valid JSON, but `json_repair` success. Fixed output length: {len(fixed_text)}"
        )
    else:
        logger.debug(
            f"{message}\nOutput format is valid JSON and valid for output_schema. Original output length: {len(text)}"
        )
    return llm_response


def llm_response_validate_error(llm_response: LlmResponse, reason: str) -> LlmResponse:
    """Overwrite the first text part with a JSON failure payload carrying ``reason``.

    The payload is ``{"status": {"success": false, "message": reason}}``. Non-ASCII
    characters are kept as-is, matching how repaired output is serialized in
    ``fix_output_format``.

    Returns:
        The same ``llm_response`` object, modified in place.
    """
    llm_response.content.parts[0].text = json.dumps(
        {"status": {"success": False, "message": reason}},
        ensure_ascii=False,
    )
    return llm_response
种草解说视频合成 +2.1 将selected_video_list完整传给audio_agent,让audio_agent为每个分镜生成语音。 +请不要将分镜拆分开单独调用audio_agent,而是将selected_video_list全部传给audio_agent。 +2.2 将带有audio字段的selected_video_list传给film_agent,让film_agent进行种草解说视频的合成。 +#格式 +selected_video_list: + - shot_id: str, 分镜1 + prompt: str, 如何生成分镜视频的详细描述 + action: str, 分镜视频的动作描述 + reference: str, 分镜图片的参考url + words: str, 口播文案 + video: dict, 每个分镜里的视频,视频生成工具返回 + id: int, 视频id + url: str, 视频url +""" + + +PROMPT_AUDIO_AGENT = """ +#角色: +你是一位语音合成的Agent。 + +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 + +#工具: +generate_voices:根据文本生成语音。 +#任务: +1. 语音合成 +输入:selected_video_list +调用generate_voices工具,根据words字段,为每个分镜生成语音。 +注意: +- words字段不要包含任何特殊字符。 +- 同一个视频,voice_type应该保持一致。 +- 不要将不同分镜的语音合并到一个音频文件中。 +输出: + shot_id:分镜1 + prompt: str, 如何生成分镜视频的详细描述 + action: str, 分镜视频的动作描述 + reference: str, 分镜图片的参考url + words: str, 口播文案 + video: dict, 每个分镜里的视频,视频生成工具返回 + id: int, 视频id + url: str, 视频url + audio: dict, 每个分镜里的语音,语音生成工具返回 + id: int, 语音id + url: str, 语音文件路径 +""" + + +PROMPT_FILM_AGENT = """ +#角色: +你是一位视频合成的Agent。 + +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 + +#工具: +video_combine:将分镜视频合成最终的视频。 +#任务: +其中video字段是每个分镜的视频 +任务:调用video_combine工具将分镜视频合成最终的视频。 +输出: + video_url: 视频url +""" + + +PROMPT_FORMAT_AGENT = """ +#角色: +你是一个将输入按规定格式输出的格式转换器 + +Notice: +1. 生成内容不要使用单引号、双引号等字符。语音问中文,不要用英文。 +2. 输入输出以及运行过程中,任何涉及图片或视频的链接url,不要做任何修改。 + +#任务描述: +1. 将 视频url,将其按 "规定格式" 输出。 + +#规定格式 +```json +{ + "video_url": str, 视频url +} +``` +""" diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/__init__.py b/demohouse/mutimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/__init__.py new file mode 100644 index 00000000..274ec918 --- /dev/null +++ b/demohouse/mutimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2025 Bytedance Ltd. 
and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .agent import film_agent + +__all__ = [ + "film_agent", +] diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/agent.py b/demohouse/mutimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/agent.py new file mode 100644 index 00000000..11df0d6e --- /dev/null +++ b/demohouse/mutimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/agent.py @@ -0,0 +1,58 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def _thinking_config(env_name: str, default: str) -> dict:
    """Build the ``model_extra_config`` dict whose thinking type comes from a setting."""
    return {"extra_body": {"thinking": {"type": getenv(env_name, default)}}}


# Step 1: calls the video_combine tool to merge the per-shot videos.
film_generate_agent = Agent(
    name="film_generate_agent",
    description="将所有分镜的视频合成最终的视频",
    instruction=PROMPT_FILM_AGENT,
    tools=[video_combine],
    generate_content_config=max_output_tokens_config,
    model_extra_config=_thinking_config("THINKING_FILM_AGENT", "enabled"),
)

# Step 2: reshapes the previous step's output into the VideoUrl schema; the
# fix_output_format hook validates (and, if needed, repairs) the JSON.
format_agent = Agent(
    name="format_agent",
    model_name=getenv("MODEL_FORMAT_NAME"),
    description="将模型的输出格式化",
    instruction=PROMPT_FORMAT_AGENT,
    generate_content_config=json_response_config,
    output_schema=VideoUrl,
    output_key="video_url",
    after_model_callback=[fix_output_format],
    model_extra_config=_thinking_config("THINKING_FORMAT_AGENT", "disabled"),
)

# Pipeline exposed to the release agent: generate first, then format.
film_agent = SequentialAgent(
    name="film_agent",
    description="将所有分镜的视频合成最终的视频",
    sub_agents=[film_generate_agent, format_agent],
)
+# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/tools/video_combine.py b/demohouse/mutimedia/backend/app/release-agent/src/release_agent/tools/video_combine.py new file mode 100644 index 00000000..56759c63 --- /dev/null +++ b/demohouse/mutimedia/backend/app/release-agent/src/release_agent/tools/video_combine.py @@ -0,0 +1,271 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
async def resolve_short_url(short_url: str) -> str:
    """Resolve a short link back to the URL it stands for.

    A URL is treated as a short link when its path has at least two segments and
    the first one is ``t`` (e.g. ``http://host/t/AbC123`` or
    ``http://host/t/video/AbC123``). The short link is fetched with GET and the
    response body, stripped of whitespace and double quotes, is taken as the
    original URL.

    Args:
        short_url: Candidate short link.

    Returns:
        The resolved URL, or ``short_url`` itself when it is not a short link,
        the service answers with a non-200 status, or any error occurs.
    """
    logger.debug("Resolving short URL")
    if not shorten_url_service_url:
        return short_url

    try:
        segments = urllib.parse.urlparse(short_url).path.strip("/").split("/")
        if len(segments) < 2 or segments[0] != "t":
            logger.warning(f"Not a valid short URL format: {short_url}")
            return short_url

        async with aiohttp.ClientSession() as session:
            async with session.get(short_url) as response:
                if response.status != 200:
                    logger.warning(
                        f"Failed to resolve short URL: {short_url}, status: {response.status}"
                    )
                    return short_url

                # The body is the original URL as a plain string.
                original_url = (await response.text()).strip().strip('"')
                logger.debug(
                    f"Successfully resolved short URL: {short_url} -> {original_url}"
                )
                return original_url
    except Exception as e:
        # Best effort: on failure the caller proceeds with the unresolved link.
        logger.error(f"Error resolving short URL {short_url}: {e}")
        return short_url


def _extension_for_content_type(content_type: str) -> str:
    """Pick a file extension from a video Content-Type header, defaulting to ``.mp4``."""
    if "video" in content_type:
        for marker in ("mp4", "webm", "ogg", "mov"):
            if marker in content_type:
                return f".{marker}"
    return ".mp4"


async def _download_video(
    session: "aiohttp.ClientSession",
    url: str,
    dest_dir: str,
    idx: int,
    total: int,
    max_file_size: int,
) -> Optional[str]:
    """Stream one video into ``dest_dir``; return its path, or None if too large or empty."""
    logger.info(f"Downloading video {idx + 1}/{total} from {url}")

    async with session.get(url, allow_redirects=True) as response:
        response.raise_for_status()

        # Cheap pre-check on the declared size; the streamed check below is the backstop.
        content_length = response.headers.get("content-length")
        if content_length is not None:
            try:
                declared_size = int(content_length)
            except ValueError:
                declared_size = None  # unparsable header: rely on the streamed check
            if declared_size is not None and declared_size > max_file_size:
                logger.error(
                    f"Video size {declared_size} exceeds limit {max_file_size}."
                )
                return None

        file_extension = _extension_for_content_type(
            response.headers.get("content-type", "")
        )
        # Index + uuid keeps names unique; random 6-digit names could collide and
        # silently overwrite an earlier clip.
        temp_file_path = os.path.join(
            dest_dir, f"video_{idx:03d}_{uuid.uuid4().hex}{file_extension}"
        )

        total_size = 0
        with open(temp_file_path, "wb") as f:
            async for chunk in response.content.iter_chunked(8192):
                if not chunk:
                    continue
                total_size += len(chunk)
                if total_size > max_file_size:
                    logger.error(
                        f"Video size exceeds limit {max_file_size}. Download stopped."
                    )
                    return None
                f.write(chunk)

        if os.path.exists(temp_file_path) and os.path.getsize(temp_file_path) > 0:
            logger.info(
                f"Successfully downloaded video {idx + 1} to {temp_file_path}, size: {total_size / 1024 / 1024:.2f} MB"
            )
            return temp_file_path

        logger.error(
            f"Failed to download video {idx + 1}: file is empty or doesn't exist"
        )
        return None


def _merge_clips(file_paths: List[str], output_file_path: str) -> None:
    """Play the clips back to back and encode them into ``output_file_path`` (blocking)."""
    video_clips = []
    final_clip = None
    try:
        positioned_clips = []
        clip_start_time = 0.0
        for file_path in file_paths:
            clip = VideoFileClip(file_path)
            video_clips.append(clip)
            # Each clip starts where the previous one ended and is centered in the frame.
            positioned_clips.append(
                clip.with_start(clip_start_time).with_position("center")
            )
            clip_start_time += clip.duration

        final_clip = CompositeVideoClip(positioned_clips)
        logger.info(f"Saving merged video to {output_file_path}")
        final_clip.write_videofile(
            output_file_path, codec="libx264", audio_codec="aac", threads=4
        )
    finally:
        # Release the clips whatever happened above.
        for clip in video_clips:
            try:
                clip.close()
            except Exception as e:
                logger.error(f"Error closing video clip: {e}")
        if final_clip is not None:
            try:
                final_clip.close()
            except Exception as e:
                logger.error(f"Error closing final clip: {e}")


async def video_combine(video_urls: List[str]) -> Optional[str]:
    """Download the given videos and merge them, in order, into a single MP4 file.

    Short links are resolved first; URLs whose scheme is not http/https are
    skipped with a warning. Each download is limited to 512 MB. Work happens in a
    fresh temporary directory under ``merged_videos`` (located five directory
    levels above this file). On success only the merged file is left in that
    directory; on any failure the whole directory is removed.

    Args:
        video_urls: Video URLs (or short links) in playback order.

    Returns:
        Local path of the merged video, or None if resolving, downloading or
        merging failed.
    """
    # Function-scope imports: only this entry point needs them.
    import asyncio
    import shutil

    max_file_size = 512 * 1024 * 1024  # 512 MB per video

    module_path = os.path.abspath(__file__)
    project_root = os.path.dirname(module_path)
    for _ in range(4):
        project_root = os.path.dirname(project_root)

    output_dir = os.path.join(project_root, "merged_videos")
    os.makedirs(output_dir, exist_ok=True)
    temp_dir = tempfile.mkdtemp(dir=output_dir)
    logger.info(f"Created temporary directory: {temp_dir}")

    merged_path = None
    downloaded_files = []
    try:
        resolved_urls = []
        for url in video_urls:
            resolved_url = await resolve_short_url(url)
            # Only http/https is allowed, to reduce SSRF risk.
            if urllib.parse.urlparse(resolved_url).scheme not in {"http", "https"}:
                logger.warning(f"Skip non-http(s) URL: {resolved_url}")
                continue
            resolved_urls.append(resolved_url)

        async with aiohttp.ClientSession() as session:
            for idx, url in enumerate(resolved_urls):
                try:
                    file_path = await _download_video(
                        session, url, temp_dir, idx, len(resolved_urls), max_file_size
                    )
                except Exception as e:
                    logger.error(f"Error downloading video {idx + 1} from {url}: {e}")
                    return None
                if file_path is None:
                    return None
                downloaded_files.append(file_path)

        if not downloaded_files:
            logger.error("No videos were successfully downloaded")
            return None

        logger.info(f"Starting to merge {len(downloaded_files)} videos")
        output_file_path = os.path.join(temp_dir, f"merged_video_{uuid.uuid4()}.mp4")
        try:
            # Encoding is CPU-bound and blocking; run it in the default executor so
            # the event loop can keep serving other requests.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                None, _merge_clips, downloaded_files, output_file_path
            )
        except Exception as e:
            logger.error(f"Error merging videos: {e}")
            return None

        if os.path.exists(output_file_path) and os.path.getsize(output_file_path) > 0:
            logger.info(f"Successfully merged video to local path: {output_file_path}")
            merged_path = output_file_path
            return merged_path

        logger.error(
            f"Merged video file is empty or doesn't exist: {output_file_path}"
        )
        return None
    finally:
        if merged_path is None:
            # Failure: nothing in the temp dir is useful any more.
            shutil.rmtree(temp_dir, ignore_errors=True)
        else:
            # Success: keep the merged file, drop the source clips.
            for file_path in downloaded_files:
                try:
                    os.remove(file_path)
                except OSError as e:
                    logger.warning(f"Failed to remove temporary file {file_path}: {e}")
import Client +from veadk.utils.logger import get_logger + +logger = get_logger(__name__) + +# 短链接服务配置 +shorten_url_service_url = os.getenv("SHORTEN_URL_SERVICE_URL", None) +assert shorten_url_service_url, "SHORTEN_URL_SERVICE_URL is not set" + + +async def resolve_short_url(short_url: str) -> str: + """ + 将短链接还原为原始URL + + Args: + short_url: 短链接URL + + Returns: + 原始URL,如果解析失败则返回短链接本身 + """ + # 避免在控制台打印短链接,改用结构化日志 + logger.debug("Resolving short URL") + if not shorten_url_service_url: + return short_url + + try: + # 从短链接中提取短码 + # 短链接格式: http://127.0.0.1:8005/t/AbC123 或 http://127.0.0.1:8005/t/video/AbC123 + parsed_url = urllib.parse.urlparse(short_url) + path_parts = parsed_url.path.strip("/").split("/") + + if len(path_parts) >= 2 and path_parts[0] == "t": + # 调用短链接服务的重定向接口来获取原始URL + async with aiohttp.ClientSession() as session: + # 使用GET请求获取原始URL(短链接服务直接返回原始URL字符串) + async with session.get(short_url) as response: + if response.status == 200: + # 短链接服务直接返回原始URL字符串 + original_url = await response.text() + original_url = original_url.strip().strip('"') + logger.debug( + f"Successfully resolved short URL: {short_url} -> {original_url}" + ) + return original_url + else: + logger.warning( + f"Failed to resolve short URL: {short_url}, status: {response.status}" + ) + return short_url + else: + logger.warning(f"Not a valid short URL format: {short_url}") + return short_url + + except Exception as e: + logger.error(f"Error resolving short URL {short_url}: {e}") + # 如果解析失败,返回原始短链接 + return short_url + +vod_mcp_config = { + "mcpServers": { + "mcp-server-vod": { + "command": "uvx", + "args": [ + "--from", + "git+https://github.com/volcengine/mcp-server.git#subdirectory=server/mcp_server_vod", + "mcp-server-vod", + ], + "env": { + "VOLCENGINE_ACCESS_KEY": os.getenv("VOLCENGINE_ACCESS_KEY"), + "VOLCENGINE_SECRET_KEY": os.getenv("VOLCENGINE_SECRET_KEY"), + }, + } + } +} + + +class VodToolSet: + def __init__( + self, + mcp_config: dict, + space_name: Optional[str] = None, + 
task_polling_interval: int = 20, + max_retries: int = 30, + ): + self.mcp_client = Client(mcp_config) + self.space_name = space_name + self.task_polling_interval = task_polling_interval + self.max_retries = max_retries + + async def list_tools(self): + async with self.mcp_client as client: + response = await client.list_tools() + return response + + async def _call_tools(self, tool_name: str, arguments: dict[str, Any]): + async with self.mcp_client as client: + response = await client.call_tool( + name=tool_name, + arguments=arguments, + ) + + return [ + json.loads(content.model_dump().get("text", "")) + for content in response.content + ] + + async def video_stitching(self, videos_url: list[str]) -> dict: + new_videos_url = [] + for item in videos_url: + item = await resolve_short_url(item)  # resolve_short_url is a coroutine function: it must be awaited to get the URL string + new_videos_url.append(item) + + response = await self._call_tools( + tool_name="audio_video_stitching", + arguments={ + "type": "video", + "SpaceName": self.space_name, + "videos": new_videos_url, + }, + ) + + task_id = response[0]["VCreativeId"] + + for _ in range(self.max_retries): + response = await self._get_task_message(task_id) + status = response.get("Status", "error") + if status in {"success", "failed_run"}: + break + elif status == "error": + return { + "film_url": "", + "success": False, + "message": "视频合成工具繁忙,请重试", + } + else: + await asyncio.sleep(self.task_polling_interval) + else: + return {"film_url": "", "success": False, "message": "timeout", "url": "", "status": "timeout"}  # polling budget exhausted; same keys as the other return paths, legacy url/status keys kept for existing callers + + return { + "film_url": response.get("OutputJson", {}).get("url", ""), + "success": status == "success", + "message": status, + } + + async def _get_task_message(self, task_id: str) -> dict: + try: + response = await self._call_tools( + tool_name="get_v_creative_task_result", + arguments={"VCreativeId": task_id, "SpaceName": self.space_name}, + ) + status = response[0] + return status + except fastmcp.exceptions.ToolError as e: + logger.error( + f"Error getting task message: fastmcp.exceptions.ToolError: {e}" + ) + return {"Status": "mcp_error"}
+ + async def generate(self, video_list: list[dict[str, Any]]) -> dict[str, Any]: + """ + Validate the shot list, collect each shot's video URL and stitch them; each item looks like: + { + "video": {"url": "xxx"}, + "audio": {"url": "xxx"} # optional, not read by this method + } + """ + + if not video_list: + raise ValueError("video_list not found") + + videos: list = [] + + for i, shot in enumerate(video_list, start=1): + video_info = shot.get("video", {}) + video_url = video_info.get("url") if isinstance(video_info, dict) else None + if not video_url: + raise ValueError(f"shot[{i}] missing video.url") + + videos.append(video_url) + + video_product = videos + # Step 2: stitch the collected videos together + result = await self.video_stitching(video_product) + + logger.debug(f"[video_combine] result: {result}") + return result + + + +async def video_combine(video_list: List[Dict[str, Any]]) -> Dict[str, Any]: + """ + Tool Name: + video_combine + + Description: + 该工具用于将多个视频片段(shots)按照给定顺序剪辑拼接为一个完整的视频。 每个视频片段包含其独立的视觉描述(prompt)、动作说明(action)以及视频文件信息。 + 工具可用于自动化生成广告视频、产品展示片或创意短片。 + + Args: + video_list (List[Dict]): + 包含多个视频片段(shot)的列表,每个元素为一个字典,字段说明如下: + - shot_id (str): + 当前镜头的唯一标识符。 + - prompt (str): + 对镜头画面的详细视觉描述,用于说明画面构图、主体、光线、氛围等信息。 + - action (str): + 对镜头运动、转场或特效的文字描述,如“镜头缓慢推进”或“伴随光晕特效”。 + - video (Dict): + 当前镜头对应的视频文件信息,包含: + - id (str): 视频文件在系统中的唯一标识。 + - url (str): 视频文件的可访问 URL(例如对象存储链接)。 + - audio (Dict, optional): + 当前镜头对应的音频文件信息,包含: + - id (str): 音频文件在系统中的唯一标识。 + - url (str): 音频文件的可访问 URL。 + + Returns: + result (Dict[str, Any]): + Result dict, normally carrying film_url (URL of the stitched video, "" on failure), success (bool) and message (task status or error text). + + Example: + >>> video_list = [ + ... { + ... "shot_id": "shot_1", + ... "prompt": "主体为望梅好杨梅汁透明玻璃瓶,瓶内红色杨梅汁清澈可见...", + ... "action": "镜头从全景缓慢旋转推近瓶身", + ... "video": {"id": "1", "url": "https://example.com/video1.mp4"}, + ... "audio": {"id": "1", "url": "https://example.com/audio1.mp3"} + ... }, + ... ... + ...
] + >>> result = await video_combine(video_list) + >>> print(result) + { + "film_url": 'https://example.com/merged_video.mp4', + "success": True, "message": "success" + } + + Notes: + - 所有输入视频应具有兼容的分辨率与帧率,否则需要预处理以统一参数。 + - 工具会根据 video_list 的顺序依次拼接视频。 + """ + vod_tool_set = VodToolSet( + space_name=os.getenv("TOOLS_VOD_SPACE_NAME", None), + task_polling_interval=int( + os.getenv("TOOLS_VOD_TASK_POLLING_INTERVAL", "20") + ), + max_retries=int(os.getenv("TOOLS_VOD_MAX_RETRIES", "60")), + mcp_config=vod_mcp_config, + ) + try: + film_url = await vod_tool_set.generate(video_list) + + return film_url + except Exception as e: + logger.error(f"Failed to generate film: {e}") + return { + "film_url": "", + "success": False, + "message": "视频合成工具执行出错,请重试", + } \ No newline at end of file diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/utils/__init__.py b/demohouse/mutimedia/backend/app/release-agent/src/release_agent/utils/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/release-agent/src/release_agent/utils/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/utils/types.py b/demohouse/mutimedia/backend/app/release-agent/src/release_agent/utils/types.py new file mode 100644 index 00000000..d9c60379 --- /dev/null +++ b/demohouse/mutimedia/backend/app/release-agent/src/release_agent/utils/types.py @@ -0,0 +1,50 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.genai import types +from pydantic import BaseModel, Field +from typing import Optional + +json_response_config = types.GenerateContentConfig( + response_mime_type="application/json", max_output_tokens=18000 +) + +max_output_tokens_config = types.GenerateContentConfig(max_output_tokens=18000) + + +class VideoUrl(BaseModel): + """A video url.""" + + video_url: str = Field(description="The url of the video") + + +class Film(BaseModel): + url: str + + +class Video(BaseModel): + index: int + video_gen_task_id: str + video_url: str + video_data: Optional[bytes] = None + + +class Tone(BaseModel): + index: int + words: str + tone: str + + +class Audio(BaseModel): + index: int + audio_gen_task_id: str + audio_url: str + audio_data: Optional[bytes] = None diff --git a/demohouse/mutimedia/backend/app/release-agent/src/requirements.txt b/demohouse/mutimedia/backend/app/release-agent/src/requirements.txt new file mode 100644 index 00000000..cf8587c8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/release-agent/src/requirements.txt @@ -0,0 +1,6 @@ 
+veadk-python==0.2.28 +fastapi +uvicorn[standard] +moviepy[ffmpeg]>=2.2.1 +google-adk==1.18.0 +json-repair \ No newline at end of file diff --git a/demohouse/mutimedia/backend/app/short_link/README.md b/demohouse/mutimedia/backend/app/short_link/README.md new file mode 100644 index 00000000..761db323 --- /dev/null +++ b/demohouse/mutimedia/backend/app/short_link/README.md @@ -0,0 +1,180 @@ +# 短链接服务 (Short Link Service) + +一个基于FastAPI的轻量级短链接生成和记录服务,支持两种存储模式:内存字典模式和Redis模式。 + +## 功能特点 + +- 🔗 将长URL转换为短链接 +- 🔄 支持自定义短链接类型(如 `/t/type/shortcode`) +- 📊 自动检测重复URL,避免生成重复短码 +- 💾 双模式存储:内存字典(默认)或Redis +- ⚡ 高性能异步处理 +- 🛡️ 简洁的API接口 + +## 快速开始 + +### 安装依赖 + +```bash +pip install -r requirements.txt +``` + +### 运行服务(默认字典模式) + +```bash +uvicorn app:app --host 0.0.0.0 --port 8000 +``` + +服务启动后,默认使用内存字典模式,无需额外配置。 + +## 存储模式 + +### 1. 字典模式(默认) + +- ✅ 零配置,开箱即用 +- ✅ 无需额外依赖 +- ⚠️ 数据存储在内存中,服务重启后数据会丢失 +- ⚠️ 不适合多实例部署 + +### 2. Redis模式 + +- ✅ 数据持久化 +- ✅ 支持多实例共享数据 +- ✅ 适合生产环境 +- ⚠️ 需要安装和配置Redis + +#### 启用Redis模式 + +1. 安装Redis依赖: +```bash +pip install redis +``` + +2. 设置环境变量: +```bash +export SHORT_LINK_MODE=redis +export REDIS_HOST=localhost +export REDIS_PORT=6379 +export REDIS_PASSWORD=your_password # 可选 +export REDIS_DB=0 # 可选,默认0 +``` + +3. 
运行服务: +```bash +uvicorn app:app --host 0.0.0.0 --port 8005 +``` + +## API接口 + +### 生成短链接 + +**POST** `/shorten` + +请求体: +```json +{ + "url": "https://example.com/very/long/url/path", + "type": "blog" // 可选,用于生成 /t/type/shortcode 格式的链接 +} +``` + +响应: +```json +{ + "short_code": "AbC123", + "short_url": "http://localhost:8005/t/AbC123" +} +``` + +### 短链接跳转 + +**GET** `/t/{short_code}` + +或 + +**GET** `/t/{type}/{short_code}` + +示例: +- `http://localhost:8005/t/AbC123` → 返回原始长URL +- `http://localhost:8005/t/blog/AbC123` → 返回原始长URL + +## 环境变量 + +| 变量名 | 说明 | 默认值 | 备注 | +|--------|------|--------|------| +| `SHORT_LINK_MODE` | 存储模式 | `dict` | 可选:`dict` 或 `redis` | +| `SHORT_LINK_DOMAIN` | 短链接域名 | `http://localhost:8005` | 用于生成完整的短链接,需包含协议前缀 | +| `REDIS_HOST` | Redis主机 | - | Redis模式下必填 | +| `REDIS_PORT` | Redis端口 | `6379` | 可选 | +| `REDIS_PASSWORD` | Redis密码 | - | 可选 | +| `REDIS_DB` | Redis数据库 | `0` | 可选 | +| `REDIS_USERNAME` | Redis用户名 | - | 可选 | + +## 短码算法 + +服务使用62进制编码(0-9, A-Z, a-z)将自增ID转换为短码,确保: + +- 短码长度随ID增长而增长 +- 无冲突,每个ID对应唯一短码 +- 短码仅由数字和字母组成,可直接用于URL路径 + +## 注意事项 + +1. **数据持久化**:字典模式下数据只存在于内存,重启服务会清空所有短链接数据 +2. **重复检测**:系统会自动检测重复URL并返回已有的短码 +3. **TTL支持**:Redis模式下支持24小时TTL,字典模式暂不支持过期功能 +4.
**性能**:字典模式适合开发测试,Redis模式适合生产环境 + +## 示例代码 + +### Python调用示例 + +```python +import requests + +# 生成短链接 +response = requests.post("http://localhost:8005/shorten", json={ + "url": "https://example.com/very/long/url/path", + "type": "article" +}) +result = response.json() +print(f"短链接:{result['short_url']}") +``` + +### cURL调用示例 + +```bash +# 生成短链接 +curl -X POST "http://localhost:8005/shorten" \ + -H "Content-Type: application/json" \ + -d '{"url": "https://example.com", "type": "blog"}' + +# 访问短链接(响应体为原始长URL,不会发生HTTP重定向) +curl -i "http://localhost:8005/t/AbC123" +``` + +## 部署建议 + +### 开发环境 +- 使用默认的字典模式 +- 适合快速开发和测试 + +### 生产环境 +- 使用Redis模式确保数据持久化 +- 配置域名和环境变量 +- 考虑添加监控和日志 +- 可以部署多个实例实现负载均衡 + +## 故障排查 + +### Redis连接失败 +- 检查Redis服务是否运行 +- 确认网络连接和认证配置 +- 查看控制台错误信息;注意仅当未安装redis库时系统才会自动回退到字典模式,Redis连接失败不会自动回退 + +### 短链接失效 +- 字典模式:检查服务是否重启过 +- Redis模式:检查TTL设置和Redis状态 + +### 端口占用 +- 修改运行端口:`uvicorn app:app --port 8005` \ No newline at end of file diff --git a/demohouse/mutimedia/backend/app/short_link/__init__.py b/demohouse/mutimedia/backend/app/short_link/__init__.py new file mode 100644 index 00000000..5311dfd8 --- /dev/null +++ b/demohouse/mutimedia/backend/app/short_link/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ diff --git a/demohouse/mutimedia/backend/app/short_link/app.py b/demohouse/mutimedia/backend/app/short_link/app.py new file mode 100644 index 00000000..6b957816 --- /dev/null +++ b/demohouse/mutimedia/backend/app/short_link/app.py @@ -0,0 +1,185 @@ +# Copyright (c) 2025 Bytedance Ltd. and/or its affiliates +# Licensed under the 【火山方舟】原型应用软件自用许可协议 +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# https://www.volcengine.com/docs/82379/1433703 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import hashlib +import logging +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel + +# 配置模式 +SHORT_LINK_MODE = os.getenv( + "SHORT_LINK_MODE", "dict" +) # 默认为字典模式,可选值: "redis", "dict" + +# 条件导入Redis +if SHORT_LINK_MODE == "redis": + try: + import redis.asyncio as redis + + REDIS_AVAILABLE = True + except ImportError: + logging.getLogger("short_link").warning( + "Redis模式已选择但未安装redis库,请运行: pip install redis;已回退字典模式" + ) + REDIS_AVAILABLE = False + SHORT_LINK_MODE = "dict" # 回退到字典模式 +else: + REDIS_AVAILABLE = False + +# 轻量日志 +logger = logging.getLogger("short_link") +if not logger.handlers: + logging.basicConfig(level=logging.INFO) + +# 创建FastAPI应用 +app = FastAPI( + title="Short Link Service", + version="1.0.0", + docs_url=None, + redoc_url=None, + openapi_url=None, +) + +# 存储后端初始化 +if SHORT_LINK_MODE == "redis" and REDIS_AVAILABLE: + # 连接Redis + storage_client = redis.Redis( + host=os.getenv("REDIS_HOST"), + port=int(os.getenv("REDIS_PORT", 6379)), + username=os.getenv("REDIS_USERNAME"), + password=os.getenv("REDIS_PASSWORD"), + db=int(os.getenv("REDIS_DB", 0)), + decode_responses=True, + ) +else: + # 使用字典作为存储后端 
+ logger.info(f"使用字典模式存储短链接 (SHORT_LINK_MODE={SHORT_LINK_MODE})") + # 初始化字典存储 + dict_storage = { + "auto_id_counter": 0, + "long_md5": {}, # long:md5:{md5} -> short_code + "short": {}, # short:{short_code} -> url + } + + # 模拟Redis客户端的异步接口 + class DictStorageClient: + def __init__(self, storage): + self.storage = storage + + async def get(self, key: str): + if key.startswith("long:md5:"): + md5 = key.replace("long:md5:", "") + return self.storage["long_md5"].get(md5) + elif key.startswith("short:"): + short_code = key.replace("short:", "") + return self.storage["short"].get(short_code) + return None + + async def setex(self, key: str, ttl: int, value: str): + # 字典模式不支持TTL,但保留接口兼容性 + if key.startswith("long:md5:"): + md5 = key.replace("long:md5:", "") + self.storage["long_md5"][md5] = value + elif key.startswith("short:"): + short_code = key.replace("short:", "") + self.storage["short"][short_code] = value + + async def incr(self, key: str): + if key == "auto_id:counter": + self.storage["auto_id_counter"] += 1 + return self.storage["auto_id_counter"] + return 0 + + storage_client = DictStorageClient(dict_storage) + +# 进制转换字符集 +CHAR_SET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" +BASE = len(CHAR_SET) + + +def encode_id(unique_id: int) -> str: + """ + 将自增ID转换为短码 + :param unique_id: 自增ID + :return: 短码 + """ + if unique_id == 0: + return CHAR_SET[0] + short_code = [] + while unique_id > 0: + unique_id, remainder = divmod(unique_id, BASE) + short_code.append(CHAR_SET[remainder]) + return "".join(reversed(short_code)) + + +class URLRequest(BaseModel): + url: str + type: str = None + + +@app.post("/shorten", response_model=dict) +async def shorten_url(request: URLRequest): + """ + 生成短链接 + :param url: 原始长URL + :return: 短码和短链接 + """ + # 计算URL的MD5值 + url = request.url + url_md5 = hashlib.md5(url.encode()).hexdigest() + + # 检查长URL是否已经生成过短码 + existing_short_code = await storage_client.get(f"long:md5:{url_md5}") + if existing_short_code: + domain = 
os.getenv("SHORT_LINK_DOMAIN", "http://localhost:8005") + if request.type: + short_url = f"{domain}/t/{request.type}/{existing_short_code}" + else: + short_url = f"{domain}/t/{existing_short_code}" + return { + "short_code": existing_short_code, + "short_url": short_url, + } + + # 获取自增ID + unique_id = await storage_client.incr("auto_id:counter") + + # 将自增ID转换为短码 + short_code = encode_id(unique_id) + + # 存储三个核心映射 + await storage_client.setex(f"long:md5:{url_md5}", 24 * 3600, short_code) + await storage_client.setex(f"short:{short_code}", 24 * 3600, url) + + # 返回结果 + domain = os.getenv("SHORT_LINK_DOMAIN", "http://localhost:8005") + if request.type: + short_url = f"{domain}/t/{request.type}/{short_code}" + else: + short_url = f"{domain}/t/{short_code}" + return {"short_code": short_code, "short_url": short_url} + + +@app.get("/t/{short_code}") +@app.get("/t/{type}/{short_code}") +async def redirect_url(short_code: str, type: str = None): + """ + 短链接跳转 + :param type: 资源类型 (可选) + :param short_code: 短码 + :return: 重定向到原始长URL + """ + # 获取原始长URL + url = await storage_client.get(f"short:{short_code}") + if not url: + raise HTTPException(status_code=404, detail="Short code not found") + return url.strip('"') diff --git a/demohouse/mutimedia/backend/app/short_link/requirements.txt b/demohouse/mutimedia/backend/app/short_link/requirements.txt new file mode 100644 index 00000000..d8e10ee3 --- /dev/null +++ b/demohouse/mutimedia/backend/app/short_link/requirements.txt @@ -0,0 +1,6 @@ +fastapi==0.104.1 +uvicorn[standard]==0.24.0 + +# Redis支持(可选) +# 如果需要使用Redis模式,请取消下面的注释: +# redis>=4.0.0 \ No newline at end of file diff --git a/demohouse/mutimedia/backend/pyproject.toml b/demohouse/mutimedia/backend/pyproject.toml new file mode 100644 index 00000000..4d22ca38 --- /dev/null +++ b/demohouse/mutimedia/backend/pyproject.toml @@ -0,0 +1,16 @@ +[project] +name = "veadk-multimedia-sample" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" 
+requires-python = ">=3.12" +dependencies = [ + "google-adk==1.18.0", + "veadk-python==0.2.28", + "json-repair>=0.54.1", + "aiohttp", + "uvicorn>=0.38.0", + "moviepy[ffmpeg]>=2.2.1", + "playwright>=1.57.0", + "requests>=2.32.5", +] From 4bf9e278d7f159169de857d84008f5f52e8f87eb Mon Sep 17 00:00:00 2001 From: "hanzhi.421" Date: Mon, 15 Dec 2025 14:41:03 +0800 Subject: [PATCH 2/3] fix(demohouse/multimedia): multimedia agent backend --- demohouse/{mutimedia => multimedia}/README.md | 0 demohouse/{mutimedia => multimedia}/backend/.gitignore | 0 demohouse/{mutimedia => multimedia}/backend/app/__init__.py | 0 .../backend/app/director-agent/__init__.py | 0 .../backend/app/director-agent/config.yaml.example | 0 .../backend/app/director-agent/src/README.md | 0 .../backend/app/director-agent/src/__init__.py | 0 .../backend/app/director-agent/src/agent.py | 0 .../backend/app/director-agent/src/app.py | 0 .../backend/app/director-agent/src/director_agent/__init__.py | 0 .../backend/app/director-agent/src/director_agent/agent.py | 0 .../app/director-agent/src/director_agent/hook/__init__.py | 0 .../director-agent/src/director_agent/hook/check_and_raise.py | 0 .../app/director-agent/src/director_agent/hook/format_hook.py | 0 .../app/director-agent/src/director_agent/hook/shorten_url.py | 0 .../backend/app/director-agent/src/director_agent/prompt.py | 0 .../director-agent/src/director_agent/sub_agents/__init__.py | 0 .../src/director_agent/sub_agents/image/__init__.py | 0 .../director-agent/src/director_agent/sub_agents/image/agent.py | 0 .../src/director_agent/sub_agents/storyboard/__init__.py | 0 .../src/director_agent/sub_agents/storyboard/agent.py | 0 .../src/director_agent/sub_agents/video/__init__.py | 0 .../director-agent/src/director_agent/sub_agents/video/agent.py | 0 .../app/director-agent/src/director_agent/tools/__init__.py | 0 .../src/director_agent/tools/image_generate_builtin_fix.py | 0 .../src/director_agent/tools/image_generate_gather.py | 0 
.../src/director_agent/tools/video_generate_http.py | 0 .../app/director-agent/src/director_agent/utils/__init__.py | 0 .../app/director-agent/src/director_agent/utils/types.py | 0 .../backend/app/director-agent/src/requirements.txt | 2 +- .../backend/app/evaluate-agent/__init__.py | 0 .../backend/app/evaluate-agent/config.yaml.example | 0 .../backend/app/evaluate-agent/src/README.md | 0 .../backend/app/evaluate-agent/src/__init__.py | 0 .../backend/app/evaluate-agent/src/agent.py | 0 .../backend/app/evaluate-agent/src/app.py | 0 .../backend/app/evaluate-agent/src/evaluate_agent/__init__.py | 0 .../backend/app/evaluate-agent/src/evaluate_agent/agent.py | 0 .../app/evaluate-agent/src/evaluate_agent/hook/__init__.py | 0 .../src/evaluate_agent/hook/direct_output_callback.py | 0 .../backend/app/evaluate-agent/src/evaluate_agent/prompt.py | 0 .../app/evaluate-agent/src/evaluate_agent/tools/__init__.py | 0 .../app/evaluate-agent/src/evaluate_agent/tools/geval.py | 0 .../app/evaluate-agent/src/evaluate_agent/utils/__init__.py | 0 .../app/evaluate-agent/src/evaluate_agent/utils/types.py | 0 .../backend/app/evaluate-agent/src/requirements.txt | 0 demohouse/{mutimedia => multimedia}/backend/app/main.py | 0 .../backend/app/market-agent/__init__.py | 0 .../backend/app/market-agent/config.yaml.example | 0 .../backend/app/market-agent/src/README.md | 0 .../backend/app/market-agent/src/__init__.py | 0 .../backend/app/market-agent/src/agent.py | 0 .../backend/app/market-agent/src/app.py | 0 .../backend/app/market-agent/src/market_agent/__init__.py | 0 .../backend/app/market-agent/src/market_agent/agent.py | 0 .../backend/app/market-agent/src/market_agent/hook/__init__.py | 0 .../app/market-agent/src/market_agent/hook/format_hook.py | 0 .../backend/app/market-agent/src/market_agent/prompt.py | 0 .../backend/app/market-agent/src/market_agent/tools/__init__.py | 0 .../app/market-agent/src/market_agent/tools/filter_by_llm.py | 0 
.../app/market-agent/src/market_agent/tools/image_understand.py | 0 .../backend/app/market-agent/src/market_agent/tools/is_image.py | 0 .../app/market-agent/src/market_agent/tools/link_reader.py | 0 .../app/market-agent/src/market_agent/tools/web_parse.py | 0 .../app/market-agent/src/market_agent/tools/web_parser_local.py | 0 .../backend/app/market-agent/src/market_agent/utils/__init__.py | 0 .../backend/app/market-agent/src/market_agent/utils/types.py | 0 .../backend/app/market-agent/src/requirements.txt | 0 .../backend/app/multimedia-agent/__init__.py | 0 .../backend/app/multimedia-agent/config.yaml.example | 0 .../backend/app/multimedia-agent/src/__init__.py | 0 .../backend/app/multimedia-agent/src/agentkit.py | 0 .../app/multimedia-agent/src/multimedia_agent/__init__.py | 0 .../backend/app/multimedia-agent/src/multimedia_agent/agent.py | 0 .../backend/app/multimedia-agent/src/multimedia_agent/prompt.py | 0 .../backend/app/multimedia-agent/src/requirements.txt | 0 .../backend/app/multimedia-agent/src/server.py | 0 .../backend/app/release-agent/__init__.py | 0 .../backend/app/release-agent/config.yaml.example | 0 .../backend/app/release-agent/src/README.md | 0 .../backend/app/release-agent/src/__init__.py | 0 .../backend/app/release-agent/src/agent.py | 0 .../backend/app/release-agent/src/app.py | 0 .../backend/app/release-agent/src/release_agent/__init__.py | 0 .../backend/app/release-agent/src/release_agent/agent.py | 0 .../app/release-agent/src/release_agent/hook/__init__.py | 0 .../app/release-agent/src/release_agent/hook/format_hook.py | 0 .../backend/app/release-agent/src/release_agent/prompt.py | 0 .../src/release_agent/sub_agents/film_agent/__init__.py | 0 .../src/release_agent/sub_agents/film_agent/agent.py | 0 .../app/release-agent/src/release_agent/tools/__init__.py | 0 .../app/release-agent/src/release_agent/tools/video_combine.py | 0 .../release-agent/src/release_agent/tools/video_combine_vod.py | 0 
.../app/release-agent/src/release_agent/utils/__init__.py | 0 .../backend/app/release-agent/src/release_agent/utils/types.py | 0 .../backend/app/release-agent/src/requirements.txt | 0 .../{mutimedia => multimedia}/backend/app/short_link/README.md | 0 .../backend/app/short_link/__init__.py | 0 .../{mutimedia => multimedia}/backend/app/short_link/app.py | 0 .../backend/app/short_link/requirements.txt | 0 demohouse/{mutimedia => multimedia}/backend/pyproject.toml | 0 101 files changed, 1 insertion(+), 1 deletion(-) rename demohouse/{mutimedia => multimedia}/README.md (100%) rename demohouse/{mutimedia => multimedia}/backend/.gitignore (100%) rename demohouse/{mutimedia => multimedia}/backend/app/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/config.yaml.example (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/README.md (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/agent.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/app.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/agent.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/hook/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/hook/check_and_raise.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/hook/format_hook.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/hook/shorten_url.py (100%) rename demohouse/{mutimedia => 
multimedia}/backend/app/director-agent/src/director_agent/prompt.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/sub_agents/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/sub_agents/image/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/sub_agents/image/agent.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/sub_agents/storyboard/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/sub_agents/storyboard/agent.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/sub_agents/video/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/sub_agents/video/agent.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/tools/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/tools/image_generate_builtin_fix.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/tools/image_generate_gather.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/tools/video_generate_http.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/utils/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/director_agent/utils/types.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/director-agent/src/requirements.txt (83%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/config.yaml.example (100%) rename demohouse/{mutimedia => 
multimedia}/backend/app/evaluate-agent/src/README.md (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/agent.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/app.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/evaluate_agent/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/evaluate_agent/agent.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/evaluate_agent/hook/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/evaluate_agent/hook/direct_output_callback.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/evaluate_agent/prompt.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/evaluate_agent/tools/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/evaluate_agent/tools/geval.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/evaluate_agent/utils/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/evaluate_agent/utils/types.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/evaluate-agent/src/requirements.txt (100%) rename demohouse/{mutimedia => multimedia}/backend/app/main.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/config.yaml.example (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/README.md (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/agent.py (100%) rename demohouse/{mutimedia => 
multimedia}/backend/app/market-agent/src/app.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/agent.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/hook/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/hook/format_hook.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/prompt.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/tools/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/tools/filter_by_llm.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/tools/image_understand.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/tools/is_image.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/tools/link_reader.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/tools/web_parse.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/tools/web_parser_local.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/utils/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/market_agent/utils/types.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/market-agent/src/requirements.txt (100%) rename demohouse/{mutimedia => multimedia}/backend/app/multimedia-agent/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/multimedia-agent/config.yaml.example (100%) rename demohouse/{mutimedia => multimedia}/backend/app/multimedia-agent/src/__init__.py (100%) rename 
demohouse/{mutimedia => multimedia}/backend/app/multimedia-agent/src/agentkit.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/multimedia-agent/src/multimedia_agent/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/multimedia-agent/src/multimedia_agent/agent.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/multimedia-agent/src/multimedia_agent/prompt.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/multimedia-agent/src/requirements.txt (100%) rename demohouse/{mutimedia => multimedia}/backend/app/multimedia-agent/src/server.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/config.yaml.example (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/README.md (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/agent.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/app.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/release_agent/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/release_agent/agent.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/release_agent/hook/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/release_agent/hook/format_hook.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/release_agent/prompt.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/release_agent/sub_agents/film_agent/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/release_agent/sub_agents/film_agent/agent.py (100%) rename demohouse/{mutimedia => 
multimedia}/backend/app/release-agent/src/release_agent/tools/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/release_agent/tools/video_combine.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/release_agent/tools/video_combine_vod.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/release_agent/utils/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/release_agent/utils/types.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/release-agent/src/requirements.txt (100%) rename demohouse/{mutimedia => multimedia}/backend/app/short_link/README.md (100%) rename demohouse/{mutimedia => multimedia}/backend/app/short_link/__init__.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/short_link/app.py (100%) rename demohouse/{mutimedia => multimedia}/backend/app/short_link/requirements.txt (100%) rename demohouse/{mutimedia => multimedia}/backend/pyproject.toml (100%) diff --git a/demohouse/mutimedia/README.md b/demohouse/multimedia/README.md similarity index 100% rename from demohouse/mutimedia/README.md rename to demohouse/multimedia/README.md diff --git a/demohouse/mutimedia/backend/.gitignore b/demohouse/multimedia/backend/.gitignore similarity index 100% rename from demohouse/mutimedia/backend/.gitignore rename to demohouse/multimedia/backend/.gitignore diff --git a/demohouse/mutimedia/backend/app/__init__.py b/demohouse/multimedia/backend/app/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/__init__.py rename to demohouse/multimedia/backend/app/__init__.py diff --git a/demohouse/mutimedia/backend/app/director-agent/__init__.py b/demohouse/multimedia/backend/app/director-agent/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/__init__.py rename to demohouse/multimedia/backend/app/director-agent/__init__.py diff --git 
a/demohouse/mutimedia/backend/app/director-agent/config.yaml.example b/demohouse/multimedia/backend/app/director-agent/config.yaml.example similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/config.yaml.example rename to demohouse/multimedia/backend/app/director-agent/config.yaml.example diff --git a/demohouse/mutimedia/backend/app/director-agent/src/README.md b/demohouse/multimedia/backend/app/director-agent/src/README.md similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/README.md rename to demohouse/multimedia/backend/app/director-agent/src/README.md diff --git a/demohouse/mutimedia/backend/app/director-agent/src/__init__.py b/demohouse/multimedia/backend/app/director-agent/src/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/__init__.py rename to demohouse/multimedia/backend/app/director-agent/src/__init__.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/agent.py b/demohouse/multimedia/backend/app/director-agent/src/agent.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/agent.py rename to demohouse/multimedia/backend/app/director-agent/src/agent.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/app.py b/demohouse/multimedia/backend/app/director-agent/src/app.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/app.py rename to demohouse/multimedia/backend/app/director-agent/src/app.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/__init__.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/__init__.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/__init__.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/agent.py 
b/demohouse/multimedia/backend/app/director-agent/src/director_agent/agent.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/agent.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/agent.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/hook/__init__.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/hook/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/hook/__init__.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/hook/__init__.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/hook/check_and_raise.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/hook/check_and_raise.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/hook/check_and_raise.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/hook/check_and_raise.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/hook/format_hook.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/hook/format_hook.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/hook/format_hook.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/hook/format_hook.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/hook/shorten_url.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/hook/shorten_url.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/hook/shorten_url.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/hook/shorten_url.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/prompt.py 
b/demohouse/multimedia/backend/app/director-agent/src/director_agent/prompt.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/prompt.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/prompt.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/__init__.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/__init__.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/__init__.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/image/__init__.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/image/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/image/__init__.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/image/__init__.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/image/agent.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/image/agent.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/image/agent.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/image/agent.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/__init__.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/__init__.py rename to 
demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/__init__.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/agent.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/agent.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/agent.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/storyboard/agent.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/video/__init__.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/video/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/video/__init__.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/video/__init__.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/video/agent.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/video/agent.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/sub_agents/video/agent.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/sub_agents/video/agent.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/__init__.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/tools/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/__init__.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/tools/__init__.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/image_generate_builtin_fix.py 
b/demohouse/multimedia/backend/app/director-agent/src/director_agent/tools/image_generate_builtin_fix.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/image_generate_builtin_fix.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/tools/image_generate_builtin_fix.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/image_generate_gather.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/tools/image_generate_gather.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/image_generate_gather.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/tools/image_generate_gather.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/video_generate_http.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/tools/video_generate_http.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/tools/video_generate_http.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/tools/video_generate_http.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/utils/__init__.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/utils/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/utils/__init__.py rename to demohouse/multimedia/backend/app/director-agent/src/director_agent/utils/__init__.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/director_agent/utils/types.py b/demohouse/multimedia/backend/app/director-agent/src/director_agent/utils/types.py similarity index 100% rename from demohouse/mutimedia/backend/app/director-agent/src/director_agent/utils/types.py rename to 
demohouse/multimedia/backend/app/director-agent/src/director_agent/utils/types.py diff --git a/demohouse/mutimedia/backend/app/director-agent/src/requirements.txt b/demohouse/multimedia/backend/app/director-agent/src/requirements.txt similarity index 83% rename from demohouse/mutimedia/backend/app/director-agent/src/requirements.txt rename to demohouse/multimedia/backend/app/director-agent/src/requirements.txt index 119c4e07..2b5658fb 100644 --- a/demohouse/mutimedia/backend/app/director-agent/src/requirements.txt +++ b/demohouse/multimedia/backend/app/director-agent/src/requirements.txt @@ -1,4 +1,4 @@ -veadk-python==0.2.27 +veadk-python==0.2.28 # git+https://github.com/volcengine/veadk-python.git fastapi uvicorn[standard] diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/__init__.py b/demohouse/multimedia/backend/app/evaluate-agent/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/__init__.py rename to demohouse/multimedia/backend/app/evaluate-agent/__init__.py diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/config.yaml.example b/demohouse/multimedia/backend/app/evaluate-agent/config.yaml.example similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/config.yaml.example rename to demohouse/multimedia/backend/app/evaluate-agent/config.yaml.example diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/README.md b/demohouse/multimedia/backend/app/evaluate-agent/src/README.md similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/src/README.md rename to demohouse/multimedia/backend/app/evaluate-agent/src/README.md diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/__init__.py b/demohouse/multimedia/backend/app/evaluate-agent/src/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/src/__init__.py rename to demohouse/multimedia/backend/app/evaluate-agent/src/__init__.py diff --git 
a/demohouse/mutimedia/backend/app/evaluate-agent/src/agent.py b/demohouse/multimedia/backend/app/evaluate-agent/src/agent.py similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/src/agent.py rename to demohouse/multimedia/backend/app/evaluate-agent/src/agent.py diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/app.py b/demohouse/multimedia/backend/app/evaluate-agent/src/app.py similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/src/app.py rename to demohouse/multimedia/backend/app/evaluate-agent/src/app.py diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/__init__.py b/demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/__init__.py rename to demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/__init__.py diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/agent.py b/demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/agent.py similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/agent.py rename to demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/agent.py diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/hook/__init__.py b/demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/hook/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/hook/__init__.py rename to demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/hook/__init__.py diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/hook/direct_output_callback.py b/demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/hook/direct_output_callback.py similarity index 100% rename from 
demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/hook/direct_output_callback.py rename to demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/hook/direct_output_callback.py diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/prompt.py b/demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/prompt.py similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/prompt.py rename to demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/prompt.py diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/__init__.py b/demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/__init__.py rename to demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/__init__.py diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/geval.py b/demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/geval.py similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/geval.py rename to demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/tools/geval.py diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/utils/__init__.py b/demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/utils/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/utils/__init__.py rename to demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/utils/__init__.py diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/utils/types.py b/demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/utils/types.py similarity index 100% rename from 
demohouse/mutimedia/backend/app/evaluate-agent/src/evaluate_agent/utils/types.py rename to demohouse/multimedia/backend/app/evaluate-agent/src/evaluate_agent/utils/types.py diff --git a/demohouse/mutimedia/backend/app/evaluate-agent/src/requirements.txt b/demohouse/multimedia/backend/app/evaluate-agent/src/requirements.txt similarity index 100% rename from demohouse/mutimedia/backend/app/evaluate-agent/src/requirements.txt rename to demohouse/multimedia/backend/app/evaluate-agent/src/requirements.txt diff --git a/demohouse/mutimedia/backend/app/main.py b/demohouse/multimedia/backend/app/main.py similarity index 100% rename from demohouse/mutimedia/backend/app/main.py rename to demohouse/multimedia/backend/app/main.py diff --git a/demohouse/mutimedia/backend/app/market-agent/__init__.py b/demohouse/multimedia/backend/app/market-agent/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/__init__.py rename to demohouse/multimedia/backend/app/market-agent/__init__.py diff --git a/demohouse/mutimedia/backend/app/market-agent/config.yaml.example b/demohouse/multimedia/backend/app/market-agent/config.yaml.example similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/config.yaml.example rename to demohouse/multimedia/backend/app/market-agent/config.yaml.example diff --git a/demohouse/mutimedia/backend/app/market-agent/src/README.md b/demohouse/multimedia/backend/app/market-agent/src/README.md similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/README.md rename to demohouse/multimedia/backend/app/market-agent/src/README.md diff --git a/demohouse/mutimedia/backend/app/market-agent/src/__init__.py b/demohouse/multimedia/backend/app/market-agent/src/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/__init__.py rename to demohouse/multimedia/backend/app/market-agent/src/__init__.py diff --git 
a/demohouse/mutimedia/backend/app/market-agent/src/agent.py b/demohouse/multimedia/backend/app/market-agent/src/agent.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/agent.py rename to demohouse/multimedia/backend/app/market-agent/src/agent.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/app.py b/demohouse/multimedia/backend/app/market-agent/src/app.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/app.py rename to demohouse/multimedia/backend/app/market-agent/src/app.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/__init__.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/__init__.py rename to demohouse/multimedia/backend/app/market-agent/src/market_agent/__init__.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/agent.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/agent.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/agent.py rename to demohouse/multimedia/backend/app/market-agent/src/market_agent/agent.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/hook/__init__.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/hook/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/hook/__init__.py rename to demohouse/multimedia/backend/app/market-agent/src/market_agent/hook/__init__.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/hook/format_hook.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/hook/format_hook.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/hook/format_hook.py rename to 
demohouse/multimedia/backend/app/market-agent/src/market_agent/hook/format_hook.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/prompt.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/prompt.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/prompt.py rename to demohouse/multimedia/backend/app/market-agent/src/market_agent/prompt.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/__init__.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/__init__.py rename to demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/__init__.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/filter_by_llm.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/filter_by_llm.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/filter_by_llm.py rename to demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/filter_by_llm.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/image_understand.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/image_understand.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/image_understand.py rename to demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/image_understand.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/is_image.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/is_image.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/is_image.py rename to 
demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/is_image.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/link_reader.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/link_reader.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/link_reader.py rename to demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/link_reader.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/web_parse.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/web_parse.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/web_parse.py rename to demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/web_parse.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/web_parser_local.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/web_parser_local.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/tools/web_parser_local.py rename to demohouse/multimedia/backend/app/market-agent/src/market_agent/tools/web_parser_local.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/utils/__init__.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/utils/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/utils/__init__.py rename to demohouse/multimedia/backend/app/market-agent/src/market_agent/utils/__init__.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/market_agent/utils/types.py b/demohouse/multimedia/backend/app/market-agent/src/market_agent/utils/types.py similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/market_agent/utils/types.py rename to 
demohouse/multimedia/backend/app/market-agent/src/market_agent/utils/types.py diff --git a/demohouse/mutimedia/backend/app/market-agent/src/requirements.txt b/demohouse/multimedia/backend/app/market-agent/src/requirements.txt similarity index 100% rename from demohouse/mutimedia/backend/app/market-agent/src/requirements.txt rename to demohouse/multimedia/backend/app/market-agent/src/requirements.txt diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/__init__.py b/demohouse/multimedia/backend/app/multimedia-agent/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/multimedia-agent/__init__.py rename to demohouse/multimedia/backend/app/multimedia-agent/__init__.py diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/config.yaml.example b/demohouse/multimedia/backend/app/multimedia-agent/config.yaml.example similarity index 100% rename from demohouse/mutimedia/backend/app/multimedia-agent/config.yaml.example rename to demohouse/multimedia/backend/app/multimedia-agent/config.yaml.example diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/__init__.py b/demohouse/multimedia/backend/app/multimedia-agent/src/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/multimedia-agent/src/__init__.py rename to demohouse/multimedia/backend/app/multimedia-agent/src/__init__.py diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/agentkit.py b/demohouse/multimedia/backend/app/multimedia-agent/src/agentkit.py similarity index 100% rename from demohouse/mutimedia/backend/app/multimedia-agent/src/agentkit.py rename to demohouse/multimedia/backend/app/multimedia-agent/src/agentkit.py diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/__init__.py b/demohouse/multimedia/backend/app/multimedia-agent/src/multimedia_agent/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/__init__.py rename to 
demohouse/multimedia/backend/app/multimedia-agent/src/multimedia_agent/__init__.py diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/agent.py b/demohouse/multimedia/backend/app/multimedia-agent/src/multimedia_agent/agent.py similarity index 100% rename from demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/agent.py rename to demohouse/multimedia/backend/app/multimedia-agent/src/multimedia_agent/agent.py diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/prompt.py b/demohouse/multimedia/backend/app/multimedia-agent/src/multimedia_agent/prompt.py similarity index 100% rename from demohouse/mutimedia/backend/app/multimedia-agent/src/multimedia_agent/prompt.py rename to demohouse/multimedia/backend/app/multimedia-agent/src/multimedia_agent/prompt.py diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/requirements.txt b/demohouse/multimedia/backend/app/multimedia-agent/src/requirements.txt similarity index 100% rename from demohouse/mutimedia/backend/app/multimedia-agent/src/requirements.txt rename to demohouse/multimedia/backend/app/multimedia-agent/src/requirements.txt diff --git a/demohouse/mutimedia/backend/app/multimedia-agent/src/server.py b/demohouse/multimedia/backend/app/multimedia-agent/src/server.py similarity index 100% rename from demohouse/mutimedia/backend/app/multimedia-agent/src/server.py rename to demohouse/multimedia/backend/app/multimedia-agent/src/server.py diff --git a/demohouse/mutimedia/backend/app/release-agent/__init__.py b/demohouse/multimedia/backend/app/release-agent/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/__init__.py rename to demohouse/multimedia/backend/app/release-agent/__init__.py diff --git a/demohouse/mutimedia/backend/app/release-agent/config.yaml.example b/demohouse/multimedia/backend/app/release-agent/config.yaml.example similarity index 100% rename from 
demohouse/mutimedia/backend/app/release-agent/config.yaml.example rename to demohouse/multimedia/backend/app/release-agent/config.yaml.example diff --git a/demohouse/mutimedia/backend/app/release-agent/src/README.md b/demohouse/multimedia/backend/app/release-agent/src/README.md similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/README.md rename to demohouse/multimedia/backend/app/release-agent/src/README.md diff --git a/demohouse/mutimedia/backend/app/release-agent/src/__init__.py b/demohouse/multimedia/backend/app/release-agent/src/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/__init__.py rename to demohouse/multimedia/backend/app/release-agent/src/__init__.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/agent.py b/demohouse/multimedia/backend/app/release-agent/src/agent.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/agent.py rename to demohouse/multimedia/backend/app/release-agent/src/agent.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/app.py b/demohouse/multimedia/backend/app/release-agent/src/app.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/app.py rename to demohouse/multimedia/backend/app/release-agent/src/app.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/__init__.py b/demohouse/multimedia/backend/app/release-agent/src/release_agent/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/release_agent/__init__.py rename to demohouse/multimedia/backend/app/release-agent/src/release_agent/__init__.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/agent.py b/demohouse/multimedia/backend/app/release-agent/src/release_agent/agent.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/release_agent/agent.py rename to 
demohouse/multimedia/backend/app/release-agent/src/release_agent/agent.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/hook/__init__.py b/demohouse/multimedia/backend/app/release-agent/src/release_agent/hook/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/release_agent/hook/__init__.py rename to demohouse/multimedia/backend/app/release-agent/src/release_agent/hook/__init__.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/hook/format_hook.py b/demohouse/multimedia/backend/app/release-agent/src/release_agent/hook/format_hook.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/release_agent/hook/format_hook.py rename to demohouse/multimedia/backend/app/release-agent/src/release_agent/hook/format_hook.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/prompt.py b/demohouse/multimedia/backend/app/release-agent/src/release_agent/prompt.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/release_agent/prompt.py rename to demohouse/multimedia/backend/app/release-agent/src/release_agent/prompt.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/__init__.py b/demohouse/multimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/__init__.py rename to demohouse/multimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/__init__.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/agent.py b/demohouse/multimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/agent.py similarity index 100% rename from 
demohouse/mutimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/agent.py rename to demohouse/multimedia/backend/app/release-agent/src/release_agent/sub_agents/film_agent/agent.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/tools/__init__.py b/demohouse/multimedia/backend/app/release-agent/src/release_agent/tools/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/release_agent/tools/__init__.py rename to demohouse/multimedia/backend/app/release-agent/src/release_agent/tools/__init__.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/tools/video_combine.py b/demohouse/multimedia/backend/app/release-agent/src/release_agent/tools/video_combine.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/release_agent/tools/video_combine.py rename to demohouse/multimedia/backend/app/release-agent/src/release_agent/tools/video_combine.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/tools/video_combine_vod.py b/demohouse/multimedia/backend/app/release-agent/src/release_agent/tools/video_combine_vod.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/release_agent/tools/video_combine_vod.py rename to demohouse/multimedia/backend/app/release-agent/src/release_agent/tools/video_combine_vod.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/utils/__init__.py b/demohouse/multimedia/backend/app/release-agent/src/release_agent/utils/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/release_agent/utils/__init__.py rename to demohouse/multimedia/backend/app/release-agent/src/release_agent/utils/__init__.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/release_agent/utils/types.py b/demohouse/multimedia/backend/app/release-agent/src/release_agent/utils/types.py similarity 
index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/release_agent/utils/types.py rename to demohouse/multimedia/backend/app/release-agent/src/release_agent/utils/types.py diff --git a/demohouse/mutimedia/backend/app/release-agent/src/requirements.txt b/demohouse/multimedia/backend/app/release-agent/src/requirements.txt similarity index 100% rename from demohouse/mutimedia/backend/app/release-agent/src/requirements.txt rename to demohouse/multimedia/backend/app/release-agent/src/requirements.txt diff --git a/demohouse/mutimedia/backend/app/short_link/README.md b/demohouse/multimedia/backend/app/short_link/README.md similarity index 100% rename from demohouse/mutimedia/backend/app/short_link/README.md rename to demohouse/multimedia/backend/app/short_link/README.md diff --git a/demohouse/mutimedia/backend/app/short_link/__init__.py b/demohouse/multimedia/backend/app/short_link/__init__.py similarity index 100% rename from demohouse/mutimedia/backend/app/short_link/__init__.py rename to demohouse/multimedia/backend/app/short_link/__init__.py diff --git a/demohouse/mutimedia/backend/app/short_link/app.py b/demohouse/multimedia/backend/app/short_link/app.py similarity index 100% rename from demohouse/mutimedia/backend/app/short_link/app.py rename to demohouse/multimedia/backend/app/short_link/app.py diff --git a/demohouse/mutimedia/backend/app/short_link/requirements.txt b/demohouse/multimedia/backend/app/short_link/requirements.txt similarity index 100% rename from demohouse/mutimedia/backend/app/short_link/requirements.txt rename to demohouse/multimedia/backend/app/short_link/requirements.txt diff --git a/demohouse/mutimedia/backend/pyproject.toml b/demohouse/multimedia/backend/pyproject.toml similarity index 100% rename from demohouse/mutimedia/backend/pyproject.toml rename to demohouse/multimedia/backend/pyproject.toml From 29a4f4033478ec82190243268365ebbba78d2d29 Mon Sep 17 00:00:00 2001 From: "hanzhi.421" Date: Mon, 15 Dec 2025 19:26:49 +0800 
Subject: [PATCH 3/3] fix(demohouse/multimedia): multimedia agent backend --- demohouse/multimedia/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/demohouse/multimedia/README.md b/demohouse/multimedia/README.md index c8d2baca..a7c37791 100644 --- a/demohouse/multimedia/README.md +++ b/demohouse/multimedia/README.md @@ -7,11 +7,11 @@ ### 费用说明 -| 相关服务 | 描述 | 计费说明 | -|-------------------------------------------------------------------------------------------------------------|---------------------------------------| --- | -| [Doubao-Seed-1.6](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-seed-1-6) | 负责对实时捕捉的屏幕截图进行视觉内容理解,结合当前画面进行深度思考并回答。 | [多种计费方式](https://www.volcengine.com/docs/82379/1099320) -| [Doubao-Seedance 1.0 pro](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-seedance-1-0-pro) | 负责将图片和文字描述转为视频。 | [多种计费方式](https://www.volcengine.com/docs/82379/1099320) |\ -| [Doubao-Seedream 4.5 pro](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-seedream-4-5) | 负责根据文字或参考图生成图片 | [多种计费方式](https://www.volcengine.com/docs/82379/1099320) |\ +| 相关服务 | 描述 | 计费说明 | +|-------------------------------------------------------------------------------------------------------------|-------------------| --- | +| [Doubao-Seed-1.6](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-seed-1-6) | 负责理解用户信息并转化为工具调用。 | [多种计费方式](https://www.volcengine.com/docs/82379/1099320) +| [Doubao-Seedance 1.0 pro](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-seedance-1-0-pro) | 负责将图片和文字描述转为视频。 | [多种计费方式](https://www.volcengine.com/docs/82379/1099320) |\ +| [Doubao-Seedream 4.5 pro](https://console.volcengine.com/ark/region:ark+cn-beijing/model/detail?Id=doubao-seedream-4-5) | 负责根据文字或参考图生成图片 | [多种计费方式](https://www.volcengine.com/docs/82379/1099320) |\ ## 环境准备