diff --git a/.dockerignore b/.dockerignore index c9663b50..1e0620b0 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,5 +1,8 @@ .git .gitignore +fastapi_app/.env +deploy/docker.env +deploy/profiles/*.env __pycache__/ *.pyc *.pyo diff --git a/.gitignore b/.gitignore index fae185e7..5bd91f60 100644 --- a/.gitignore +++ b/.gitignore @@ -328,3 +328,4 @@ deploy/precise_clean.sh # Runtime temp directories /tmp*/ /outputs/system/tmp/ +deploy/docker.env diff --git a/DEPLOY.md b/DEPLOY.md index 1a55cc75..bf188c25 100644 --- a/DEPLOY.md +++ b/DEPLOY.md @@ -1,7 +1,52 @@ # Paper2Any Deployment +## 0. 运行前依赖边界 + +这个项目现在需要区分 4 类依赖,不要再把它们都塞进一个 requirements 里: + +- `requirements-base.txt` + 通用 Python 运行时依赖。 +- `requirements-paper.txt` + 论文 / PDF / 科研绘图相关额外 Python 包。 +- `requirements-cu12.txt` + NVIDIA Linux + CUDA 12 的额外 GPU 运行时包。 +- `requirements-system-ubuntu.txt` + Ubuntu/Debian 系统工具包名,不是 Python 包。 + +几个关键事实: + +- `ffmpeg` +- `libreoffice/soffice` +- `inkscape` +- `poppler-utils` +- `wkhtmltopdf` +- `tectonic` + +这些都不是 `pip` 包。 + +当前 `deploy/start.sh` / `deploy/start_nv.sh` / `deploy/start_muxi.sh` 只负责: + +- 读取 profile +- 选择 Python +- 校验部分 Python 运行时 +- 启动模型服务 / 后端 / 前端 + +它们**不会自动安装系统包**,也**不会自动安装 npm / conda / pip 依赖**。 + ## 1. 配置文件职责 +### 1.0 
先决定用哪套 `.env` + +现在推荐先做这个选择: + +- **粗粒度模式**:`fastapi_app/.env.simple.example` + `frontend-workflow/.env.simple.example` +- **细粒度模式**:`fastapi_app/.env.example` + `frontend-workflow/.env.example` + +建议: + +- 大多数部署直接先用粗粒度模式 +- 只有需要逐个 workflow 控模型/provider 时,再切细粒度模式 + 这个项目现在只保留三类配置文件,各管各的,不要重复写同一套 URL / key。 ### `fastapi_app/.env` @@ -264,6 +309,10 @@ bash deploy/stop_stack.sh ### 仅启动后端 ```bash +set -a +source deploy/profiles/nv.env +set +a + bash deploy/start.sh ``` diff --git a/Dockerfile b/Dockerfile index e473cc24..e32d69b5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,22 @@ -FROM python:3.11-slim +ARG PYTHON_BASE_IMAGE=python:3.11-slim +FROM ${PYTHON_BASE_IMAGE} + +ARG INSTALL_CUDA=0 WORKDIR /app ENV PYTHONUNBUFFERED=1 \ PYTHONDONTWRITEBYTECODE=1 \ PIP_NO_CACHE_DIR=1 \ - PIP_DISABLE_PIP_VERSION_CHECK=1 + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_ROOT_USER_ACTION=ignore \ + PAPER2ANY_RUNTIME_TMPDIR=/app/outputs/system/tmp RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ + ca-certificates \ curl \ + ffmpeg \ git \ inkscape \ libgl1 \ @@ -29,13 +36,17 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && rm wkhtmltox_0.12.6.1-3.bookworm_amd64.deb \ && rm -rf /var/lib/apt/lists/* -COPY requirements-base.txt requirements-paper.txt requirements-paper-backup.txt ./ +COPY requirements-base.txt requirements-paper.txt requirements-paper-backup.txt requirements-cu12.txt ./ RUN pip install --upgrade pip && \ - (pip install -r requirements-paper.txt || pip install -r requirements-paper-backup.txt) + pip install -r requirements-paper.txt && \ + if [ "$INSTALL_CUDA" = "1" ]; then pip install -r requirements-cu12.txt; fi COPY . . +RUN pip install -e . 
&& \ + mkdir -p /app/outputs/system/tmp /app/models /app/logs /app/data /app/database /app/raw_data_store /app/rebuttal_sessions + EXPOSE 8000 -CMD ["uvicorn", "fastapi_app.main:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["python", "-m", "uvicorn", "fastapi_app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/README.md b/README.md index 00e53443..bfc2c9a4 100644 --- a/README.md +++ b/README.md @@ -354,6 +354,27 @@ Paper2Any currently includes the following sub-capabilities: ![Python](https://img.shields.io/badge/Python-3.11+-3776AB?style=flat-square&logo=python&logoColor=white) ![pip](https://img.shields.io/badge/pip-latest-3776AB?style=flat-square&logo=pypi&logoColor=white) +### `.env` Modes + +Paper2Any now supports two configuration styles: + +- **Simple mode**: use `*.env.simple.example`. Recommended for most self-hosted users. +- **Advanced mode**: use `*.env.example`. Use this only when you need workflow-specific model/provider overrides. + +Quick choice: + +```bash +cp fastapi_app/.env.simple.example fastapi_app/.env +cp frontend-workflow/.env.simple.example frontend-workflow/.env +``` + +If you need fine-grained workflow overrides instead: + +```bash +cp fastapi_app/.env.example fastapi_app/.env +cp frontend-workflow/.env.example frontend-workflow/.env +``` +
🐳 Docker (Recommended) — Deployment & Updates @@ -363,8 +384,9 @@ git clone https://github.com/OpenDCAI/Paper2Any.git cd Paper2Any # 2. Configure environment variables -cp fastapi_app/.env.example fastapi_app/.env -cp frontend-workflow/.env.example frontend-workflow/.env +cp fastapi_app/.env.simple.example fastapi_app/.env +cp frontend-workflow/.env.simple.example frontend-workflow/.env +cp deploy/docker.env.example deploy/docker.env ``` **Required configuration:** @@ -374,12 +396,21 @@ cp frontend-workflow/.env.example frontend-workflow/.env # Internal API auth key. Must match frontend VITE_API_KEY. BACKEND_API_KEY=your-backend-api-key -# Required: Your LLM API URL (replace with your own) -DEFAULT_LLM_API_URL=https://api.openai.com/v1/ +# Recommended: let backend own all workflow model choices +APP_BILLING_MODE=free +PAPER2ANY_CONFIG_MODE=simple + +# Required: unified text entry +SIMPLE_TEXT_API_URL=https://your-text-gateway/v1 +SIMPLE_TEXT_API_KEY=your_text_key + +# Optional but recommended: unified image entry +SIMPLE_IMAGE_API_URL=https://your-image-gateway +SIMPLE_IMAGE_API_KEY=your_image_key # Optional: DrawIO OCR / VLM service -PAPER2DRAWIO_OCR_API_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 -PAPER2DRAWIO_OCR_API_KEY=your_dashscope_key +SIMPLE_OCR_API_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 +SIMPLE_OCR_API_KEY=your_dashscope_key # Optional: MinerU official remote API MINERU_API_BASE_URL=https://mineru.net/api/v4 @@ -399,52 +430,63 @@ MINERU_API_KEY=your_mineru_api_key # Must match BACKEND_API_KEY in fastapi_app/.env VITE_API_KEY=your-backend-api-key -# Required: LLM API URLs available in the UI dropdown (comma separated) -VITE_DEFAULT_LLM_API_URL=https://api.openai.com/v1 -VITE_LLM_API_URLS=https://api.openai.com/v1 +# Usually keep VITE_API_BASE_URL empty in Docker, because nginx proxies /api and /outputs +VITE_API_BASE_URL= + +# Frontend display defaults only +VITE_DEFAULT_LLM_API_URL=https://your-text-gateway/v1 
+VITE_DEFAULT_LLM_MODEL=gpt-4o -# Optional: DrawIO page model candidates shown in the UI -VITE_PAPER2DRAWIO_MODEL=claude-sonnet-4-5-20250929,gpt-5.2 # Optional: Supabase (keep consistent with backend) # VITE_SUPABASE_URL=https://your-project-id.supabase.co # VITE_SUPABASE_ANON_KEY=your_supabase_anon_key ``` +`deploy/docker.env` (compose overrides): +```bash +BACKEND_PORT=8000 +FRONTEND_PORT=3000 +DOCKER_APP_WORKERS=1 + +# Optional: enable local SAM3 container by running DOCKER_WITH_SAM3=1 bash deploy/docker-up.sh +SAM3_PORT=8021 +SAM3_SERVER_URLS= +``` + ```bash # 3. Build + run -docker compose up -d --build +bash deploy/docker-up.sh ``` Open: - Frontend: http://localhost:3000 - Backend health: http://localhost:8000/health -> **GPU services note:** Docker only starts the frontend and backend. No GPU model services are included. +> **GPU services note:** Docker starts backend + frontend by default. > - Paper2PPT, Paper2Figure, Knowledge Base, etc. only need LLM APIs and work out of the box. -> - **PDF2PPT, Image2PPT, Image2Drawio** require the SAM3 segmentation service (needs GPU), deployed separately: +> - **PDF2PPT, Image2PPT, Image2Drawio** require SAM3 segmentation. +> - You can either point backend `.env` to an external SAM3 service with `SAM3_SERVER_URLS=...`, +> or start the optional local SAM3 compose profile: > ```bash -> # On a machine with GPU -> python -m dataflow_agent.toolkits.model_servers.sam3_server \ -> --port 8001 --checkpoint models/sam3/sam3.pt \ -> --bpe models/sam3/bpe_simple_vocab_16e6.txt.gz --device cuda +> DOCKER_WITH_SAM3=1 bash deploy/docker-up.sh > ``` -> Then add to `fastapi_app/.env`: `SAM3_SERVER_URLS=http://GPU_MACHINE_IP:8001` > > See the "Advanced: Local Model Server Load Balancing" section below for details. 
Modify & update: -- After changing code or `.env`, rebuild: `docker compose up -d --build` +- After changing code or `.env`, rebuild: `bash deploy/docker-up.sh` - Pull latest code and rebuild: - `git pull` - - `docker compose up -d --build` + - `bash deploy/docker-up.sh` Common commands: -- View logs: `docker compose logs -f` -- Stop services: `docker compose down` +- View logs: `bash deploy/docker-logs.sh` +- Stop services: `bash deploy/docker-down.sh` +- Build only: `bash deploy/docker-build.sh` Notes: - The first build may take a while (system deps + Python deps). -- Frontend env is baked at build time (compose build args). If you change it, rebuild with `docker compose up -d --build`. +- Frontend env is baked at build time. If you change `frontend-workflow/.env` or `deploy/docker.env`, rebuild with `bash deploy/docker-up.sh`. - Outputs/models are mounted to the host (`./outputs`, `./models`) for persistence.
@@ -473,23 +515,37 @@ pip install -e . #### 2. Install Paper2Any-specific Dependencies (Required) -Paper2Any involves LaTeX rendering, vector graphics processing as well as PPT/PDF conversion, which require extra dependencies: +Paper2Any involves LaTeX rendering, vector graphics processing as well as PPT/PDF conversion, which require extra dependencies. + +The dependency boundary is now: +- `requirements-base.txt`: shared cross-platform Python runtime +- `requirements-paper.txt`: paper / PDF / figure extras +- `requirements-cu12.txt`: NVIDIA CUDA 12 Linux GPU extras +- `requirements-system-ubuntu.txt`: Ubuntu/Debian system packages, not Python packages ```bash -# 1. Python dependencies -pip install -r requirements-paper.txt || pip install -r requirements-paper-backup.txt +# 1. Paper / PDF / figure Python extras +pip install -r requirements-paper.txt -# 2. LaTeX engine (tectonic) - recommended via conda +# 2. NVIDIA GPU runtime extras (Linux + CUDA 12 only) +pip install -r requirements-cu12.txt + +# 3. LaTeX engine (tectonic) - recommended via conda conda install -c conda-forge tectonic -y -# 3. Resolve doclayout_yolo dependency conflicts (Important) +# 4. Resolve doclayout_yolo dependency conflicts (Important) pip install doclayout_yolo --no-deps -# 4. System dependencies (Ubuntu example) +# 5. System dependencies (Ubuntu example; full list is mirrored in requirements-system-ubuntu.txt) sudo apt-get update -sudo apt-get install -y inkscape libreoffice poppler-utils wkhtmltopdf +sudo apt-get install -y ffmpeg inkscape libreoffice poppler-utils wkhtmltopdf ``` +> [!IMPORTANT] +> `ffmpeg`, `libreoffice/soffice`, `inkscape`, `poppler-utils`, `wkhtmltopdf`, and `tectonic` +> are external system tools. They are not installed by `pip`, and `deploy/start*.sh` +> does not auto-install them. + #### 3. 
Environment Variables ```bash @@ -576,16 +632,15 @@ VITE_LLM_API_URLS=https://api.apiyi.com/v1,http://b.apiyi.com:16888/v1,http://12 ```bash VITE_SUPABASE_URL=https://your-project.supabase.co VITE_SUPABASE_ANON_KEY=your-anon-key -SUPABASE_SERVICE_ROLE_KEY=your-service-role-key -SUPABASE_JWT_SECRET=your-jwt-secret ``` ##### Running Without Supabase If you skip Supabase configuration: - ✅ All core features work normally -- ✅ CLI scripts work without any configuration -- ❌ No user authentication or quotas +- ✅ CLI scripts do not require Supabase +- ❌ No user authentication +- ❌ No cloud account features such as points, redeem, invite, and history - ❌ No cloud file storage @@ -656,12 +711,15 @@ pip install -e . #### 2. Install Paper2Any-specific Dependencies (Recommended) -Paper2Any involves LaTeX rendering and vector graphics processing, which require extra dependencies (see `requirements-paper.txt`): +Paper2Any involves LaTeX rendering and vector graphics processing, which require extra dependencies: ```bash # Python dependencies pip install -r requirements-paper.txt +# NVIDIA GPU runtime extras (Linux only; skip on Windows) +# pip install -r requirements-cu12.txt + # tectonic: LaTeX engine (recommended via conda) conda install -c conda-forge tectonic -y ``` @@ -694,18 +752,8 @@ pip install vllm-0.11.0+cu124-cp312-cp312-win_amd64.whl **Paper2Any - Paper Workflow Web Frontend (Recommended)** ```bash -# Configure local backend runtime (single source of truth) -# Edit deploy/app_config.sh: -# APP_PORT=8000 -# APP_WORKERS=2 - -# Start backend API -./deploy/start.sh - -# Start frontend (new terminal) -cd frontend-workflow -npm install -npm run dev +# Recommended one-click entrypoint on NVIDIA machines +bash deploy/start_nv.sh ``` Default local addresses: @@ -713,12 +761,15 @@ Default local addresses: - Backend health: http://127.0.0.1:8000/health Useful local deploy commands: -- Start backend: `./deploy/start.sh` +- Start full stack (recommended): `bash 
deploy/start_nv.sh` +- Start backend only after loading a deploy profile: + `set -a && source deploy/profiles/nv.env && set +a && bash deploy/start.sh` - Stop backend: `./deploy/stop.sh` - Restart backend: `./deploy/restart.sh` Notes: -- `deploy/start.sh` and `deploy/stop.sh` both read the same runtime config from `deploy/app_config.sh`. +- `deploy/start.sh` reads `deploy/app_config.sh`, but it does not load `deploy/profiles/*.env` by itself. +- `deploy/start_nv.sh` is the safe one-click entrypoint because it loads `deploy/profiles/nv.env`, prepares local models, starts model servers, then starts backend and frontend. - If you change `APP_PORT`, update the frontend proxy target in `frontend-workflow/vite.config.ts` as well. **Configure Frontend Proxy** @@ -768,15 +819,8 @@ vllm serve opendatalab/MinerU2.5-2509-1.2B ` #### 🎨 Web Frontend (Recommended) ```bash -# Configure deploy/app_config.sh first if you want to change the local port/workers - -# Start backend API -./deploy/start.sh - -# Start frontend (new terminal) -cd frontend-workflow -npm install -npm run dev +# Recommended one-click entrypoint on NVIDIA machines +bash deploy/start_nv.sh ``` Visit `http://localhost:3000`. diff --git a/README_CN.md b/README_CN.md index 0e487787..bfebc127 100644 --- a/README_CN.md +++ b/README_CN.md @@ -345,6 +345,27 @@ Paper2Any 当前包含以下几个子能力: ![Python](https://img.shields.io/badge/Python-3.11+-3776AB?style=flat-square&logo=python&logoColor=white) ![pip](https://img.shields.io/badge/pip-latest-3776AB?style=flat-square&logo=pypi&logoColor=white) +### `.env` 配置模式 + +现在有两套配置方式: + +- **粗粒度模式**:使用 `*.env.simple.example`。推荐大多数自部署用户直接用这套。 +- **细粒度模式**:使用 `*.env.example`。只有需要逐个 workflow 覆盖模型和 provider 时再用。 + +推荐起步: + +```bash +cp fastapi_app/.env.simple.example fastapi_app/.env +cp frontend-workflow/.env.simple.example frontend-workflow/.env +``` + +如果确实需要细粒度覆盖,再改成: + +```bash +cp fastapi_app/.env.example fastapi_app/.env +cp frontend-workflow/.env.example frontend-workflow/.env +``` +
🐳 Docker 快速启动(推荐)— 部署与更新 @@ -354,8 +375,9 @@ git clone https://github.com/OpenDCAI/Paper2Any.git cd Paper2Any # 2. 配置环境变量 -cp fastapi_app/.env.example fastapi_app/.env -cp frontend-workflow/.env.example frontend-workflow/.env +cp fastapi_app/.env.simple.example fastapi_app/.env +cp frontend-workflow/.env.simple.example frontend-workflow/.env +cp deploy/docker.env.example deploy/docker.env ``` **必须修改的配置项:** @@ -365,12 +387,21 @@ cp frontend-workflow/.env.example frontend-workflow/.env # 内部接口鉴权 key,必须与前端 VITE_API_KEY 一致 BACKEND_API_KEY=your-backend-api-key -# 必填:你的 LLM API 地址(替换为你自己的) -DEFAULT_LLM_API_URL=https://api.openai.com/v1/ +# 推荐:由后端统一决定 workflow 使用的模型 +APP_BILLING_MODE=free +PAPER2ANY_CONFIG_MODE=simple + +# 必填:统一文本入口 +SIMPLE_TEXT_API_URL=https://your-text-gateway/v1 +SIMPLE_TEXT_API_KEY=your_text_key + +# 可选但推荐:统一生图入口 +SIMPLE_IMAGE_API_URL=https://your-image-gateway +SIMPLE_IMAGE_API_KEY=your_image_key # 可选:DrawIO OCR / VLM 服务 -PAPER2DRAWIO_OCR_API_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 -PAPER2DRAWIO_OCR_API_KEY=your_dashscope_key +SIMPLE_OCR_API_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 +SIMPLE_OCR_API_KEY=your_dashscope_key # 可选:MinerU 官方远端 API MINERU_API_BASE_URL=https://mineru.net/api/v4 @@ -390,52 +421,63 @@ MINERU_API_KEY=your_mineru_api_key # 必须与后端 BACKEND_API_KEY 完全一致 VITE_API_KEY=your-backend-api-key -# 必填:前端可用的 LLM API 地址(逗号分隔,显示在 UI 下拉菜单中) -VITE_DEFAULT_LLM_API_URL=https://api.openai.com/v1 -VITE_LLM_API_URLS=https://api.openai.com/v1 +# Docker 下通常保持为空,由 nginx 反代 /api 和 /outputs +VITE_API_BASE_URL= + +# 前端只负责展示默认值,不控制后端真实模型 +VITE_DEFAULT_LLM_API_URL=https://your-text-gateway/v1 +VITE_DEFAULT_LLM_MODEL=gpt-4o -# 可选:DrawIO 页面展示的模型候选列表 -VITE_PAPER2DRAWIO_MODEL=claude-sonnet-4-5-20250929,gpt-5.2 # 可选:Supabase(与后端保持一致) # VITE_SUPABASE_URL=https://your-project-id.supabase.co # VITE_SUPABASE_ANON_KEY=your_supabase_anon_key ``` +`deploy/docker.env`(compose 覆盖项): +```bash +BACKEND_PORT=8000 +FRONTEND_PORT=3000 
+DOCKER_APP_WORKERS=1 + +# 可选:本地 SAM3 容器端口 +SAM3_PORT=8021 +SAM3_SERVER_URLS= +``` + ```bash # 3. 构建并启动 -docker compose up -d --build +bash deploy/docker-up.sh ``` 访问地址: - 前端:http://localhost:3000 - 后端健康检查:http://localhost:8000/health -> **GPU 服务说明:** Docker 默认启动的是前后端服务,不包含 GPU 模型服务。 +> **GPU 服务说明:** Docker 默认启动后端 + 前端。 > - Paper2PPT、Paper2Figure、知识库等功能仅依赖 LLM API,Docker 启动后即可使用。 -> - **PDF2PPT、Image2PPT、Image2Drawio** 依赖 SAM3 图像分割服务(需要 GPU),需额外部署: +> - **PDF2PPT、Image2PPT、Image2Drawio** 依赖 SAM3 图像分割。 +> - 你可以在 `fastapi_app/.env` 里配置外部 `SAM3_SERVER_URLS=...`, +> 或者直接启用 compose 里的本地 SAM3 profile: > ```bash -> # 在有 GPU 的机器上启动 SAM3 服务 -> python -m dataflow_agent.toolkits.model_servers.sam3_server \ -> --port 8001 --checkpoint models/sam3/sam3.pt \ -> --bpe models/sam3/bpe_simple_vocab_16e6.txt.gz --device cuda +> DOCKER_WITH_SAM3=1 bash deploy/docker-up.sh > ``` -> 然后在 `fastapi_app/.env` 中添加:`SAM3_SERVER_URLS=http://GPU机器IP:8001` > > 详见下方「高级配置:本地模型服务负载均衡」部分。 修改与更新: -- 代码或 `.env` 变更后重新构建:`docker compose up -d --build` +- 代码或 `.env` 变更后重新构建:`bash deploy/docker-up.sh` - 拉取最新代码并重建: - `git pull` - - `docker compose up -d --build` + - `bash deploy/docker-up.sh` 常用命令: -- 查看日志:`docker compose logs -f` -- 停止服务:`docker compose down` +- 查看日志:`bash deploy/docker-logs.sh` +- 停止服务:`bash deploy/docker-down.sh` +- 只构建:`bash deploy/docker-build.sh` 说明: - 首次构建会比较慢(系统依赖 + Python 依赖)。 -- 前端配置在构建期生效(compose build args),修改后需重新 `docker compose up -d --build`。 +- 前端配置在构建期生效,修改 `frontend-workflow/.env` 或 `deploy/docker.env` 后需重新 `bash deploy/docker-up.sh`。 - 输出和模型目录会挂载到宿主机(`./outputs`、`./models`),数据不会丢。
@@ -464,23 +506,36 @@ pip install -e . #### 2. 安装 Paper2Any 相关依赖(必须) -Paper2Any 涉及 LaTeX 渲染、矢量图处理以及 PPT/PDF 转换,需要额外依赖: +Paper2Any 涉及 LaTeX 渲染、矢量图处理以及 PPT/PDF 转换,需要额外依赖。 + +当前依赖边界建议如下: +- `requirements-base.txt`:跨平台通用 Python 运行时 +- `requirements-paper.txt`:论文 / PDF / 科研绘图相关额外 Python 包 +- `requirements-cu12.txt`:NVIDIA CUDA 12 的 Linux GPU 额外依赖 +- `requirements-system-ubuntu.txt`:Ubuntu/Debian 系统包,不是 Python 包 ```bash -# 1. Python 依赖 -pip install -r requirements-paper.txt || pip install -r requirements-paper-backup.txt +# 1. 论文 / PDF / 科研绘图额外 Python 依赖 +pip install -r requirements-paper.txt -# 2. LaTeX 引擎 (tectonic) - 推荐用 conda 安装 +# 2. NVIDIA GPU 运行时额外依赖(仅 Linux + CUDA 12) +pip install -r requirements-cu12.txt + +# 3. LaTeX 引擎 (tectonic) - 推荐用 conda 安装 conda install -c conda-forge tectonic -y -# 3. 解决 doclayout_yolo 依赖冲突(重要) +# 4. 解决 doclayout_yolo 依赖冲突(重要) pip install doclayout_yolo --no-deps -# 4. 系统依赖 (Ubuntu 示例) +# 5. 系统依赖 (Ubuntu 示例;完整列表见 requirements-system-ubuntu.txt) sudo apt-get update -sudo apt-get install -y inkscape libreoffice poppler-utils wkhtmltopdf +sudo apt-get install -y ffmpeg inkscape libreoffice poppler-utils wkhtmltopdf ``` +> [!IMPORTANT] +> `ffmpeg`、`libreoffice/soffice`、`inkscape`、`poppler-utils`、`wkhtmltopdf`、`tectonic` +> 这些都是系统工具,不是 `pip` 包;`deploy/start*.sh` 也不会自动安装它们。 + #### 3. 配置环境变量 ```bash @@ -564,16 +619,15 @@ VITE_LLM_API_URLS=https://api.apiyi.com/v1,http://b.apiyi.com:16888/v1,http://12 ```bash VITE_SUPABASE_URL=https://your-project.supabase.co VITE_SUPABASE_ANON_KEY=your-anon-key -SUPABASE_SERVICE_ROLE_KEY=your-service-role-key -SUPABASE_JWT_SECRET=your-jwt-secret ``` ##### 不配置 Supabase 的情况 如果跳过 Supabase 配置: - ✅ 所有核心功能正常工作 -- ✅ CLI 脚本无需任何配置即可使用 -- ❌ 无用户认证或配额限制 +- ✅ CLI 脚本不依赖 Supabase +- ❌ 无用户认证 +- ❌ 无账户积分、兑换码、邀请码、历史文件等账号能力 - ❌ 无云文件存储 @@ -644,12 +698,15 @@ pip install -e . #### 2. 
安装 Paper2Any 相关依赖(推荐) -Paper2Any 涉及 LaTeX 渲染与矢量图处理,需要额外依赖(见 requirements-paper.txt): +Paper2Any 涉及 LaTeX 渲染与矢量图处理,需要额外依赖: ```bash # Python 依赖 pip install -r requirements-paper.txt +# NVIDIA GPU 运行时额外依赖(仅 Linux,需要时再装) +# pip install -r requirements-cu12.txt + # tectonic:LaTeX 引擎(推荐用 conda 安装) conda install -c conda-forge tectonic -y ``` @@ -682,18 +739,8 @@ pip install vllm-0.11.0+cu124-cp312-cp312-win_amd64.whl **Paper2Any - 论文工作流 Web 前端(推荐)** ```bash -# 本地后端运行配置统一在 deploy/app_config.sh 中维护 -# 可在该文件中修改: -# APP_PORT=8000 -# APP_WORKERS=2 - -# 启动后端 API -./deploy/start.sh - -# 启动前端(新终端) -cd frontend-workflow -npm install -npm run dev +# NVIDIA 机器推荐直接使用一键入口 +bash deploy/start_nv.sh ``` 本地默认访问地址: @@ -701,12 +748,15 @@ npm run dev - 后端健康检查:http://127.0.0.1:8000/health 本地部署常用命令: -- 启动后端:`./deploy/start.sh` +- 推荐启动整套:`bash deploy/start_nv.sh` +- 仅启动后端(需先加载 profile): + `set -a && source deploy/profiles/nv.env && set +a && bash deploy/start.sh` - 停止后端:`./deploy/stop.sh` - 重启后端:`./deploy/restart.sh` 说明: -- `deploy/start.sh` 和 `deploy/stop.sh` 都会读取同一个 `deploy/app_config.sh`,端口不再分别写死。 +- `deploy/start.sh` 会读取 `deploy/app_config.sh`,但不会自动加载 `deploy/profiles/*.env`。 +- `deploy/start_nv.sh` 才是当前最稳妥的一键入口:它会加载 `deploy/profiles/nv.env`、准备本地模型、启动模型服务,再启动后端和前端。 - 如果修改了 `APP_PORT`,也要同步更新 `frontend-workflow/vite.config.ts` 里的前端代理地址。 **配置前端代理** @@ -755,15 +805,8 @@ vllm serve opendatalab/MinerU2.5-2509-1.2B ` #### 🎨 Web 前端(推荐) ```bash -# 如需修改本地端口或 worker 数,请先编辑 deploy/app_config.sh - -# 启动后端 API -./deploy/start.sh - -# 启动前端(新终端) -cd frontend-workflow -npm install -npm run dev +# NVIDIA 机器推荐直接使用一键入口 +bash deploy/start_nv.sh ``` 访问 `http://localhost:3000`。 diff --git a/dataflow_agent/agentroles/paper2any_agents/content_expander_agent.py b/dataflow_agent/agentroles/paper2any_agents/content_expander_agent.py index 45c4c4c4..708740e6 100644 --- a/dataflow_agent/agentroles/paper2any_agents/content_expander_agent.py +++ b/dataflow_agent/agentroles/paper2any_agents/content_expander_agent.py @@ 
-51,15 +51,21 @@ def get_task_prompt_params(self, pre_tool_results: Dict[str, Any]) -> Dict[str, - text_content: 待扩写的文本 - expansion_round: 当前扩写轮次 """ + language = "zh" + request = getattr(self.state, "request", None) + if request is not None: + language = str(getattr(request, "language", None) or language).strip() or language return { "text_content": self.state.text_content, - "expansion_round": 0, + "expansion_round": int(getattr(self.state, "expansion_round", 0) or 0), + "language": language, } def get_default_pre_tool_results(self) -> Dict[str, Any]: return { "text_content": "", - "expansion_round": 0 + "expansion_round": 0, + "language": "zh", } # ---------- 结果写回 ---------- @@ -72,7 +78,16 @@ def update_state_result( """ 将扩写后的文本(字符串)写回 State。 """ - state.text_content = result + if isinstance(result, dict): + text_value = result.get("text") + if isinstance(text_value, str): + state.text_content = text_value + else: + state.text_content = str(text_value or "") + elif isinstance(result, str): + state.text_content = result + else: + state.text_content = str(result or "") super().update_state_result(state, result, pre_tool_results) diff --git a/dataflow_agent/agentroles/paper2any_agents/long_paper_outline_agent.py b/dataflow_agent/agentroles/paper2any_agents/long_paper_outline_agent.py index 4ac0b83c..cc7ef880 100644 --- a/dataflow_agent/agentroles/paper2any_agents/long_paper_outline_agent.py +++ b/dataflow_agent/agentroles/paper2any_agents/long_paper_outline_agent.py @@ -37,7 +37,7 @@ def role_name(self) -> str: @property def system_prompt_template_name(self) -> str: - return "system_prompt_for_outline_agent" + return "system_prompt_for_long_paper_outline_agent" @property def task_prompt_template_name(self) -> str: @@ -87,9 +87,16 @@ def update_state_result( 注意:在 Workflow 的 generate_outline_for_batch 中, 会从返回的 State 中读取 pagecontent。 """ - # 结果预期是一个 List[Dict] (页面列表) + if not isinstance(result, list): + log.warning("[long_paper_outline_agent] Invalid result, discard 
invalid payload and mark pagecontent empty.") + state.pagecontent = [] + setattr(state, "outline_generation_error", "long_paper_outline_agent did not return a valid JSON array") + super().update_state_result(state, [], pre_tool_results) + return + state.pagecontent = result - log.info(f"[long_paper_outline_agent] 生成了 {len(result) if isinstance(result, list) else 0} 页内容") + setattr(state, "outline_generation_error", "") + log.info(f"[long_paper_outline_agent] 生成了 {len(result)} 页内容") super().update_state_result(state, result, pre_tool_results) diff --git a/dataflow_agent/agentroles/paper2any_agents/outline_agent.py b/dataflow_agent/agentroles/paper2any_agents/outline_agent.py index 5068f0bc..7b152f50 100644 --- a/dataflow_agent/agentroles/paper2any_agents/outline_agent.py +++ b/dataflow_agent/agentroles/paper2any_agents/outline_agent.py @@ -40,13 +40,11 @@ def role_name(self) -> str: # noqa: D401 @property def system_prompt_template_name(self) -> str: - # TODO: 修改为真实的模板 id - return "system_prompt_for_outline_agent" + return "system_prompt_for_paper2ppt_outline_agent" @property def task_prompt_template_name(self) -> str: - # TODO: 修改为真实的模板 id - return "task_prompt_for_outline_agent" + return "task_prompt_for_paper2ppt_outline_agent" # ---------- Prompt 参数 ---------- def get_task_prompt_params(self, pre_tool_results: Dict[str, Any]) -> Dict[str, Any]: @@ -78,8 +76,15 @@ def update_state_result( pre_tool_results: Dict[str, Any], ): """将推理结果写回 MainState,可按需重写""" + if not isinstance(result, list): + log.warning("[outline_agent] Invalid result, discard invalid payload and mark pagecontent empty.") + state.pagecontent = [] + setattr(state, "outline_generation_error", "outline_agent did not return a valid JSON array") + super().update_state_result(state, [], pre_tool_results) + return state.pagecontent = result + setattr(state, "outline_generation_error", "") log.info(f"[outline_agent]: outline_agent 生成了 {len(result)} 页内容") super().update_state_result(state, result, 
pre_tool_results) @@ -167,4 +172,4 @@ def create_outline_agent( use_vlm=use_vlm, vlm_config=vlm_config, **kwargs, - ) \ No newline at end of file + ) diff --git a/dataflow_agent/agentroles/paper2any_agents/outline_refine_agent.py b/dataflow_agent/agentroles/paper2any_agents/outline_refine_agent.py index 02170686..cd629d18 100644 --- a/dataflow_agent/agentroles/paper2any_agents/outline_refine_agent.py +++ b/dataflow_agent/agentroles/paper2any_agents/outline_refine_agent.py @@ -31,11 +31,11 @@ def role_name(self) -> str: # noqa: D401 @property def system_prompt_template_name(self) -> str: - return "system_prompt_for_outline_refine_agent" + return "system_prompt_for_paper2ppt_outline_refine_agent" @property def task_prompt_template_name(self) -> str: - return "task_prompt_for_outline_refine_agent" + return "task_prompt_for_paper2ppt_outline_refine_agent" def get_task_prompt_params(self, pre_tool_results: Dict[str, Any]) -> Dict[str, Any]: return { diff --git a/dataflow_agent/agentroles/paper2any_agents/websearch_curator.py b/dataflow_agent/agentroles/paper2any_agents/websearch_curator.py index 2e0ff7e8..b8211915 100644 --- a/dataflow_agent/agentroles/paper2any_agents/websearch_curator.py +++ b/dataflow_agent/agentroles/paper2any_agents/websearch_curator.py @@ -672,4 +672,4 @@ def create_websearch_curator_agent( """ 便捷创建函数。 """ - return WebsearchChiefCuratorAgent.create(tool_manager=tool_manager, **kwargs) \ No newline at end of file + return WebsearchChiefCuratorAgent.create(tool_manager=tool_manager, **kwargs) diff --git a/dataflow_agent/agentroles/paper2any_agents/websearch_initial_analyzer.py b/dataflow_agent/agentroles/paper2any_agents/websearch_initial_analyzer.py index 0c164650..0b4f645e 100644 --- a/dataflow_agent/agentroles/paper2any_agents/websearch_initial_analyzer.py +++ b/dataflow_agent/agentroles/paper2any_agents/websearch_initial_analyzer.py @@ -553,5 +553,3 @@ def create_websearch_initial_analyzer_agent( - - diff --git 
a/dataflow_agent/agentroles/paper2any_agents/websearch_planner.py b/dataflow_agent/agentroles/paper2any_agents/websearch_planner.py index 7a858620..70e811ba 100644 --- a/dataflow_agent/agentroles/paper2any_agents/websearch_planner.py +++ b/dataflow_agent/agentroles/paper2any_agents/websearch_planner.py @@ -218,5 +218,3 @@ def create_websearch_planner_agent( 便捷创建函数。 """ return WebsearchPlannerAgent.create(tool_manager=tool_manager, **kwargs) - - diff --git a/dataflow_agent/agentroles/paper2any_agents/websearch_researcher.py b/dataflow_agent/agentroles/paper2any_agents/websearch_researcher.py index 5c17dba9..e6e4fbf7 100644 --- a/dataflow_agent/agentroles/paper2any_agents/websearch_researcher.py +++ b/dataflow_agent/agentroles/paper2any_agents/websearch_researcher.py @@ -877,4 +877,4 @@ async def run(self, state: MainState, **kwargs) -> Dict[str, Any]: return result_payload def create_websearch_researcher_agent(tool_manager: Optional[ToolManager] = None, **kwargs) -> WebsearchResearcherAgent: - return WebsearchResearcherAgent.create(tool_manager=tool_manager, **kwargs) \ No newline at end of file + return WebsearchResearcherAgent.create(tool_manager=tool_manager, **kwargs) diff --git a/dataflow_agent/promptstemplates/resources/pt_long_paper_repo.py b/dataflow_agent/promptstemplates/resources/pt_long_paper_repo.py index 147451a8..aa844e55 100644 --- a/dataflow_agent/promptstemplates/resources/pt_long_paper_repo.py +++ b/dataflow_agent/promptstemplates/resources/pt_long_paper_repo.py @@ -12,7 +12,7 @@ class LongPaperOutlineAgent: """ # 系统提示词:与普通 outline_agent 共享或专用 - system_prompt_for_outline_agent = """ + system_prompt_for_long_paper_outline_agent = """ 你是一位拥有丰富学术汇报经验的PPT设计专家及大纲生成助手。你的核心任务是将一篇学术论文(或长文档的一部分)转化为一份逻辑清晰、视觉布局合理的PPT演示大纲。 请遵循以下严格规则: @@ -20,6 +20,8 @@ class LongPaperOutlineAgent: 2. **视觉导向**:在规划每一页PPT时,不仅要生成文字内容,必须明确指出该页是否需要展示特定的插图(Images)或表格(Tables)。 3. **布局建议**:为每一页提供具体的布局指导(例如:左文右图、上标题下表格、两栏对比等)。 4. 
**格式严格**:输出必须且只能是标准的 JSON 格式数组。严禁包含 markdown 标记(如 ```json)、前言、后语或任何非 JSON 字符。 +5. **语言绝对一致**:如果 `language=en`,则 `title`、`layout_description`、`key_points` 中禁止出现中文;如果 `language=zh`,则这些字段必须全部使用中文。严禁中英混用。 +6. **key_points 只能是字符串数组**:`key_points` 中每个元素必须是纯字符串,绝对不能输出对象、嵌套数组或带 `text/value/content` 字段的结构。 """ # 1. 首页 Prompt (Is First Batch) @@ -38,6 +40,9 @@ class LongPaperOutlineAgent: 2. 后续页面开始进入正文介绍(如背景、引言、核心问题等)。 3.输出内容的语言为 **{language}**。 4. 不需要致谢页(除非文本很短,这是唯一一批)。 +5. **必须严格返回恰好 {pages_to_generate} 个 JSON 数组元素,不能少也不能多。** +6. `key_points` 必须是 `List`,每个元素都是一句简洁要点,不允许对象。 +7. 如果 `{language}` 为 `en`,输出中不得包含中文字符。 **输出格式要求(JSON Array):** 请返回一个 JSON 数组,数组中每个对象代表一页PPT,结构如下: @@ -78,6 +83,9 @@ class LongPaperOutlineAgent: 2. 承接上一批次的内容,继续展开当前的章节。 3. 如果文本包含新的章节标题,请作为新的一页或新章节的开始。 4. 输出内容的语言为 **{language}**。 +5. **必须严格返回恰好 {pages_to_generate} 个 JSON 数组元素,不能少也不能多。** +6. `key_points` 必须是 `List`,每个元素都是一句简洁要点,不允许对象。 +7. 如果 `{language}` 为 `en`,输出中不得包含中文字符。 **输出格式要求(JSON Array):** JSON 数组,每个对象代表一页PPT。 @@ -115,6 +123,9 @@ class LongPaperOutlineAgent: 1. 生成剩余的正文内容(结论、未来展望等)。 2. **最后一页必须是致谢(Thank You)**:简短的结束语。 3.输出内容的语言为 **{language}**。 +4. **必须严格返回恰好 {pages_to_generate} 个 JSON 数组元素,不能少也不能多。** +5. `key_points` 必须是 `List`,每个元素都是一句简洁要点,不允许对象。 +6. 如果 `{language}` 为 `en`,输出中不得包含中文字符。 **输出格式要求(JSON Array):** JSON 数组,每个对象代表一页PPT。 @@ -152,6 +163,7 @@ class ContentExpander: system_prompt_for_content_expander = """ 你是一个专业的学术写作助手和内容扩写专家。你的任务是将输入的简短文本或草稿,扩写成篇幅更长、细节更丰富、逻辑更严密的文章或报告。 你的扩写应保持专业性,增加必要的背景介绍、详细的解释、具体的例子或论证,以满足生成长篇 PPT 的内容需求。 +请严格遵守目标输出语言要求:如果 `language=en`,整个输出必须完全使用英文;如果 `language=zh`,整个输出必须完全使用中文。严禁中英混写。 """ task_prompt_for_content_expander = """ @@ -165,8 +177,9 @@ class ContentExpander: 1. **大幅增加篇幅**:在保持原意的前提下,通过增加细节、举例、背景分析、优缺点对比等方式,显著增加字数。 2. **结构完整**:如果输入是片段,请将其补全为完整的章节;如果输入是提纲,请将其展开为全文。 3. **保持连贯**:确保扩写后的内容逻辑通顺,段落过渡自然。 -4. **输出限制**:直接输出扩写后的完整文本,不要包含任何类似于“好的,这是扩写后的内容”的废话。不要使用 Markdown 代码块包裹。 -5. 如果需要表格,必须输出md表格内容,Table_1, xxx +4. 
**输出语言**:本轮扩写后的全文必须严格使用 **{language}**。如果 `{language}` 为 `en`,输出中不得包含中文字符;如果 `{language}` 为 `zh`,输出必须全部使用中文。 +5. **输出限制**:直接输出扩写后的完整文本,不要包含任何类似于“好的,这是扩写后的内容”的废话。不要使用 Markdown 代码块包裹。 +6. 如果需要表格,必须输出md表格内容,Table_1, xxx 请开始扩写: """ diff --git a/dataflow_agent/promptstemplates/resources/pt_paper2ppt_outline_repo.py b/dataflow_agent/promptstemplates/resources/pt_paper2ppt_outline_repo.py new file mode 100644 index 00000000..9cc82be5 --- /dev/null +++ b/dataflow_agent/promptstemplates/resources/pt_paper2ppt_outline_repo.py @@ -0,0 +1,190 @@ +""" +Prompt templates dedicated to paper2ppt outline generation/refinement. +""" + + +class Paper2PPTOutline: + system_prompt_for_paper2ppt_outline_agent = """ +你是一位拥有丰富学术汇报经验的 PPT 设计专家及大纲生成助手。你的核心任务是将一篇学术论文或一段研究正文转化为逻辑清晰、视觉布局合理的 PPT 演示大纲。 + +请遵循以下严格规则: +1. 深度理解:仔细阅读输入内容,提取核心论点、方法、实验结果和结论。 +2. 视觉导向:在规划每一页 PPT 时,明确指出该页适合的布局,并仅在确有必要时引用一个原图或表格。 +3. 格式严格:输出必须且只能是标准 JSON 数组。严禁包含 markdown 标记、前言、后语或任何非 JSON 字符。 +4. 语言绝对一致:如果 `language=en`,则 `title`、`layout_description`、`key_points` 中禁止出现中文;如果 `language=zh`,则这些字段必须全部使用中文。严禁中英混用。 +5. key_points 只能是字符串数组:`key_points` 中每个元素必须是纯字符串,绝对不能输出对象、嵌套数组或带 `text/value/content` 字段的结构。 +6. 页面粒度:每个数组元素必须只对应一页 PPT,不能把整篇论文原文直接塞进单页。 +7. 要点长度:每个 `key_points` 元素必须是面向 PPT 的短句;不要输出大段原文摘抄。 +""" + + task_prompt_for_paper2ppt_outline_agent = """ +请根据以下提供的论文全文内容,生成一份详细的 PPT 演示文稿大纲。 + +输入论文内容: +{text_content} +{minueru_output} + +约束条件: +1. 目标 PPT 页数:{page_count} 页。 +2. 第一页必须是封面,只保留主题和汇报人,不要额外正文。 +3. 最后一页必须是致谢 / Thank You。 +4. 输出语言必须严格使用 {language}。 +5. 每一页只能给出该页需要的摘要和要点,禁止把长段论文原文复制进单页。 +6. `key_points` 必须是 `List`,每个元素都是一句简洁要点。 + +输出格式要求(JSON Array): +[ + {{ + "title": "Slide title", + "layout_description": "具体版式说明", + "key_points": ["要点1", "要点2"], + "asset_ref": null + }} +] +""" + + system_prompt_for_paper2ppt_outline_refine_agent = """ +你是一位拥有丰富学术汇报经验的 PPT 设计专家及大纲编辑助手。你的核心任务是:在不改变页数与顺序的前提下,基于用户反馈与论文内容,对已有 PPT 大纲进行更精准、更完善的改写与补充。 + +请遵循以下严格规则: +1. 仅允许修改每页内容字段:`title` / `layout_description` / `key_points`。 +2. 
默认保留 `asset_ref`,除非用户反馈明确要求修改。
+3. 禁止编造论文中不存在的具体事实、数值、指标或结论。
+4. 输出必须且只能是标准 JSON 数组。
+5. `key_points` 必须保持为纯字符串数组,且每个元素为适合 PPT 的简洁短句。
+"""
+
+    task_prompt_for_paper2ppt_outline_refine_agent = """
+请根据以下提供的论文内容、当前大纲以及用户反馈,对大纲进行“只改内容”的修订与完善。
+
+论文内容:
+{text_content}
+{minueru_output}
+
+当前大纲(JSON Array):
+{pagecontent}
+
+用户反馈:
+{outline_feedback}
+
+约束:
+1. 页数必须保持不变,总页数仍为 {page_count}。
+2. 输出语言必须严格使用 {language}。
+3. 只返回合法 JSON 数组,不要返回任何解释性文字。
+"""
+
+    system_prompt_for_paper2ppt_outline_edit_planner_agent = """
+你是一位负责“编辑计划”的 PPT 大纲调度助手。你的任务不是直接重写整份大纲,而是把用户的自然语言修改意见转换成结构化编辑计划。
+
+请遵循以下规则:
+1. 你只能输出一个 JSON Object,不能输出解释文字。
+2. 页面编号一律基于当前大纲的原始页号(从 1 开始)。
+3. 如果用户只是要求“整体润色、整体学术化、整体精简、统一风格”,请设置 `apply_global_rewrite=true`,不要随意删除页面。
+4. 只有在用户明确要求“新增、删除、重排、拆分、合并”时,才使用 `insert_after` / `delete` / `move`。
+5. `operations` 只允许以下类型:
+   - `update`: 修改现有某几页内容
+   - `delete`: 删除某几页
+   - `insert_after`: 在某页后新增若干页
+   - `move`: 将某几页移动到另一页之后
+6. `global_instruction` 用一句话概括这次整体修改目标;没有明确全局目标时,复述用户反馈即可。
+7. 不要发明不存在的页号。
+
+输出 JSON 结构:
+{{
+  "global_instruction": "一句话概括整体修改意图",
+  "apply_global_rewrite": true,
+  "operations": [
+    {{
+      "type": "update",
+      "page_numbers": [2, 3],
+      "instruction": "把 related work 更精简,突出 gap"
+    }},
+    {{
+      "type": "insert_after",
+      "page_number": 12,
+      "count": 2,
+      "instruction": "补两页实验结果页,分别讲主结果和消融实验"
+    }},
+    {{
+      "type": "delete",
+      "page_numbers": [20]
+    }},
+    {{
+      "type": "move",
+      "page_numbers": [5, 6],
+      "after_page_number": 9
+    }}
+  ]
+}}
+"""
+
+    task_prompt_for_paper2ppt_outline_edit_planner_agent = """
+请根据当前大纲摘要和用户反馈,产出一个结构化编辑计划。
+
+当前大纲页数:{page_count}
+输出语言:{language}
+
+当前大纲摘要:
+{outline_digest}
+
+相关原文摘录(仅供校准主题,不要求逐句复用):
+{source_excerpt}
+
+用户反馈:
+{outline_feedback}
+
+要求:
+1. 默认尽量保留原有页数和结构,除非用户明确要求增删或重排。
+2. 如果反馈是整体性的,请把 `apply_global_rewrite` 设为 `true`。
+3. 如果反馈只针对局部页,请优先使用 `update`。
+4. 只返回 JSON Object。
+"""
+
+    system_prompt_for_paper2ppt_outline_patch_rewriter_agent = """
+你是一位局部大纲修订助手。你的任务是只重写当前给定的小批量页面,而不是整份 PPT。
+
+请遵循以下规则:
+1. 
输出必须是合法 JSON 数组,数组长度必须与输入页数完全一致。 +2. 输出顺序必须与输入顺序一一对应,禁止丢页、并页或增页。 +3. 每页只允许修改 `title`、`layout_description`、`key_points`,默认保留 `asset_ref`。 +4. `key_points` 必须是纯字符串数组,且每个元素是适合 PPT 的短句。 +5. 输出语言必须严格使用 {language}。 +6. 如果某页没有明确局部修改要求,就在遵守整体修改目标的前提下只做适度润色,不要重写整页主题。 +7. 禁止把长段论文原文直接贴进单页。 +""" + + task_prompt_for_paper2ppt_outline_patch_rewriter_agent = """ +请只修订下面这个局部页面块。不要重写整个大纲。 + +当前处理页范围:第 {chunk_start} 页到第 {chunk_end} 页 +该块页数:{page_count} +输出语言:{language} + +整体修改目标: +{global_instruction} + +用户原始反馈: +{outline_feedback} + +当前块的逐页特殊指令: +{page_specific_instructions} + +相邻页面标题: +- Previous: {previous_title} +- Next: {next_title} + +相关原文摘录(仅供事实校准,不要求逐句复用): +{source_excerpt} + +当前页面块(JSON Array): +{pagecontent} + +输出要求: +1. 返回一个合法 JSON 数组,长度必须与输入完全一致。 +2. 每个元素都包含: + - `title` + - `layout_description` + - `key_points` + - `asset_ref` +3. 不要输出任何解释性文字。 +""" diff --git a/dataflow_agent/toolkits/multimodaltool/providers.py b/dataflow_agent/toolkits/multimodaltool/providers.py index 73a619c7..aa253f3a 100644 --- a/dataflow_agent/toolkits/multimodaltool/providers.py +++ b/dataflow_agent/toolkits/multimodaltool/providers.py @@ -303,13 +303,141 @@ def parse_generation_response(self, data: Dict[str, Any]) -> str: raise RuntimeError("candidates is empty") content = candidates[0].get("content", {}) parts = content.get("parts", []) - inline_data = parts[0].get("inlineData", {}) - return inline_data.get("data") + if not parts: + raise RuntimeError("parts is empty") + for part in parts: + inline_data = part.get("inlineData", {}) + b64 = inline_data.get("data") + if b64: + return b64 + raise RuntimeError("inlineData.data is empty") except Exception as e: log.error(f"Failed to parse APIYI Gemini response: {e}") log.error(f"Response preview: {str(data)[:500]}") raise + +class IkunCodeGeminiProvider(AIProviderStrategy): + """ + IKunCode 上的 Gemini 图像生成接口。 + 与 APIYI 同为 Google Native 风格,但字段命名遵循 IKunCode 文档: + - image_size + - inlineData / mimeType + """ + + def match(self, api_url: str, model: 
str) -> bool: + return ( + detect_provider(api_url) is Provider.IKUNCODE + and (is_gemini_3_pro(model) or is_gemini_31_flash(model)) + ) + + def _get_base_url(self, api_url: str) -> str: + base = api_url.rstrip("/") + if base.endswith("/v1beta"): + return base + if base.endswith("/v1"): + return f"{base[:-3]}/v1beta" + return f"{base}/v1beta" + + def _image_config(self, aspect_ratio: str, resolution: str) -> Dict[str, Any]: + return { + "aspectRatio": aspect_ratio, + "image_size": resolution, + } + + def build_generation_request(self, api_url: str, model: str, prompt: str, **kwargs) -> Tuple[str, Dict[str, Any], bool]: + base = self._get_base_url(api_url) + aspect_ratio = kwargs.get("aspect_ratio", "16:9") + resolution = kwargs.get("resolution", "2K") + url = f"{base}/models/{model}:generateContent" + payload = { + "contents": [{"parts": [{"text": prompt}]}], + "generationConfig": { + "responseModalities": ["IMAGE"], + "imageConfig": self._image_config(aspect_ratio, resolution), + }, + } + return url, payload, False + + def build_edit_request(self, api_url: str, model: str, prompt: str, image_b64: str, **kwargs) -> Tuple[str, Dict[str, Any], bool]: + base = self._get_base_url(api_url) + aspect_ratio = kwargs.get("aspect_ratio", "16:9") + resolution = kwargs.get("resolution", "2K") + fmt = kwargs.get("image_fmt", "png") + url = f"{base}/models/{model}:generateContent" + payload = { + "contents": [ + { + "parts": [ + { + "inlineData": { + "mimeType": f"image/{fmt}", + "data": image_b64, + } + }, + {"text": prompt}, + ] + } + ], + "generationConfig": { + "responseModalities": ["IMAGE"], + "imageConfig": self._image_config(aspect_ratio, resolution), + }, + } + return url, payload, False + + def build_multi_image_edit_request( + self, + api_url: str, + model: str, + prompt: str, + image_b64_list: List[Tuple[str, str]], + **kwargs + ) -> Tuple[str, Dict[str, Any], bool]: + base = self._get_base_url(api_url) + aspect_ratio = kwargs.get("aspect_ratio", "16:9") + resolution 
= kwargs.get("resolution", "2K") + parts: List[Dict[str, Any]] = [] + for b64, fmt in image_b64_list: + parts.append( + { + "inlineData": { + "mimeType": f"image/{fmt}", + "data": b64, + } + } + ) + parts.append({"text": prompt}) + url = f"{base}/models/{model}:generateContent" + payload = { + "contents": [{"parts": parts}], + "generationConfig": { + "responseModalities": ["IMAGE"], + "imageConfig": self._image_config(aspect_ratio, resolution), + }, + } + return url, payload, False + + def parse_generation_response(self, data: Dict[str, Any]) -> str: + try: + candidates = data.get("candidates", []) + if not candidates: + raise RuntimeError("candidates is empty") + content = candidates[0].get("content", {}) + parts = content.get("parts", []) + if not parts: + raise RuntimeError("parts is empty") + for part in parts: + inline_data = part.get("inlineData", {}) + b64 = inline_data.get("data") + if b64: + return b64 + raise RuntimeError("inlineData.data is empty") + except Exception as e: + log.error(f"Failed to parse IKunCode Gemini response: {e}") + log.error(f"Response preview: {str(data)[:500]}") + raise + # Gemini TTS 无 speakingRate 参数,通过文本前加 Pacing 指令控制语速 # steady:不论长短都保持稳定、自然的语速(避免短句偏慢、fast 偏快) _TTS_PACE_PREFIX = { @@ -1226,6 +1354,7 @@ def parse_generation_response(self, data: Dict[str, Any]) -> str: # 注册顺序 STRATEGIES = [ + IkunCodeGeminiProvider(), ApiYiGeminiProvider(), ApiYiSeeDreamProvider(), ApiYiGPTImageProvider(), diff --git a/dataflow_agent/toolkits/multimodaltool/req_img.py b/dataflow_agent/toolkits/multimodaltool/req_img.py index e7415708..5b209e85 100644 --- a/dataflow_agent/toolkits/multimodaltool/req_img.py +++ b/dataflow_agent/toolkits/multimodaltool/req_img.py @@ -111,6 +111,8 @@ async def _post_raw( for part in content.get("parts", []): if "inline_data" in part: part["inline_data"]["data"] = " ...[base64]... " + if "inlineData" in part: + part["inlineData"]["data"] = " ...[base64]... 
" log.info(f"Payload Preview: {json.dumps(debug_payload, ensure_ascii=False)}") except Exception: diff --git a/dataflow_agent/toolkits/multimodaltool/utils.py b/dataflow_agent/toolkits/multimodaltool/utils.py index 51f9923a..f49fc1da 100644 --- a/dataflow_agent/toolkits/multimodaltool/utils.py +++ b/dataflow_agent/toolkits/multimodaltool/utils.py @@ -11,6 +11,7 @@ class Provider(str, Enum): APIYI = "apiyi" + IKUNCODE = "ikuncode" LOCAL_123 = "local_123" OTHER = "other" @@ -20,6 +21,8 @@ def detect_provider(api_url: str) -> Provider: """ 根据 api_url 粗略识别服务商 """ + if "ikuncode" in api_url: + return Provider.IKUNCODE if "apiyi" in api_url: return Provider.APIYI if "123.129.219.111" in api_url: diff --git a/dataflow_agent/toolkits/postertool/src/agents/parser.py b/dataflow_agent/toolkits/postertool/src/agents/parser.py index 7ff9a955..a02df88a 100644 --- a/dataflow_agent/toolkits/postertool/src/agents/parser.py +++ b/dataflow_agent/toolkits/postertool/src/agents/parser.py @@ -19,8 +19,10 @@ from dataflow_agent.toolkits.multimodaltool.mineru_tool import ( _extract_block_text, _normalize_mineru_blocks, + _should_use_remote_mineru, crop_mineru_blocks_with_meta, run_aio_batch_two_step_extract, + run_mineru_pdf_extract_http, ) from src.config.poster_config import load_config from src.state.poster_state import PosterState @@ -146,6 +148,25 @@ def _extract_raw_text_with_mineru( pdf_path: str, content_dir: Path, ) -> Tuple[str, Dict[str, Any]]: + if _should_use_remote_mineru(): + log_agent_info(self.name, "using remote MinerU API for poster text extraction") + markdown_text, auto_dir = asyncio.run( + run_mineru_pdf_extract_http( + pdf_path=pdf_path, + output_dir=str(content_dir), + port=self.mineru_port, + dpi=self.render_dpi, + ) + ) + text = self.clean_pattern.sub("", markdown_text).strip() + if not text: + raise ValueError("MinerU remote API returned no usable text") + log_agent_info( + self.name, + f"extracted {len(text)} chars via remote MinerU into {auto_dir}", + ) + 
return text, {} + pages_dir = content_dir / "mineru_pages" pages_dir.mkdir(parents=True, exist_ok=True) diff --git a/dataflow_agent/toolkits/postertool/src/agents/renderer.py b/dataflow_agent/toolkits/postertool/src/agents/renderer.py index 3259809c..5f019f04 100644 --- a/dataflow_agent/toolkits/postertool/src/agents/renderer.py +++ b/dataflow_agent/toolkits/postertool/src/agents/renderer.py @@ -424,6 +424,36 @@ def _parse_and_add_runs(self, paragraph, text: str, font_family: str, if segment['italic']: run.font.italic = True + + def _append_format_segment( + self, + segments: list, + text: str, + *, + bold: bool = False, + italic: bool = False, + color: Optional[str] = None, + ) -> None: + """Append a formatting segment while coalescing adjacent identical styles.""" + if not text: + return + + if segments: + last = segments[-1] + if ( + last["bold"] == bold + and last["italic"] == italic + and last["color"] == color + ): + last["text"] += text + return + + segments.append({ + "text": text, + "bold": bold, + "italic": italic, + "color": color, + }) def _tokenize_formatting(self, text: str) -> list: """tokenize text into formatting segments with precise position tracking""" @@ -452,73 +482,80 @@ def _tokenize_formatting(self, text: str) -> list: # process colored text with automatic bold if colored_text.strip(): # only process non-empty content - segments.append({ - 'text': colored_text, - 'bold': True, # all colored text is bold - 'italic': False, - 'color': color_hex - }) + self._append_format_segment( + segments, + colored_text, + bold=True, # all colored text is bold + italic=False, + color=color_hex, + ) # move past the entire color block i = closing_tag_end continue else: # malformed color tag, treat as regular text - segments.append({ - 'text': text[i], - 'bold': False, - 'italic': False, - 'color': None - }) + self._append_format_segment(segments, text[i]) i += 1 continue + + # check for bold italic: ***text*** + bold_italic_match = 
re.match(r'\*\*\*(.+?)\*\*\*', text[i:]) + if bold_italic_match: + self._append_format_segment( + segments, + bold_italic_match.group(1), + bold=True, + italic=True, + ) + i += bold_italic_match.end() + continue # check for bold: **text** - bold_match = re.match(r'\*\*(.*?)\*\*', text[i:]) + bold_match = re.match(r'\*\*(.+?)\*\*', text[i:]) if bold_match: - bold_text = bold_match.group(1) - segments.append({ - 'text': bold_text, - 'bold': True, - 'italic': False, - 'color': None - }) + self._append_format_segment( + segments, + bold_match.group(1), + bold=True, + italic=False, + ) i += bold_match.end() continue # check for italic: *text* - italic_match = re.match(r'\*(.*?)\*', text[i:]) + italic_match = re.match(r'\*(.+?)\*', text[i:]) if italic_match: - italic_text = italic_match.group(1) - segments.append({ - 'text': italic_text, - 'bold': False, - 'italic': True, - 'color': None - }) + self._append_format_segment( + segments, + italic_match.group(1), + bold=False, + italic=True, + ) i += italic_match.end() continue # regular text - find next formatting marker next_format = re.search(r'(\*\*|\*| list: diff --git a/dataflow_agent/workflow/wf_paper2page_content.py b/dataflow_agent/workflow/wf_paper2page_content.py index b5aab416..ae88df16 100644 --- a/dataflow_agent/workflow/wf_paper2page_content.py +++ b/dataflow_agent/workflow/wf_paper2page_content.py @@ -20,6 +20,23 @@ log = get_logger(__name__) + +def _resolve_outline_model(state: Paper2FigureState) -> str | None: + request = getattr(state, "request", None) + request_model = str(getattr(request, "model", "") or "").strip() + if request_model: + return request_model + + explicit_outline_model = str(getattr(request, "outline_model", "") or "").strip() + if explicit_outline_model: + return explicit_outline_model + + configured_outline_model = os.getenv("PAPER2PPT_OUTLINE_MODEL", "").strip() + if configured_outline_model: + return configured_outline_model + + return None + def _ensure_result_path(state: 
Paper2FigureState) -> str: """ 参考 wf_paper2figure_with_sam.py 的做法: @@ -256,6 +273,7 @@ async def outline_agent(state: Paper2FigureState) -> Paper2FigureState: """ agent = create_react_agent( name="outline_agent", + model_name=_resolve_outline_model(state), temperature=0.1, max_retries=5, parser_type="json", @@ -269,6 +287,7 @@ async def outline_refine_agent(state: Paper2FigureState) -> Paper2FigureState: """ agent = create_react_agent( name="outline_refine_agent", + model_name=_resolve_outline_model(state), parser_type="json", max_retries=5 ) @@ -282,6 +301,7 @@ async def deep_research_agent(state: Paper2FigureState) -> Paper2FigureState: log.info("[paper2page_content] Entering deep_research_agent...") agent = create_simple_agent( name="deep_research_agent", + model_name=_resolve_outline_model(state), temperature=0.7, parser_type="text", # 直接输出长文本 ) diff --git a/dataflow_agent/workflow/wf_paper2page_content_for_long_paper.py b/dataflow_agent/workflow/wf_paper2page_content_for_long_paper.py index 01896afd..3d3cef2f 100644 --- a/dataflow_agent/workflow/wf_paper2page_content_for_long_paper.py +++ b/dataflow_agent/workflow/wf_paper2page_content_for_long_paper.py @@ -1,9 +1,10 @@ from __future__ import annotations +import copy import json import os +import re import time -import copy from pathlib import Path from typing import List, Dict, Any @@ -25,6 +26,23 @@ log = get_logger(__name__) + +def _resolve_outline_model(state: Paper2FigureState) -> str | None: + request = getattr(state, "request", None) + request_model = str(getattr(request, "model", "") or "").strip() + if request_model: + return request_model + + explicit_outline_model = str(getattr(request, "outline_model", "") or "").strip() + if explicit_outline_model: + return explicit_outline_model + + configured_outline_model = os.getenv("PAPER2PPT_OUTLINE_MODEL", "").strip() + if configured_outline_model: + return configured_outline_model + + return None + """ Workflow: paper2page_content_for_long_paper 
Description: 专门用于处理长文档(如书籍、长论文、长篇报告)生成大量 PPT 页面的工作流。 @@ -93,6 +111,186 @@ def _calculate_target_chars(target_pages: int, text: str = "") -> int: return target +def _extract_plain_text(value: Any) -> str: + if value is None: + return "" + if isinstance(value, str): + return value.strip() + if isinstance(value, (int, float, bool)): + return str(value) + if isinstance(value, dict): + preferred_keys = ( + "text", + "value", + "content", + "summary", + "title", + "label", + "body", + "description", + "reason", + "point", + "raw", + ) + for key in preferred_keys: + extracted = _extract_plain_text(value.get(key)) + if extracted: + return extracted + for item in value.values(): + extracted = _extract_plain_text(item) + if extracted: + return extracted + return "" + if isinstance(value, (list, tuple, set)): + parts = [_extract_plain_text(item) for item in value] + return "\n\n".join(part for part in parts if part) + return str(value).strip() + + +def _normalize_outline_points(value: Any, *, limit: int = 5) -> List[str]: + if isinstance(value, list): + items = [_extract_plain_text(item) for item in value] + else: + items = [_extract_plain_text(value)] + cleaned = [item for item in items if item] + return cleaned[:limit] + + +def _clip_outline_point(text: str, *, limit: int = 220) -> str: + text = re.sub(r"\s+", " ", str(text or "")).strip() + if len(text) <= limit: + return text + clipped = text[:limit].rsplit(" ", 1)[0].strip() + return (clipped or text[:limit]).rstrip(",;:.- ") + "..." 
+ + +def _normalize_outline_page_item(raw: Any) -> Dict[str, Any] | None: + if not isinstance(raw, dict): + return None + + title = _extract_plain_text(raw.get("title")) + layout_description = _extract_plain_text(raw.get("layout_description")) + key_points = _normalize_outline_points(raw.get("key_points"), limit=6) + asset_ref_text = _extract_plain_text(raw.get("asset_ref")) + + # ReAct 失败或空对象时,不要把错误占位直接透传给前端。 + if raw.get("error") and not title and not key_points: + return None + if not title and not layout_description and not key_points and not asset_ref_text: + return None + + normalized = dict(raw) + normalized["title"] = title + normalized["layout_description"] = layout_description + normalized["key_points"] = key_points + normalized["asset_ref"] = asset_ref_text or None + return normalized + + +def _normalize_outline_pages(items: List[Any]) -> List[Dict[str, Any]]: + normalized: List[Dict[str, Any]] = [] + for raw in items: + item = _normalize_outline_page_item(raw) + if item is not None: + normalized.append(item) + return normalized + + +def _split_batch_text_into_units(content: str) -> List[str]: + content = _extract_plain_text(content) + paragraphs = [part.strip() for part in re.split(r"\n\s*\n", content or "") if part.strip()] + if paragraphs: + return paragraphs + + lines = [line.strip() for line in (content or "").splitlines() if line.strip()] + if lines: + return lines + + collapsed = re.sub(r"\s+", " ", str(content or "")).strip() + if not collapsed: + return [] + + sentence_parts = [ + part.strip() + for part in re.split(r"(?<=[。!?!?\.])\s+", collapsed) + if part.strip() + ] + if len(sentence_parts) > 1: + return sentence_parts + + chunk_size = 220 + return [collapsed[i:i + chunk_size].strip() for i in range(0, len(collapsed), chunk_size) if collapsed[i:i + chunk_size].strip()] + + +def _build_fallback_pages_for_batch( + *, + batch: Dict[str, Any], + existing_pages: List[Dict[str, Any]], + page_budget: int, + language: str, +) -> List[Dict[str, 
Any]]: + if page_budget <= 0: + return [] + + batch_titles = [str(title).strip() for title in (batch.get("section_titles") or []) if str(title).strip()] + units = _split_batch_text_into_units(str(batch.get("content") or "")) + if not units: + units = ["Content summary pending refinement."] + + missing = max(0, page_budget - len(existing_pages)) + if missing <= 0: + return [] + + fallback_pages: List[Dict[str, Any]] = [] + unit_count = len(units) + chunk_size = max(1, (unit_count + missing - 1) // missing) + use_chinese = str(language or "").strip().lower().startswith("zh") + default_heading_prefix = "章节" if use_chinese else "Section" + closing_title = "感谢聆听" if use_chinese else "Thank You" + closing_points = ["感谢聆听", "欢迎交流与提问"] if use_chinese else ["Thank you for your attention.", "Questions & Discussion"] + fallback_layout = ( + "结构化学术内容页,包含一个简洁摘要和若干支持要点,延续前后页叙事。" + if use_chinese else + "Structured academic content slide with one concise summary paragraph and supporting bullet points. Preserve narrative continuity with neighboring slides." + ) + + for fallback_idx in range(missing): + if fallback_idx == missing - 1 and batch.get("is_last"): + fallback_pages.append({ + "title": closing_title, + "layout_description": ( + "结束页,包含简短致谢与答疑提示。" + if use_chinese else + "Closing page with a concise thank-you message and optional Q&A prompt." 
+ ), + "key_points": closing_points, + "asset_ref": None, + }) + continue + + start = fallback_idx * chunk_size + end = min(unit_count, start + chunk_size) + excerpt_units = units[start:end] or units[-1:] + excerpt = " ".join(excerpt_units) + heading = batch_titles[min(fallback_idx, len(batch_titles) - 1)] if batch_titles else f"{default_heading_prefix} {fallback_idx + 1}" + points = [ + _clip_outline_point(text.strip()) + for text in excerpt_units[:4] + if text.strip() + ] + if not points: + points = [_clip_outline_point(excerpt[:220].strip())] if excerpt.strip() else ["Expand this section in the editor."] + + fallback_pages.append({ + "title": heading if fallback_idx == 0 else f"{heading} ({fallback_idx + 1})", + "layout_description": fallback_layout, + "key_points": points[:5], + "asset_ref": None, + }) + + return fallback_pages + + # ============================================================ # Workflow 工厂函数 # ============================================================ @@ -251,7 +449,7 @@ async def expand_text_iteratively(state: Paper2FigureState) -> Paper2FigureState TEXT 循环扩写:扩写到足够长度 """ target_pages = getattr(state, "target_pages", 60) - current_text = state.text_content or "" + current_text = _extract_plain_text(state.text_content) # 动态计算目标 target_chars = _calculate_target_chars(target_pages, current_text) @@ -266,6 +464,7 @@ async def expand_text_iteratively(state: Paper2FigureState) -> Paper2FigureState agent = create_simple_agent( name = "content_expander", + model_name=_resolve_outline_model(state), temperature=0.7, parser_type="text", ) @@ -276,9 +475,11 @@ async def expand_text_iteratively(state: Paper2FigureState) -> Paper2FigureState state = await agent.execute(state=state) - # 增加类型检查,防止 agent 返回 dict 导致后续切片报错 - # 用户要求:直接把字典当字符串 - current_text = str(state.text_content) if state.text_content else "" + expanded_text = _extract_plain_text(state.text_content) + if expanded_text: + current_text = expanded_text + else: + log.warning("[long_paper] 
扩写结果为空,保留上一轮文本内容") # 重新计算目标(以防语言变化) target_chars = _calculate_target_chars(target_pages, current_text) @@ -299,12 +500,13 @@ async def generate_long_content_from_topic(state: Paper2FigureState) -> Paper2Fi target_pages = getattr(state, "target_pages", 60) max_rounds = state.max_rounds - current_text = state.text_content or "" + current_text = _extract_plain_text(state.text_content) target_chars = target_pages * 800 log.info(f"[long_paper] 从 TOPIC 生成长文,当前: {len(current_text)} 字符") agent = create_simple_agent( name="topic_writer", + model_name=_resolve_outline_model(state), parser_type="text", ) for round_num in range(max_rounds): @@ -313,7 +515,11 @@ async def generate_long_content_from_topic(state: Paper2FigureState) -> Paper2Fi state = await agent.execute(state=state) - current_text = str(state.text_content) if state.text_content else "" + generated_text = _extract_plain_text(state.text_content) + if generated_text: + current_text = generated_text + else: + log.warning("[long_paper] topic_writer 返回空内容,保留上一轮文本") # 动态更新目标 target_chars = _calculate_target_chars(target_pages, current_text) @@ -330,6 +536,7 @@ async def outline_refine_agent(state: Paper2FigureState) -> Paper2FigureState: """ agent = create_react_agent( name="outline_refine_agent", + model_name=_resolve_outline_model(state), parser_type="json", max_retries=5 ) @@ -345,7 +552,7 @@ async def consolidate_long_text(state: Paper2FigureState) -> Paper2FigureState: log.info(f"[long_paper] 使用 PDF markdown: {len(state.long_text)} 字符") elif state.text_content: # TEXT/TOPIC 路径使用 text_content - state.long_text = state.text_content + state.long_text = _extract_plain_text(state.text_content) log.info(f"[long_paper] 使用 text_content: {len(state.long_text)} 字符") else: state.long_text = "" @@ -372,6 +579,7 @@ async def ensure_sufficient_content(state: Paper2FigureState) -> Paper2FigureSta log.info(f"[long_paper] 内容不足({len(long_text)} < {target_chars} chars),开始补充扩写") agent = create_content_expander( + 
model_name=_resolve_outline_model(state), temperature=0.7, parser_type="text", ) @@ -385,9 +593,11 @@ async def ensure_sufficient_content(state: Paper2FigureState) -> Paper2FigureSta state = await agent.execute(state=state) - # 增加类型检查 - # 用户要求:直接把字典当字符串 - current_text = str(state.text_content) if state.text_content else "" + expanded_text = _extract_plain_text(state.text_content) + if expanded_text: + current_text = expanded_text + else: + log.warning("[long_paper] 补充扩写结果为空,继续使用已有正文") # 重新计算目标 target_chars = _calculate_target_chars(target_pages, current_text) @@ -434,6 +644,7 @@ async def generate_outline_for_batch( # 调用 long_paper_outline_agent agent = create_react_agent( name = "long_paper_outline_agent", + model_name=_resolve_outline_model(state), temperature=0.1, max_retries=5, parser_type="json", @@ -445,6 +656,7 @@ async def generate_outline_for_batch( pages = result_state.pagecontent or [] if not isinstance(pages, list): pages = [pages] + pages = _normalize_outline_pages(pages) log.info(f"[long_paper] 批次 {batch_idx + 1}/{total_batches} 生成了 {len(pages)} 页") return pages @@ -514,23 +726,46 @@ async def outline_for_long_text(state: Paper2FigureState) -> Paper2FigureState: results = await asyncio.gather(*tasks) log.info(f"[long_paper] 并行执行完成,收到 {len(results)} 个结果") + normalized_batches: List[tuple[List[Dict[str, Any]], Dict[str, Any]]] = [] + for chunk_pages, batch in zip(results, batch_info): + page_budget = int(batch.get("pages_to_generate", 1) or 1) + selected = list(_normalize_outline_pages(chunk_pages)[:page_budget]) + normalized_batches.append((selected, batch)) + + if normalized_batches and all(len(selected) == 0 for selected, _ in normalized_batches): + log.error("[long_paper] 所有批次均未生成有效 outline,拒绝使用全量 fallback 伪造大纲") + state.pagecontent = [] + setattr(state, "outline_generation_error", "long_paper_outline_agent returned no valid pages for every batch") + return state + # 5. 
按顺序处理结果 all_pages = [] - for chunk_pages, batch in zip(results, batch_info): + for selected, batch in normalized_batches: batch_idx = int(batch.get("batch_index", 0)) page_budget = int(batch.get("pages_to_generate", 1) or 1) - selected = list(chunk_pages[:page_budget]) - if len(chunk_pages) > page_budget: + raw_count = len(selected) + if raw_count > page_budget: log.warning( - f"[long_paper] 批次 {batch_idx + 1}: 生成 {len(chunk_pages)} 页," + f"[long_paper] 批次 {batch_idx + 1}: 生成 {raw_count} 页," f"按预算保留 {page_budget} 页" ) - elif len(chunk_pages) < page_budget: + elif raw_count < page_budget: log.warning( - f"[long_paper] 批次 {batch_idx + 1}: 生成页数不足 {len(chunk_pages)}/{page_budget}" + f"[long_paper] 批次 {batch_idx + 1}: 生成页数不足 {raw_count}/{page_budget}" + ) + fallback_pages = _build_fallback_pages_for_batch( + batch=batch, + existing_pages=selected, + page_budget=page_budget, + language=getattr(getattr(state, "request", None), "language", "en"), ) + if fallback_pages: + log.warning( + f"[long_paper] 批次 {batch_idx + 1}: 使用 {len(fallback_pages)} 页 fallback 补齐到 {page_budget} 页" + ) + selected.extend(fallback_pages) else: - log.info(f"[long_paper] 批次 {batch_idx + 1}: 生成 {len(chunk_pages)} 页,符合预算") + log.info(f"[long_paper] 批次 {batch_idx + 1}: 生成 {raw_count} 页,符合预算") all_pages.extend(selected) if len(all_pages) != target_pages: diff --git a/deploy/backend_watchdog.sh b/deploy/backend_watchdog.sh new file mode 100755 index 00000000..7d48b7eb --- /dev/null +++ b/deploy/backend_watchdog.sh @@ -0,0 +1,333 @@ +#!/bin/bash +# FastAPI backend watchdog for port 8000. + +set -u + +PROJECT_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" +cd "$PROJECT_ROOT" || exit 1 + +source "$PROJECT_ROOT/deploy/app_config.sh" + +LOG_DIR="$PROJECT_ROOT/logs" +UVICORN_PID_FILE="$LOG_DIR/uvicorn.pid" +WATCHDOG_PID_FILE="$LOG_DIR/backend_watchdog.pid" +WATCHDOG_LOG_FILE="$LOG_DIR/backend_watchdog.log" +WATCHDOG_LOCK_DIR="$LOG_DIR/backend_watchdog.lock" + +WATCHDOG_INTERVAL_SECONDS="${WATCHDOG_INTERVAL_SECONDS:-15}" +WATCHDOG_FAIL_THRESHOLD="${WATCHDOG_FAIL_THRESHOLD:-2}" +WATCHDOG_RESTART_COOLDOWN_SECONDS="${WATCHDOG_RESTART_COOLDOWN_SECONDS:-30}" +WATCHDOG_HEALTH_TIMEOUT_SECONDS="${WATCHDOG_HEALTH_TIMEOUT_SECONDS:-5}" +WATCHDOG_START_WAIT_SECONDS="${WATCHDOG_START_WAIT_SECONDS:-25}" + +mkdir -p "$LOG_DIR" + +timestamp() { + date '+%Y-%m-%d %H:%M:%S' +} + +log() { + printf '%s | %s\n' "$(timestamp)" "$*" +} + +health_url() { + printf 'http://127.0.0.1:%s/health' "$APP_PORT" +} + +find_port_listener_pids() { + local port="$1" + + if command -v lsof >/dev/null 2>&1; then + lsof -tiTCP:"$port" -sTCP:LISTEN 2>/dev/null | sort -u + return 0 + fi + + if command -v ss >/dev/null 2>&1; then + ss -ltnp 2>/dev/null \ + | awk -v port=":$port" '$4 ~ port { print $NF }' \ + | grep -oE 'pid=[0-9]+' \ + | cut -d= -f2 \ + | sort -u + return 0 + fi + + if command -v netstat >/dev/null 2>&1; then + netstat -ltnp 2>/dev/null \ + | awk -v port=":$port" '$4 ~ port { split($7, parts, "/"); if (parts[1] ~ /^[0-9]+$/) print parts[1] }' \ + | sort -u + return 0 + fi + + return 1 +} + +backend_port_listening() { + [[ -n "$(find_port_listener_pids "$APP_PORT" || true)" ]] +} + +backend_health_ok() { + local response + response="$( + curl -fsS \ + --max-time "$WATCHDOG_HEALTH_TIMEOUT_SECONDS" \ + "$(health_url)" \ + 2>/dev/null || true + )" + [[ "$response" == *'"status":"ok"'* ]] +} + +current_watchdog_pid() { + if [[ -f "$WATCHDOG_PID_FILE" ]]; then + cat "$WATCHDOG_PID_FILE" 2>/dev/null || true + fi +} + +backend_pid_from_pidfile() { + if [[ -f "$UVICORN_PID_FILE" ]]; then + cat "$UVICORN_PID_FILE" 2>/dev/null || true + fi +} + 
+backend_process_running() { + local pid + pid="$(backend_pid_from_pidfile)" + if [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null; then + return 0 + fi + + pgrep -f "uvicorn fastapi_app.main:app" >/dev/null 2>&1 +} + +watchdog_running() { + local pid + pid="$(current_watchdog_pid)" + [[ -n "$pid" ]] && kill -0 "$pid" 2>/dev/null +} + +write_pid() { + echo "$1" > "$WATCHDOG_PID_FILE" +} + +cleanup_pid_file() { + if ! watchdog_running; then + rm -f "$WATCHDOG_PID_FILE" + fi +} + +acquire_lock() { + mkdir "$WATCHDOG_LOCK_DIR" 2>/dev/null +} + +release_lock() { + rmdir "$WATCHDOG_LOCK_DIR" 2>/dev/null || true +} + +wait_for_backend() { + local attempt + for attempt in $(seq 1 "$WATCHDOG_START_WAIT_SECONDS"); do + if backend_port_listening; then + return 0 + fi + sleep 1 + done + return 1 +} + +restart_backend() { + if ! acquire_lock; then + log "[watchdog] another restart is already in progress" + return 1 + fi + + log "[watchdog] backend unhealthy, running deploy/start.sh" + if bash "$PROJECT_ROOT/deploy/start.sh" >> "$WATCHDOG_LOG_FILE" 2>&1; then + if wait_for_backend; then + log "[watchdog] backend recovered successfully" + release_lock + return 0 + fi + log "[watchdog] deploy/start.sh returned success, but port $APP_PORT did not recover in time" + else + log "[watchdog] deploy/start.sh failed" + fi + + release_lock + return 1 +} + +run_once() { + if backend_port_listening; then + log "[watchdog] backend port $APP_PORT is listening" + return 0 + fi + + if backend_process_running; then + log "[watchdog] port $APP_PORT is down, but backend process is still alive" + return 0 + fi + + restart_backend +} + +run_loop() { + local consecutive_failures=0 + + trap 'rm -f "$WATCHDOG_PID_FILE"; release_lock; exit 0' INT TERM EXIT + write_pid "$$" + log "[watchdog] started, interval=${WATCHDOG_INTERVAL_SECONDS}s fail_threshold=${WATCHDOG_FAIL_THRESHOLD}" + + while true; do + if backend_port_listening; then + consecutive_failures=0 + else + 
consecutive_failures=$((consecutive_failures + 1)) + if backend_process_running; then + log "[watchdog] port $APP_PORT is down, but backend process is still alive (${consecutive_failures}/${WATCHDOG_FAIL_THRESHOLD})" + else + log "[watchdog] port $APP_PORT is down (${consecutive_failures}/${WATCHDOG_FAIL_THRESHOLD})" + fi + if (( consecutive_failures >= WATCHDOG_FAIL_THRESHOLD )); then + restart_backend || true + consecutive_failures=0 + sleep "$WATCHDOG_RESTART_COOLDOWN_SECONDS" + continue + fi + fi + + sleep "$WATCHDOG_INTERVAL_SECONDS" + done +} + +start_watchdog() { + cleanup_pid_file + if watchdog_running; then + log "[watchdog] already running with PID $(current_watchdog_pid)" + exit 0 + fi + + if command -v setsid >/dev/null 2>&1; then + nohup setsid bash "$0" run >> "$WATCHDOG_LOG_FILE" 2>&1 < /dev/null & + else + nohup bash "$0" run >> "$WATCHDOG_LOG_FILE" 2>&1 < /dev/null & + fi + sleep 1 + + if watchdog_running; then + log "[watchdog] started with PID $(current_watchdog_pid)" + exit 0 + fi + + log "[watchdog] failed to start" + exit 1 +} + +stop_watchdog() { + local pid + local waited=0 + pid="$(current_watchdog_pid)" + if [[ -z "$pid" ]]; then + log "[watchdog] not running" + rm -f "$WATCHDOG_PID_FILE" + exit 0 + fi + + if kill "$pid" 2>/dev/null; then + while kill -0 "$pid" 2>/dev/null && (( waited < 5 )); do + sleep 1 + waited=$((waited + 1)) + done + if kill -0 "$pid" 2>/dev/null; then + kill -KILL "$pid" 2>/dev/null || true + sleep 1 + fi + cleanup_pid_file + release_lock + if kill -0 "$pid" 2>/dev/null; then + log "[watchdog] failed to stop PID $pid" + exit 1 + fi + log "[watchdog] stopped" + exit 0 + fi + + log "[watchdog] failed to stop PID $pid" + exit 1 +} + +status_watchdog() { + cleanup_pid_file + if watchdog_running; then + log "[watchdog] running with PID $(current_watchdog_pid)" + else + log "[watchdog] not running" + fi + + if backend_port_listening; then + log "[watchdog] backend port $APP_PORT is listening" + else + log "[watchdog] 
backend port $APP_PORT is NOT listening" + fi + + if backend_health_ok; then + log "[watchdog] backend health OK on $(health_url)" + elif backend_process_running; then + log "[watchdog] backend health FAILED, but uvicorn process is still alive" + else + log "[watchdog] backend health FAILED on $(health_url)" + fi + + log "[watchdog] log file: $WATCHDOG_LOG_FILE" +} + +show_logs() { + touch "$WATCHDOG_LOG_FILE" + tail -n 80 "$WATCHDOG_LOG_FILE" +} + +usage() { + cat <<'EOF' +Usage: + bash deploy/backend_watchdog.sh start + bash deploy/backend_watchdog.sh stop + bash deploy/backend_watchdog.sh status + bash deploy/backend_watchdog.sh logs + bash deploy/backend_watchdog.sh run + bash deploy/backend_watchdog.sh run-once + +Optional env vars: + WATCHDOG_INTERVAL_SECONDS + WATCHDOG_FAIL_THRESHOLD + WATCHDOG_RESTART_COOLDOWN_SECONDS + WATCHDOG_HEALTH_TIMEOUT_SECONDS + WATCHDOG_START_WAIT_SECONDS +EOF +} + +case "${1:-status}" in + start) + start_watchdog + ;; + stop) + stop_watchdog + ;; + status) + status_watchdog + ;; + logs) + show_logs + ;; + run) + run_loop + ;; + run-once) + run_once + ;; + -h|--help|help) + usage + ;; + *) + echo "Unknown command: ${1:-}" >&2 + usage + exit 1 + ;; +esac diff --git a/deploy/docker-build.sh b/deploy/docker-build.sh new file mode 100644 index 00000000..72bb7f16 --- /dev/null +++ b/deploy/docker-build.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -euo pipefail + +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "$PROJECT_ROOT" || exit 1 + +# shellcheck disable=SC1091 +source "$PROJECT_ROOT/deploy/docker_env.sh" + +docker compose "${COMPOSE_FILE_ARGS[@]}" build "$@" diff --git a/deploy/docker-down.sh b/deploy/docker-down.sh new file mode 100644 index 00000000..c27bcb70 --- /dev/null +++ b/deploy/docker-down.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -euo pipefail + +PROJECT_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" +cd "$PROJECT_ROOT" || exit 1 + +source "$PROJECT_ROOT/deploy/docker_env.sh" + +docker compose "${COMPOSE_FILE_ARGS[@]}" down "$@" diff --git a/deploy/docker-logs.sh b/deploy/docker-logs.sh new file mode 100644 index 00000000..099b090f --- /dev/null +++ b/deploy/docker-logs.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -euo pipefail + +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "$PROJECT_ROOT" || exit 1 + +source "$PROJECT_ROOT/deploy/docker_env.sh" + +docker compose "${COMPOSE_FILE_ARGS[@]}" logs -f "$@" diff --git a/deploy/docker-up.sh b/deploy/docker-up.sh new file mode 100644 index 00000000..5163cc0c --- /dev/null +++ b/deploy/docker-up.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -euo pipefail + +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" +cd "$PROJECT_ROOT" || exit 1 + +# shellcheck disable=SC1091 +source "$PROJECT_ROOT/deploy/docker_env.sh" + +extra_args=() +if [ "${DOCKER_WITH_SAM3:-0}" = "1" ]; then + extra_args+=(--profile sam3) +fi + +docker compose "${COMPOSE_FILE_ARGS[@]}" "${extra_args[@]}" up -d --build "$@" diff --git a/deploy/docker.env.example b/deploy/docker.env.example new file mode 100644 index 00000000..94905110 --- /dev/null +++ b/deploy/docker.env.example @@ -0,0 +1,28 @@ +# Docker / docker compose defaults + +BACKEND_PORT=8000 +FRONTEND_PORT=3000 +DOCKER_BACKEND_BIND_PORT=8000 +DOCKER_FRONTEND_BIND_PORT=80 +DOCKER_SAM3_BIND_PORT=8021 +DOCKER_HOST_NETWORK=0 +DOCKER_APP_WORKERS=1 +DOCKER_BACKEND_INSTALL_CUDA=0 +PYTHON_BASE_IMAGE=python:3.11-slim +NODE_BASE_IMAGE=node:20-alpine +NGINX_BASE_IMAGE=nginx:alpine + +# Optional local SAM3 container profile +SAM3_PORT=8021 +SAM3_HOME= +SAM3_CHECKPOINT_PATH= +SAM3_BPE_PATH= +SAM3_SERVER_URLS= + +# Optional frontend public build args overrides +VITE_API_KEY= +VITE_API_BASE_URL= +VITE_DEFAULT_LLM_API_URL= +VITE_LLM_API_URLS= +VITE_SUPABASE_URL= +VITE_SUPABASE_ANON_KEY= diff --git a/deploy/docker_env.sh b/deploy/docker_env.sh new file mode 100644 index 00000000..20f843a7 --- /dev/null +++ 
b/deploy/docker_env.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" + +load_env_file() { + local file="$1" + if [ -f "$file" ]; then + set -a + # shellcheck disable=SC1090 + source "$file" + set +a + fi +} + +# Compose-level defaults first. +load_env_file "$PROJECT_ROOT/deploy/docker.env.example" + +# Backend runtime vars used by env_file and optional SAM3 defaults. +load_env_file "$PROJECT_ROOT/fastapi_app/.env" + +# Frontend public defaults for static build. +load_env_file "$PROJECT_ROOT/frontend-workflow/.env" + +# Optional machine profile for local SAM3 path defaults. +if [ -f "$PROJECT_ROOT/deploy/profiles/nv.env" ]; then + load_env_file "$PROJECT_ROOT/deploy/profiles/nv.env" +elif [ -f "$PROJECT_ROOT/deploy/profiles/muxi.env" ]; then + load_env_file "$PROJECT_ROOT/deploy/profiles/muxi.env" +fi + +# Local docker overrides win last. +load_env_file "$PROJECT_ROOT/deploy/docker.env" + +COMPOSE_FILE_ARGS=(-f "$PROJECT_ROOT/docker-compose.yml") +if [ "${DOCKER_HOST_NETWORK:-0}" = "1" ]; then + COMPOSE_FILE_ARGS+=(-f "$PROJECT_ROOT/docker-compose.host.yml") +fi diff --git a/docker-compose.host.yml b/docker-compose.host.yml new file mode 100644 index 00000000..f3f3143f --- /dev/null +++ b/docker-compose.host.yml @@ -0,0 +1,12 @@ +services: + paper2any-backend: + network_mode: host + ports: !reset [] + + paper2any-frontend: + network_mode: host + ports: !reset [] + + paper2any-sam3: + network_mode: host + ports: !reset [] diff --git a/docker-compose.yml b/docker-compose.yml index 1e063e6a..e69c1db4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,19 +1,55 @@ -version: "3.8" - services: paper2any-backend: build: context: . 
dockerfile: Dockerfile + args: + INSTALL_CUDA: ${DOCKER_BACKEND_INSTALL_CUDA:-0} + PYTHON_BASE_IMAGE: ${PYTHON_BASE_IMAGE:-python:3.11-slim} + image: paper2any-backend:local container_name: paper2any-backend - network_mode: host + env_file: + - ./fastapi_app/.env environment: - - PYTHONUNBUFFERED=1 - - RMBG_MODEL_PATH=/app/models/RMBG-2.0 + PYTHONUNBUFFERED: "1" + PAPER2ANY_RUNTIME_TMPDIR: /app/outputs/system/tmp + RMBG_MODEL_PATH: ${RMBG_MODEL_PATH:-/app/models/RMBG-2.0} + SAM3_SERVER_URLS: ${SAM3_SERVER_URLS:-} + SAM3_HOME: ${SAM3_HOME:-} + SAM3_CHECKPOINT_PATH: ${SAM3_CHECKPOINT_PATH:-/app/models/sam3/sam3.pt} + SAM3_BPE_PATH: ${SAM3_BPE_PATH:-/app/models/sam3/bpe_simple_vocab_16e6.txt.gz} + PAPER2DRAWIO_SAM3_CHECKPOINT_PATH: ${SAM3_CHECKPOINT_PATH:-/app/models/sam3/sam3.pt} + PAPER2DRAWIO_SAM3_BPE_PATH: ${SAM3_BPE_PATH:-/app/models/sam3/bpe_simple_vocab_16e6.txt.gz} + command: + - python + - -m + - uvicorn + - fastapi_app.main:app + - --host + - 0.0.0.0 + - --port + - "${DOCKER_BACKEND_BIND_PORT:-8000}" + - --workers + - "${DOCKER_APP_WORKERS:-1}" + - --log-level + - info volumes: - ./outputs:/app/outputs - ./models:/app/models - - ./fastapi_app/.env:/app/fastapi_app/.env + - ./data:/app/data + - ./database:/app/database + - ./logs:/app/logs + - ./raw_data_store:/app/raw_data_store + - ./rebuttal_sessions:/app/rebuttal_sessions + - ./fastapi_app/.env:/app/fastapi_app/.env:ro + ports: + - "${BACKEND_PORT:-8000}:${DOCKER_BACKEND_BIND_PORT:-8000}" + healthcheck: + test: ["CMD", "curl", "-fsS", "http://127.0.0.1:${DOCKER_BACKEND_BIND_PORT:-8000}/health"] + interval: 20s + timeout: 5s + retries: 10 + start_period: 30s restart: unless-stopped paper2any-frontend: @@ -21,13 +57,71 @@ services: context: . 
dockerfile: frontend-workflow/Dockerfile args: + NODE_BASE_IMAGE: ${NODE_BASE_IMAGE:-node:20-alpine} + NGINX_BASE_IMAGE: ${NGINX_BASE_IMAGE:-nginx:alpine} VITE_API_KEY: ${VITE_API_KEY:-} - VITE_DEFAULT_LLM_API_URL: ${VITE_DEFAULT_LLM_API_URL:-https://api.apiyi.com/v1} - VITE_LLM_API_URLS: ${VITE_LLM_API_URLS:-https://api.apiyi.com/v1,http://b.apiyi.com:16888/v1,http://123.129.219.111:3000/v1} VITE_API_BASE_URL: ${VITE_API_BASE_URL:-} + VITE_DEFAULT_LLM_API_URL: ${VITE_DEFAULT_LLM_API_URL:-} + VITE_LLM_API_URLS: ${VITE_LLM_API_URLS:-} + VITE_SUPABASE_URL: ${VITE_SUPABASE_URL:-} + VITE_SUPABASE_ANON_KEY: ${VITE_SUPABASE_ANON_KEY:-} + image: paper2any-frontend:local container_name: paper2any-frontend - ports: - - "3000:80" depends_on: - - paper2any-backend + paper2any-backend: + condition: service_healthy + environment: + NGINX_LISTEN_PORT: "${DOCKER_FRONTEND_BIND_PORT:-80}" + BACKEND_UPSTREAM_URL: "${BACKEND_UPSTREAM_URL:-http://paper2any-backend:${DOCKER_BACKEND_BIND_PORT:-8000}}" + ports: + - "${FRONTEND_PORT:-3000}:${DOCKER_FRONTEND_BIND_PORT:-80}" + healthcheck: + test: ["CMD", "wget", "-qO-", "http://127.0.0.1:${DOCKER_FRONTEND_BIND_PORT:-80}/"] + interval: 20s + timeout: 5s + retries: 10 + start_period: 10s + restart: unless-stopped + + paper2any-sam3: + profiles: ["sam3"] + build: + context: . 
+ dockerfile: Dockerfile + args: + INSTALL_CUDA: 1 + PYTHON_BASE_IMAGE: ${PYTHON_BASE_IMAGE:-python:3.11-slim} + image: paper2any-backend:local-gpu + container_name: paper2any-sam3 + environment: + PYTHONUNBUFFERED: "1" + SAM3_HOME: ${SAM3_HOME:-/app/models/sam3-official/sam3} + SAM3_CHECKPOINT_PATH: ${SAM3_CHECKPOINT_PATH:-/app/models/sam3/sam3.pt} + SAM3_BPE_PATH: ${SAM3_BPE_PATH:-/app/models/sam3/bpe_simple_vocab_16e6.txt.gz} + command: + - python + - -m + - dataflow_agent.toolkits.model_servers.sam3_server + - --host + - 0.0.0.0 + - --port + - "${DOCKER_SAM3_BIND_PORT:-8021}" + - --checkpoint + - "${SAM3_CHECKPOINT_PATH:-/app/models/sam3/sam3.pt}" + - --bpe + - "${SAM3_BPE_PATH:-/app/models/sam3/bpe_simple_vocab_16e6.txt.gz}" + - --device + - cuda + volumes: + - ./models:/app/models + - ./logs:/app/logs + ports: + - "${SAM3_PORT:-8021}:${DOCKER_SAM3_BIND_PORT:-8021}" + healthcheck: + test: ["CMD", "curl", "-fsS", "http://127.0.0.1:${DOCKER_SAM3_BIND_PORT:-8021}/health"] + interval: 20s + timeout: 5s + retries: 10 + start_period: 30s + gpus: all restart: unless-stopped diff --git a/docs/contributing.md b/docs/contributing.md index 2a99cc24..9a8c5fd3 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -26,7 +26,10 @@ conda create -n paper2any python=3.11 -y conda activate paper2any pip install --upgrade pip -pip install -r requirements-paper.txt || pip install -r requirements-paper-backup.txt +pip install -r requirements-paper.txt + +# NVIDIA GPU 机器再额外安装 +pip install -r requirements-cu12.txt ``` ### 前端 @@ -40,8 +43,7 @@ cd .. ### 最小启动方式 ```bash -bash deploy/start.sh -bash deploy/start_frontend.sh +bash deploy/start_nv.sh ``` 如果你的改动依赖本地模型服务,再继续使用: diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 0fbfef2b..c49cfcac 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -8,6 +8,16 @@ ## 1. 四类配置文件 +## 0. 
推荐先决定 simple 还是 advanced + +- **Simple 模式**:`fastapi_app/.env.simple.example` + `frontend-workflow/.env.simple.example` +- **Advanced 模式**:`fastapi_app/.env.example` + `frontend-workflow/.env.example` + +建议: + +- 只想尽快跑起来:先用 simple +- 需要每个 workflow 分开配 provider / model:再切 advanced + | 文件 | 作用 | 不该放什么 | | --- | --- | --- | | `fastapi_app/.env` | 后端业务配置 | 不要放机器部署参数 | @@ -90,11 +100,11 @@ ## 6. 推荐的配置顺序 -1. 先复制三个模板: +1. 先复制三个模板(simple 模式推荐): ```bash -cp fastapi_app/.env.example fastapi_app/.env -cp frontend-workflow/.env.example frontend-workflow/.env +cp fastapi_app/.env.simple.example fastapi_app/.env +cp frontend-workflow/.env.simple.example frontend-workflow/.env cp deploy/profiles/nv.env.example deploy/profiles/nv.env ``` diff --git a/docs/guides/open_source_deployment.md b/docs/guides/open_source_deployment.md index 47b4d796..a7874b8c 100644 --- a/docs/guides/open_source_deployment.md +++ b/docs/guides/open_source_deployment.md @@ -210,7 +210,10 @@ conda create -n paper2any python=3.11 -y conda activate paper2any pip install --upgrade pip -pip install -r requirements-paper.txt || pip install -r requirements-paper-backup.txt +pip install -r requirements-paper.txt + +# NVIDIA GPU 机器再额外安装 +pip install -r requirements-cu12.txt ``` 如果你需要本地包开发模式: @@ -222,6 +225,8 @@ pip install -e . 说明: - `requirements-paper.txt` 已经包含 `requirements-base.txt` +- `requirements-cu12.txt` 只用于 NVIDIA Linux + CUDA 12 +- `requirements-system-ubuntu.txt` 列的是系统包,不是 Python 包 - 当前部署脚本会检查后端环境里是否至少有这些运行时依赖:`cv2`、`cairosvg`、`fastapi`、`moviepy`、`supabase`、`torch`、`uvicorn` ## 6.2 前端 Node 环境 @@ -389,7 +394,7 @@ LIVEPORTRAIT_KEY=your_liveportrait_key 如果你需要下面这些功能,就必须补全 Supabase: -- 登录 / 注册 / 匿名登录 +- 登录 / 注册 - 账户页 - 点数 / 邀请码 - 历史文件 @@ -405,7 +410,7 @@ SUPABASE_SERVICE_ROLE_KEY=your_supabase_service_role_key 如果你不配 Supabase: -- 系统依然可以做匿名或本地测试 +- 系统依然可以跳过认证门禁做本地测试 - 但账户、登录、历史、邀请码、账号点数这些能力会不完整或不可用 ## 9. 
前端配置:`frontend-workflow/.env` @@ -585,6 +590,10 @@ bash deploy/start_muxi.sh ### 方式 A:只起前后端 ```bash +set -a +source deploy/profiles/nv.env +set +a + bash deploy/start.sh bash deploy/start_frontend.sh ``` diff --git a/docs/index.md b/docs/index.md index 9237b79a..b131973e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -67,8 +67,7 @@ cp fastapi_app/.env.example fastapi_app/.env cp frontend-workflow/.env.example frontend-workflow/.env -bash deploy/start.sh -bash deploy/start_frontend.sh +bash deploy/start_nv.sh ``` 默认访问地址: diff --git a/docs/installation.md b/docs/installation.md index 884b0d94..6ddaf1c4 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -44,6 +44,7 @@ sudo apt-get update sudo apt-get install -y \ build-essential \ curl \ + ffmpeg \ git \ inkscape \ libreoffice \ @@ -61,7 +62,8 @@ sudo apt-get install -y \ 说明: - `Inkscape`、`LibreOffice`、`poppler-utils` 对图形导出和文档转换链路很常见 -- 部分功能还会用到 `wkhtmltopdf` +- 部分功能还会用到 `ffmpeg`、`wkhtmltopdf`、`tectonic` +- `requirements-system-ubuntu.txt` 里列的是系统包名,不是 Python 包 ## 4. 后端安装 @@ -72,7 +74,10 @@ conda create -n paper2any python=3.11 -y conda activate paper2any pip install --upgrade pip -pip install -r requirements-paper.txt || pip install -r requirements-paper-backup.txt +pip install -r requirements-paper.txt + +# NVIDIA GPU 机器再额外安装 +pip install -r requirements-cu12.txt ``` 可选: diff --git a/docs/quickstart.md b/docs/quickstart.md index 42a67a66..4bb5e944 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -25,7 +25,10 @@ conda create -n paper2any python=3.11 -y conda activate paper2any pip install --upgrade pip -pip install -r requirements-paper.txt || pip install -r requirements-paper-backup.txt +pip install -r requirements-paper.txt + +# NVIDIA GPU 机器再额外安装 +pip install -r requirements-cu12.txt ``` ### 3. 准备前端 Node 环境 @@ -40,10 +43,19 @@ cd .. ### 4. 
复制配置模板 +推荐优先使用**粗粒度 simple 模式**,只填少量 URL / Key: + +```bash +cp fastapi_app/.env.simple.example fastapi_app/.env +cp frontend-workflow/.env.simple.example frontend-workflow/.env +cp deploy/profiles/nv.env.example deploy/profiles/nv.env +``` + +如果你需要逐个 workflow 覆盖模型,再改用: + ```bash cp fastapi_app/.env.example fastapi_app/.env cp frontend-workflow/.env.example frontend-workflow/.env -cp deploy/profiles/nv.env.example deploy/profiles/nv.env ``` ### 5. 至少填这几项 @@ -53,8 +65,9 @@ cp deploy/profiles/nv.env.example deploy/profiles/nv.env ```bash BACKEND_API_KEY=your-backend-api-key APP_BILLING_MODE=free -DF_API_URL=https://your-llm-gateway/v1 -DF_API_KEY=your-llm-api-key +PAPER2ANY_CONFIG_MODE=simple +SIMPLE_TEXT_API_URL=https://your-llm-gateway/v1 +SIMPLE_TEXT_API_KEY=your-llm-api-key ``` `frontend-workflow/.env` @@ -70,15 +83,20 @@ VITE_API_BASE_URL= - 本地 `npm run dev` + Vite 代理模式下,`VITE_API_BASE_URL` 通常留空。 - 如果你准备启用登录、账户点数、历史文件,需要继续补 `SUPABASE_*`。详见 [开源部署与配置总指南](guides/open_source_deployment.md)。 -### 6. 启动后端 +### 6. 启动整套服务(推荐) ```bash -bash deploy/start.sh +bash deploy/start_nv.sh ``` -### 7. 启动前端 +### 7. 手动分开启动(可选) ```bash +set -a +source deploy/profiles/nv.env +set +a + +bash deploy/start.sh bash deploy/start_frontend.sh ``` diff --git a/fastapi_app/.env.example b/fastapi_app/.env.example index 404fb4ed..18468434 100644 --- a/fastapi_app/.env.example +++ b/fastapi_app/.env.example @@ -1,6 +1,9 @@ # ============================================ -# 使用说明 +# 使用说明(Advanced / Fine-grained) # ============================================ +# 如果你只想填很少的 URL / Key,请优先使用: +# fastapi_app/.env.simple.example +# 当前这个 .env.example 是“细粒度 / 高级模式”示例,保留所有 workflow / model 级别开关。 # 1. 复制本文件为 .env:cp .env.example .env # 2. 在 .env 中填入真实 key(不要提交 .env 到 git) # 3. 
启动后端后会自动加载 fastapi_app/.env,无需在终端 export diff --git a/fastapi_app/.env.simple.example b/fastapi_app/.env.simple.example new file mode 100644 index 00000000..1f45daa8 --- /dev/null +++ b/fastapi_app/.env.simple.example @@ -0,0 +1,51 @@ +# ============================================ +# Paper2Any Backend - Simple Mode Example +# ============================================ +# 目标: +# - 用户只填少量 URL / Key +# - 不再逐个 workflow 配模型 +# - 所有 workflow 文本模型统一走 SIMPLE_TEXT_* +# - 所有生图 workflow 统一走 SIMPLE_IMAGE_* +# - OCR / VLM 统一走 SIMPLE_OCR_* + +BACKEND_API_KEY=your-backend-api-key +APP_BILLING_MODE=free +PAPER2ANY_CONFIG_MODE=simple + +# 文本模型入口(统一给 paper2ppt / kb / drawio / report / rebuttal 等使用) +SIMPLE_TEXT_API_URL=http://123.129.219.111:3000/v1 +SIMPLE_TEXT_API_KEY=sk-your-text-key + +# 生图 / 改图入口(统一给 paper2ppt 图片版 / ppt2polish / paper2figure / poster 等使用) +SIMPLE_IMAGE_API_URL=https://api.ikuncode.cc +SIMPLE_IMAGE_API_KEY=sk-your-image-key + +# OCR / VLM 入口(统一给 pdf2ppt / drawio / 视觉理解类流程使用) +SIMPLE_OCR_API_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 +SIMPLE_OCR_API_KEY=your-ocr-key + +# 下面这些模型一般不需要改;simple 模式下如果不填,就自动使用默认推荐值 +SIMPLE_TEXT_MODEL=gpt-4o +SIMPLE_IMAGE_MODEL=gemini-3-pro-image-preview +SIMPLE_VLM_MODEL=qwen-vl-ocr-2025-11-20 +SIMPLE_EMBEDDING_MODEL=text-embedding-3-small + +# 其他基础运行配置 +PAPER2FIGURE_TO_PPT_FORCE_AI_EDIT=true +PAPER2DRAWIO_ENABLE_VLM_VALIDATION=false +SAM3_SERVER_URLS=http://127.0.0.1:8021 +MINERU_API_BASE_URL=https://mineru.net/api/v4 +MINERU_API_KEY=your_mineru_api_key +PAPER2DRAWIO_OCR_API_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 +PAPER2DRAWIO_OCR_API_KEY=your-ocr-key + +# 登录 / 历史 / 配额(可选) +SUPABASE_URL=https://your-project-id.supabase.co +SUPABASE_ANON_KEY=your_supabase_anon_key +SUPABASE_SERVICE_ROLE_KEY=your_supabase_service_role_key + +# 购买 / 兑换(可选) +POINTS_PURCHASE_URL= +POINTS_REDEEM_CODE_FILE_10=./data/redeem_codes/points_10.txt +POINTS_REDEEM_CODE_FILE_50=./data/redeem_codes/points_50.txt 
+POINTS_REDEEM_CODE_FILE_100=./data/redeem_codes/points_100.txt diff --git a/fastapi_app/config/settings.py b/fastapi_app/config/settings.py index 41793320..53d2852c 100644 --- a/fastapi_app/config/settings.py +++ b/fastapi_app/config/settings.py @@ -40,6 +40,18 @@ class AppSettings(BaseSettings): MODEL_QWEN_VL_OCR: str = "qwen-vl-ocr-2025-11-20" # API Configuration + PAPER2ANY_CONFIG_MODE: str = "advanced" + SIMPLE_TEXT_API_URL: str = "" + SIMPLE_TEXT_API_KEY: str = "" + SIMPLE_IMAGE_API_URL: str = "" + SIMPLE_IMAGE_API_KEY: str = "" + SIMPLE_OCR_API_URL: str = "" + SIMPLE_OCR_API_KEY: str = "" + SIMPLE_TEXT_MODEL: str = "gpt-4o" + SIMPLE_IMAGE_MODEL: str = "gemini-3-pro-image-preview" + SIMPLE_VLM_MODEL: str = "qwen-vl-ocr-2025-11-20" + SIMPLE_EMBEDDING_MODEL: str = "text-embedding-3-small" + DEFAULT_LLM_API_URL: str = "http://123.129.219.111:3000/v1/" DF_API_URL: str = "http://123.129.219.111:3000/v1" DF_API_KEY: str = "" @@ -123,6 +135,14 @@ class AppSettings(BaseSettings): PAPER2DRAWIO_SAM3_BPE_PATH: str = str(_project_root() / "models" / "sam3" / "bpe_simple_vocab_16e6.txt.gz") PAPER2DRAWIO_OCR_API_URL: str = "https://dashscope.aliyuncs.com/compatible-mode/v1" PAPER2DRAWIO_OCR_API_KEY: str = "" + PAPER2DRAWIO_SEGMENT_HINT_API_URL: str = "" + PAPER2DRAWIO_SEGMENT_HINT_API_KEY: str = "" + PAPER2DRAWIO_SEGMENT_HINT_VLM_MODEL: str = "gpt-4o" + PAPER2DRAWIO_SEGMENT_HINT_TIMEOUT: int = 120 + PAPER2PPT_SEGMENT_HINT_API_URL: str = "" + PAPER2PPT_SEGMENT_HINT_API_KEY: str = "" + PAPER2PPT_SEGMENT_HINT_VLM_MODEL: str = "gpt-4o" + PAPER2PPT_SEGMENT_HINT_TIMEOUT: int = 120 MINERU_API_BASE_URL: str = "https://mineru.net/api/v4" MINERU_API_KEY: str = "" MINERU_API_MODEL_VERSION: str = "vlm" @@ -151,22 +171,39 @@ class AppSettings(BaseSettings): PDF2PPT_DEFAULT_MODEL: str = "gpt-4o" PDF2PPT_DEFAULT_IMAGE_MODEL: str = "gemini-2.5-flash-image" + # Image2PPT Workflow + IMAGE2PPT_DEFAULT_MODEL: str = "gpt-4o" + IMAGE2PPT_DEFAULT_IMAGE_MODEL: str = "gemini-2.5-flash-image" + # 
Paper2Figure Workflow PAPER2FIGURE_DEFAULT_MODEL: str = "gpt-4o" PAPER2FIGURE_DEFAULT_IMAGE_MODEL: str = "gemini-3-pro-image-preview" # Paper2Video Workflow PAPER2VIDEO_DEFAULT_MODEL: str = "gpt-4o" + PAPER2VIDEO_TTS_MODEL: str = "cosyvoice-v3-flash" + PAPER2VIDEO_TALKING_MODEL: str = "liveportrait" # Paper2Drawio Workflow PAPER2DRAWIO_DEFAULT_MODEL: str = "gpt-5.4" PAPER2DRAWIO_VLM_MODEL: str = "gpt-4o" PAPER2DRAWIO_ENABLE_VLM_VALIDATION: bool = False + # Image2Drawio Workflow + IMAGE2DRAWIO_DEFAULT_MODEL: str = "gpt-4o" + IMAGE2DRAWIO_DEFAULT_IMAGE_MODEL: str = "gemini-3-pro-image-preview" + IMAGE2DRAWIO_VLM_MODEL: str = "qwen-vl-ocr-2025-11-20" + # Knowledge Base KB_EMBEDDING_MODEL: str = "gemini-2.5-flash" KB_CHAT_MODEL: str = "gpt-4o" + # MindMap / Poster / Rebuttal + MINDMAP_DEFAULT_MODEL: str = "gpt-4o" + PAPER2POSTER_DEFAULT_MODEL: str = "gpt-4o" + PAPER2POSTER_VISION_MODEL: str = "gpt-4o" + PAPER2REBUTTAL_DEFAULT_MODEL: str = "gpt-4o" + # ============================================ # Layer 3: Role-level Model Configuration # ============================================ @@ -196,5 +233,127 @@ class Config: extra = "ignore" +def _first_non_empty(*values: Optional[str]) -> str: + for value in values: + text = str(value or "").strip() + if text: + return text + return "" + + +def _apply_simple_mode(settings_obj: AppSettings) -> AppSettings: + mode = str(getattr(settings_obj, "PAPER2ANY_CONFIG_MODE", "") or "").strip().lower() + if mode != "simple": + return settings_obj + + text_api_url = _first_non_empty( + settings_obj.SIMPLE_TEXT_API_URL, + settings_obj.DF_API_URL, + settings_obj.DEFAULT_LLM_API_URL, + ) + text_api_key = _first_non_empty( + settings_obj.SIMPLE_TEXT_API_KEY, + settings_obj.DF_API_KEY, + ) + image_api_url = _first_non_empty( + settings_obj.SIMPLE_IMAGE_API_URL, + settings_obj.DF_IMAGE_API_URL, + text_api_url, + ) + image_api_key = _first_non_empty( + settings_obj.SIMPLE_IMAGE_API_KEY, + settings_obj.DF_IMAGE_API_KEY, + text_api_key, + ) + 
ocr_api_url = _first_non_empty( + settings_obj.SIMPLE_OCR_API_URL, + settings_obj.PAPER2DRAWIO_OCR_API_URL, + text_api_url, + ) + ocr_api_key = _first_non_empty( + settings_obj.SIMPLE_OCR_API_KEY, + settings_obj.PAPER2DRAWIO_OCR_API_KEY, + text_api_key, + ) + + text_model = _first_non_empty(settings_obj.SIMPLE_TEXT_MODEL, "gpt-4o") + image_model = _first_non_empty(settings_obj.SIMPLE_IMAGE_MODEL, "gemini-3-pro-image-preview") + vlm_model = _first_non_empty(settings_obj.SIMPLE_VLM_MODEL, "qwen-vl-ocr-2025-11-20") + embedding_model = _first_non_empty(settings_obj.SIMPLE_EMBEDDING_MODEL, "text-embedding-3-small") + + settings_obj.DEFAULT_LLM_API_URL = text_api_url or settings_obj.DEFAULT_LLM_API_URL + settings_obj.DF_API_URL = text_api_url or settings_obj.DF_API_URL + settings_obj.DF_API_KEY = text_api_key or settings_obj.DF_API_KEY + settings_obj.DF_IMAGE_API_URL = image_api_url or settings_obj.DF_IMAGE_API_URL + settings_obj.DF_IMAGE_API_KEY = image_api_key or settings_obj.DF_IMAGE_API_KEY + settings_obj.PAPER2DRAWIO_OCR_API_URL = ocr_api_url or settings_obj.PAPER2DRAWIO_OCR_API_URL + settings_obj.PAPER2DRAWIO_OCR_API_KEY = ocr_api_key or settings_obj.PAPER2DRAWIO_OCR_API_KEY + + for scope in ( + "PAPER2ANY", + "PAPER2PPT", + "PPT2POLISH", + "PDF2PPT", + "IMAGE2PPT", + "PAPER2DRAWIO", + "PAPER2POSTER", + "PAPER2VIDEO", + "KB", + "KB_DEEPRESEARCH", + "PAPER2REBUTTAL", + ): + setattr(settings_obj, f"{scope}_MANAGED_API_URL", text_api_url) + setattr(settings_obj, f"{scope}_MANAGED_API_KEY", text_api_key) + setattr(settings_obj, f"{scope}_MANAGED_IMAGE_API_URL", image_api_url) + setattr(settings_obj, f"{scope}_MANAGED_IMAGE_API_KEY", image_api_key) + + settings_obj.PAPER2DRAWIO_SEGMENT_HINT_API_URL = text_api_url + settings_obj.PAPER2DRAWIO_SEGMENT_HINT_API_KEY = text_api_key + settings_obj.PAPER2PPT_SEGMENT_HINT_API_URL = text_api_url + settings_obj.PAPER2PPT_SEGMENT_HINT_API_KEY = text_api_key + + settings_obj.PAPER2PPT_DEFAULT_MODEL = text_model + 
settings_obj.PAPER2PPT_DEFAULT_IMAGE_MODEL = image_model + settings_obj.PDF2PPT_DEFAULT_MODEL = text_model + settings_obj.PDF2PPT_DEFAULT_IMAGE_MODEL = image_model + settings_obj.IMAGE2PPT_DEFAULT_MODEL = text_model + settings_obj.IMAGE2PPT_DEFAULT_IMAGE_MODEL = image_model + settings_obj.PAPER2FIGURE_DEFAULT_MODEL = text_model + settings_obj.PAPER2FIGURE_DEFAULT_IMAGE_MODEL = image_model + settings_obj.PAPER2VIDEO_DEFAULT_MODEL = text_model + settings_obj.PAPER2VIDEO_TTS_MODEL = settings_obj.PAPER2VIDEO_TTS_MODEL or "cosyvoice-v3-flash" + settings_obj.PAPER2VIDEO_TALKING_MODEL = settings_obj.PAPER2VIDEO_TALKING_MODEL or "liveportrait" + settings_obj.PAPER2DRAWIO_DEFAULT_MODEL = text_model + settings_obj.PAPER2DRAWIO_VLM_MODEL = vlm_model + settings_obj.IMAGE2DRAWIO_DEFAULT_MODEL = text_model + settings_obj.IMAGE2DRAWIO_DEFAULT_IMAGE_MODEL = image_model + settings_obj.IMAGE2DRAWIO_VLM_MODEL = vlm_model + settings_obj.KB_CHAT_MODEL = text_model + settings_obj.KB_EMBEDDING_MODEL = embedding_model + settings_obj.MINDMAP_DEFAULT_MODEL = text_model + settings_obj.PAPER2POSTER_DEFAULT_MODEL = text_model + settings_obj.PAPER2POSTER_VISION_MODEL = text_model + settings_obj.PAPER2REBUTTAL_DEFAULT_MODEL = text_model + + settings_obj.PAPER2PPT_OUTLINE_MODEL = text_model + settings_obj.PAPER2PPT_CONTENT_MODEL = text_model + settings_obj.PAPER2PPT_IMAGE_GEN_MODEL = image_model + settings_obj.PAPER2PPT_VLM_MODEL = vlm_model + settings_obj.PAPER2PPT_CHART_MODEL = text_model + settings_obj.PAPER2PPT_DESC_MODEL = text_model + settings_obj.PAPER2PPT_TECHNICAL_MODEL = text_model + + settings_obj.PAPER2FIGURE_TEXT_MODEL = text_model + settings_obj.PAPER2FIGURE_IMAGE_MODEL = image_model + settings_obj.PAPER2FIGURE_VLM_MODEL = vlm_model + settings_obj.PAPER2FIGURE_CHART_MODEL = text_model + settings_obj.PAPER2FIGURE_DESC_MODEL = text_model + settings_obj.PAPER2FIGURE_REF_IMG_DESC_MODEL = text_model + settings_obj.PAPER2FIGURE_TECHNICAL_MODEL = text_model + + 
settings_obj.PAPER2CITATION_WEBSEARCH_MODEL = settings_obj.PAPER2CITATION_WEBSEARCH_MODEL or text_model + return settings_obj + + # Global configuration instance -settings = AppSettings() +settings = _apply_simple_mode(AppSettings()) diff --git a/fastapi_app/main.py b/fastapi_app/main.py index d48a4229..4fa17f6d 100644 --- a/fastapi_app/main.py +++ b/fastapi_app/main.py @@ -34,8 +34,8 @@ def _configure_runtime_tempdir() -> None: from fastapi_app.config import settings from fastapi_app.routers import account from fastapi_app.routers import paper2video -from fastapi_app.routers import paper2any, paper2citation, paper2ppt, paper2poster -from fastapi_app.routers import pdf2ppt, image2ppt, kb, kb_embedding, files +from fastapi_app.routers import paper2any, paper2citation, paper2figure, paper2ppt, paper2poster +from fastapi_app.routers import pdf2ppt, image2ppt, kb, kb_workflows, kb_embedding, files from fastapi_app.routers import image2drawio from fastapi_app.routers import mindmap from fastapi_app.routers import paper2drawio @@ -86,6 +86,7 @@ def create_app() -> FastAPI: # 路由挂载 # Paper2Graph / System app.include_router(paper2any.router, prefix="/api/v1", tags=["paper2any"]) + app.include_router(paper2figure.router, prefix="/api/v1", tags=["paper2figure"]) app.include_router(account.router, prefix="/api/v1", tags=["account"]) # Paper2PPT app.include_router(paper2ppt.router, prefix="/api/v1", tags=["paper2ppt"]) @@ -105,6 +106,7 @@ def create_app() -> FastAPI: app.include_router(mindmap.router, prefix="/api/v1", tags=["mindmap"]) # 知识库接口 app.include_router(kb.router, prefix="/api/v1", tags=["Knowledge Base"]) + app.include_router(kb_workflows.router, prefix="/api/v1", tags=["Knowledge Base Workflows"]) app.include_router(kb_embedding.router, prefix="/api/v1", tags=["Knowledge Base Embedding"]) # 文件管理接口 app.include_router(files.router, prefix="/api/v1", tags=["Files"]) diff --git a/fastapi_app/routers/__init__.py b/fastapi_app/routers/__init__.py index 3068c824..ef3eb627 
100644 --- a/fastapi_app/routers/__init__.py +++ b/fastapi_app/routers/__init__.py @@ -10,10 +10,12 @@ paper2citation, paper2video, paper2any, + paper2figure, paper2ppt, pdf2ppt, image2ppt, kb, + kb_workflows, kb_embedding, files, image2drawio, @@ -27,10 +29,12 @@ "paper2citation", "paper2video", "paper2any", + "paper2figure", "paper2ppt", "pdf2ppt", "image2ppt", "kb", + "kb_workflows", "kb_embedding", "files", "image2drawio", diff --git a/fastapi_app/routers/image2drawio.py b/fastapi_app/routers/image2drawio.py index dd63d7b6..f2ce8b9f 100644 --- a/fastapi_app/routers/image2drawio.py +++ b/fastapi_app/routers/image2drawio.py @@ -6,6 +6,7 @@ from pydantic import BaseModel from fastapi_app.config.settings import settings +from fastapi_app.services.managed_api_service import resolve_model_name router = APIRouter(prefix="/image2drawio", tags=["image2drawio"]) @@ -37,9 +38,21 @@ async def generate_image2drawio( chat_api_url=chat_api_url, api_key=api_key, email=email, - model=model, - gen_fig_model=gen_fig_model, - vlm_model=vlm_model, + model=resolve_model_name( + model, + managed_default=settings.IMAGE2DRAWIO_DEFAULT_MODEL, + fallback_default="gpt-4o", + ), + gen_fig_model=resolve_model_name( + gen_fig_model, + managed_default=settings.IMAGE2DRAWIO_DEFAULT_IMAGE_MODEL, + fallback_default="gemini-3-pro-image-preview", + ), + vlm_model=resolve_model_name( + vlm_model, + managed_default=settings.IMAGE2DRAWIO_VLM_MODEL, + fallback_default="qwen-vl-ocr-2025-11-20", + ), language=language, ) return Image2DrawioResponse(**result) diff --git a/fastapi_app/routers/image2ppt.py b/fastapi_app/routers/image2ppt.py index 1e1bbd4b..1449fff5 100644 --- a/fastapi_app/routers/image2ppt.py +++ b/fastapi_app/routers/image2ppt.py @@ -6,6 +6,8 @@ from fastapi.responses import FileResponse from dataflow_agent.logger import get_logger +from fastapi_app.config import settings +from fastapi_app.services.managed_api_service import resolve_model_name log = get_logger(__name__) @@ -51,8 +53,16 
@@ async def generate_image2ppt( api_key=api_key, email=email, use_ai_edit=use_ai_edit, - model=model, - gen_fig_model=gen_fig_model, + model=resolve_model_name( + model, + managed_default=settings.IMAGE2PPT_DEFAULT_MODEL, + fallback_default="gpt-4o", + ), + gen_fig_model=resolve_model_name( + gen_fig_model, + managed_default=settings.IMAGE2PPT_DEFAULT_IMAGE_MODEL, + fallback_default="gemini-2.5-flash-image", + ), language=language, style=style, page_count=page_count, diff --git a/fastapi_app/routers/kb.py b/fastapi_app/routers/kb.py index aefd6ea0..336e3780 100644 --- a/fastapi_app/routers/kb.py +++ b/fastapi_app/routers/kb.py @@ -672,492 +672,10 @@ async def chat_with_kb( traceback.print_exc() raise HTTPException(status_code=500, detail=str(e)) -@router.post("/generate-ppt") -async def generate_ppt_from_kb( - file_path: Optional[str] = Body(None, embed=True), - file_paths: Optional[List[str]] = Body(None, embed=True), - image_paths: Optional[List[str]] = Body(None, embed=True), - image_items: Optional[List[Dict[str, Any]]] = Body(None, embed=True), - query: Optional[str] = Body("", embed=True), - need_embedding: bool = Body(False, embed=True), - search_top_k: int = Body(8, embed=True), - user_id: Optional[str] = Body(None, embed=True), - email: Optional[str] = Body(None, embed=True), - notebook_id: Optional[str] = Body(None, embed=True), - api_url: str = Body(..., embed=True), - api_key: str = Body(..., embed=True), - style: str = Body("modern", embed=True), - language: str = Body("zh", embed=True), - page_count: int = Body(10, embed=True), - model: str = Body("gpt-4o", embed=True), - gen_fig_model: str = Body("gemini-2.5-flash-image", embed=True), - user: AuthUser = Depends(get_current_user), -): - """ - Generate PPT from knowledge base file (non-interactive) - """ - try: - api_url, api_key = resolve_llm_credentials(api_url, api_key, scope="kb") - email, user_id = _resolve_kb_identity(user) - # Normalize and validate input files (PDF/PPT/DOC/IMG) - input_paths = 
file_paths or ([file_path] if file_path else []) - if not input_paths: - raise HTTPException(status_code=400, detail="No input files provided") - - # Create output directory - project_root = get_project_root() - if notebook_id: - output_dir = _generated_dir(email, notebook_id, "ppt", user_id) - else: - ts = int(time.time()) - output_dir = project_root / "outputs" / "kb_outputs" / email / f"{ts}_ppt" - output_dir.mkdir(parents=True, exist_ok=True) - - # Split docs/images - doc_paths: List[Path] = [] - user_image_items: List[Dict[str, Any]] = [] - for p in input_paths: - local_path = _resolve_user_owned_output_path(p, user) - if not local_path.exists(): - raise HTTPException(status_code=404, detail=f"File not found: {p}") - ext = local_path.suffix.lower() - if ext in IMAGE_EXTENSIONS: - user_image_items.append({"path": str(local_path), "description": ""}) - elif ext in {".pdf", ".pptx", ".ppt", ".docx", ".doc"}: - doc_paths.append(local_path) - else: - raise HTTPException(status_code=400, detail=f"Unsupported file type for PPT: {local_path.name}") - - if not doc_paths: - raise HTTPException(status_code=400, detail="At least one document file is required for PPT generation") - - # Convert docs to PDF for MinerU merge - local_pdf_paths: List[Path] = [] - convert_dir = output_dir / "input" - convert_dir.mkdir(parents=True, exist_ok=True) - for p in doc_paths: - ext = p.suffix.lower() - if ext == ".pdf": - local_pdf_paths.append(p) - elif ext in {".pptx", ".ppt", ".docx", ".doc"}: - local_pdf_paths.append(_convert_to_pdf(p, convert_dir)) - else: - raise HTTPException(status_code=400, detail=f"Unsupported file type for PPT: {p.name}") - - # Merge PDFs if multiple - if len(local_pdf_paths) > 1: - merge_dir = output_dir / "input" - merged_pdf = merge_dir / "merged.pdf" - local_file_path = _merge_pdfs(local_pdf_paths, merged_pdf) - else: - local_file_path = local_pdf_paths[0] - - # Normalize image items (optional) - resolved_image_items: List[Dict[str, Any]] = [] - for item 
in image_items or []: - raw_path = item.get("path") or item.get("url") or "" - if not raw_path: - continue - img_path = _resolve_user_owned_output_path(str(raw_path), user) - if img_path.exists() and img_path.suffix.lower() in IMAGE_EXTENSIONS: - resolved_image_items.append({ - "path": str(img_path), - "description": item.get("description") or item.get("desc") or "" - }) - - for img in image_paths or []: - img_path = _resolve_user_owned_output_path(img, user) - if img_path.exists() and img_path.suffix.lower() in IMAGE_EXTENSIONS: - resolved_image_items.append({ - "path": str(img_path), - "description": "" - }) - - resolved_image_items.extend(user_image_items) - - # Embedding + retrieval (optional) - retrieval_text = "" - if need_embedding: - if notebook_id: - base_dir = _vector_store_dir(email, notebook_id, user_id) - else: - base_dir = project_root / "outputs" / "kb_data" / email / "vector_store" - embed_api_url = api_url - if "/embeddings" not in embed_api_url: - embed_api_url = embed_api_url.rstrip("/") + "/embeddings" - - files_for_embed = [{"path": str(p), "description": ""} for p in doc_paths] - from dataflow_agent.toolkits.ragtool.vector_store_tool import process_knowledge_base_files - - manifest = await process_knowledge_base_files( - files_for_embed, - base_dir=str(base_dir), - api_url=embed_api_url, - api_key=api_key, - model_name=None, - multimodal_model=None, - ) - - from dataflow_agent.toolkits.ragtool.vector_store_tool import VectorStoreManager - - manager = VectorStoreManager( - base_dir=str(base_dir), - embedding_api_url=embed_api_url, - api_key=api_key, - ) - - def _match_file_ids(m: Dict[str, Any], paths: List[Path]) -> List[str]: - ids: List[str] = [] - target = {str(p.resolve()) for p in paths} - for f in m.get("files", []): - try: - if str(Path(f.get("original_path", "")).resolve()) in target: - if f.get("id"): - ids.append(f["id"]) - except Exception: - continue - return ids - - file_ids = _match_file_ids(manifest or manager.manifest or {}, 
doc_paths) - if query and file_ids: - results = manager.search(query=query, top_k=search_top_k, file_ids=file_ids) - retrieval_text = "\n\n".join([r.get("content", "") for r in results if r.get("content")]) - - # Prepare request - ppt_req = Paper2PPTRequest( - input_type="PDF", - input_content=str(local_file_path), - email=email, - chat_api_url=api_url, - chat_api_key=api_key, - api_key=api_key, - style=style, - language=language, - page_count=page_count, - model=model, - gen_fig_model=gen_fig_model, - aspect_ratio="16:9", - use_long_paper=False - ) - - # Run KB pagecontent workflow - from fastapi_app.workflow_adapters.wa_paper2ppt import _init_state_from_request - - state_pc = _init_state_from_request(ppt_req, result_path=output_dir) - state_pc.kb_query = query or "" - state_pc.kb_retrieval_text = retrieval_text - state_pc.kb_user_images = resolved_image_items - state_pc = await run_workflow("kb_page_content", state_pc) - pagecontent = getattr(state_pc, "pagecontent", []) or [] - - # Run PPT generation with injected pagecontent - state_pc.pagecontent = pagecontent - state_pp = await run_workflow("paper2ppt_parallel_consistent_style", state_pc) - - # Extract output paths - pdf_path = "" - pptx_path = "" - if hasattr(state_pp, 'ppt_pdf_path'): - pdf_path = state_pp.ppt_pdf_path - if hasattr(state_pp, 'ppt_pptx_path'): - pptx_path = state_pp.ppt_pptx_path - - return { - "success": True, - "result_path": str(output_dir), - "pdf_path": _to_outputs_url(pdf_path) if pdf_path else "", - "pptx_path": _to_outputs_url(pptx_path) if pptx_path else "", - "output_file_id": f"kb_ppt_{ts}" - } - - except Exception as e: - import traceback - traceback.print_exc() - raise HTTPException(status_code=500, detail=str(e)) - -@router.post("/generate-podcast") -async def generate_podcast_from_kb( - file_paths: List[str] = Body(..., embed=True), - user_id: Optional[str] = Body(None, embed=True), - email: Optional[str] = Body(None, embed=True), - notebook_id: Optional[str] = Body(None, 
embed=True), - api_url: str = Body(..., embed=True), - api_key: str = Body(..., embed=True), - model: str = Body("gpt-4o", embed=True), - tts_model: str = Body("cosyvoice-v3-flash", embed=True), - voice_name: str = Body("", embed=True), - voice_name_b: str = Body("Puck", embed=True), - podcast_mode: str = Body("monologue", embed=True), - podcast_length: str = Body("standard", embed=True), - language: str = Body("zh", embed=True), - user: AuthUser = Depends(get_current_user), -): - """ - Generate podcast from knowledge base files - """ - try: - api_url, api_key = resolve_llm_credentials(api_url, api_key, scope="kb") - email, user_id = _resolve_kb_identity(user) - project_root = get_project_root() - if notebook_id: - output_dir = _generated_dir(email, notebook_id, "podcast", user_id) - else: - ts = int(time.time()) - output_dir = project_root / "outputs" / "kb_outputs" / email / f"{ts}_podcast" - output_dir.mkdir(parents=True, exist_ok=True) - - # Normalize file paths - if not file_paths: - raise HTTPException(status_code=400, detail="No valid files provided") - - local_paths: List[Path] = [] - for f in file_paths: - local_path = _resolve_user_owned_output_path(f, user) - if not local_path.exists(): - raise HTTPException(status_code=404, detail=f"File not found: {f}") - local_paths.append(local_path) - - # If multiple files, merge into a single PDF (doc/ppt will be converted) - if len(local_paths) > 1: - merge_dir = output_dir / "input" - merge_dir.mkdir(parents=True, exist_ok=True) - - pdf_paths: List[Path] = [] - for p in local_paths: - ext = p.suffix.lower() - if ext == ".pdf": - pdf_paths.append(p) - elif ext in {".docx", ".doc", ".pptx", ".ppt"}: - pdf_paths.append(_convert_to_pdf(p, merge_dir)) - else: - raise HTTPException(status_code=400, detail=f"Unsupported file type for podcast: {p.name}") - - merged_pdf = merge_dir / "merged.pdf" - local_file_paths = [str(_merge_pdfs(pdf_paths, merged_pdf))] - else: - local_file_paths = [str(local_paths[0])] - - # Prepare 
request - podcast_req = KBPodcastRequest( - files=local_file_paths, - chat_api_url=api_url, - api_key=api_key, - model=model, - tts_model=tts_model, - voice_name=voice_name, - voice_name_b=voice_name_b, - podcast_mode=podcast_mode, - podcast_length=podcast_length, - language=language - ) - podcast_req.email = email - - state = KBPodcastState(request=podcast_req, result_path=str(output_dir)) - - # Run workflow via registry (统一使用 run_workflow) - result_state = await run_workflow("kb_podcast", state) - - # Extract results - audio_path = "" - script_path = "" - result_path = "" - - if isinstance(result_state, dict): - audio_path = result_state.get("audio_path", "") - result_path = result_state.get("result_path", "") - else: - audio_path = getattr(result_state, "audio_path", "") - result_path = getattr(result_state, "result_path", "") - - if result_path: - script_path = str(Path(result_path) / "script.txt") - - audio_error = "" - if not audio_path: - audio_error = "No audio path returned from workflow" - elif isinstance(audio_path, str) and audio_path.startswith("["): - audio_error = audio_path - else: - audio_file = Path(audio_path) - if not audio_file.is_absolute(): - audio_file = (get_project_root() / audio_file).resolve() - if not audio_file.exists(): - audio_error = f"Audio file not found: {audio_file}" - - if audio_error: - raise HTTPException(status_code=500, detail=audio_error) - - audio_url = _to_outputs_url(audio_path) if audio_path else "" - script_url = _to_outputs_url(script_path) if script_path else "" - result_url = _to_outputs_url(result_path) if result_path else "" - - return { - "success": True, - "result_path": result_url, - "audio_path": audio_url, - "script_path": script_url, - "output_file_id": f"kb_podcast_{int(time.time())}" - } - - except Exception as e: - import traceback - traceback.print_exc() - raise HTTPException(status_code=500, detail=str(e)) - -@router.post("/generate-mindmap") -async def generate_mindmap_from_kb( - file_paths: List[str] 
= Body(..., embed=True), - user_id: Optional[str] = Body(None, embed=True), - email: Optional[str] = Body(None, embed=True), - notebook_id: Optional[str] = Body(None, embed=True), - api_url: str = Body(..., embed=True), - api_key: str = Body(..., embed=True), - model: str = Body("gpt-4o", embed=True), - mindmap_style: str = Body("default", embed=True), - max_depth: int = Body(3, embed=True), - language: str = Body("zh", embed=True), - user: AuthUser = Depends(get_current_user), -): - """ - Generate mindmap from knowledge base files - """ - try: - api_url, api_key = resolve_llm_credentials(api_url, api_key, scope="kb") - email, user_id = _resolve_kb_identity(user) - # Normalize file paths - local_file_paths = [] - - for f in file_paths: - local_path = _resolve_user_owned_output_path(f, user) - if not local_path.exists(): - raise HTTPException(status_code=404, detail=f"File not found: {f}") - local_file_paths.append(str(local_path)) - - if not local_file_paths: - raise HTTPException(status_code=400, detail="No valid files provided") - - # Prepare request - mindmap_req = KBMindMapRequest( - files=local_file_paths, - chat_api_url=api_url, - api_key=api_key, - model=model, - mindmap_style=mindmap_style, - max_depth=max_depth, - language=language - ) - mindmap_req.email = email - - if notebook_id: - nb_output_dir = _generated_dir(email, notebook_id, "mindmap", user_id) - state = KBMindMapState(request=mindmap_req, result_path=str(nb_output_dir)) - else: - state = KBMindMapState(request=mindmap_req) - - # Run workflow via registry (统一使用 run_workflow) - result_state = await run_workflow("kb_mindmap", state) - - # Extract results - mermaid_code = "" - result_path = "" - - if isinstance(result_state, dict): - mermaid_code = result_state.get("mermaid_code", "") - result_path = result_state.get("result_path", "") - else: - mermaid_code = getattr(result_state, "mermaid_code", "") - result_path = getattr(result_state, "result_path", "") - - mindmap_path = "" - if result_path: - 
mmd_path = Path(result_path) / "mindmap.mmd" - if (not mmd_path.exists()) and mermaid_code: - try: - mmd_path.write_text(mermaid_code, encoding="utf-8") - except Exception: - pass - if mmd_path.exists(): - mindmap_path = _to_outputs_url(str(mmd_path)) - - return { - "success": True, - "result_path": _to_outputs_url(result_path) if result_path else "", - "mermaid_code": mermaid_code, - "mindmap_path": mindmap_path, - "output_file_id": f"kb_mindmap_{int(time.time())}" - } - - except Exception as e: - import traceback - traceback.print_exc() - raise HTTPException(status_code=500, detail=str(e)) - -@router.post("/deep-research", response_model=DeepResearchResponse) -async def deep_research_from_kb( - req: DeepResearchRequest, - user: AuthUser = Depends(get_current_user), -): - """ - Deep research workflow入口(router -> service -> wa -> wf) - """ - if req.mode == "web" and not (req.search_api_key or (is_free_billing_mode() and settings.DEFAULT_SEARCH_API_KEY)): - raise HTTPException(status_code=400, detail="Search API key required") - if req.mode == "web" and req.search_provider == "google_cse" and not (req.google_cse_id or (is_free_billing_mode() and settings.DEFAULT_GOOGLE_CSE_ID)): - raise HTTPException(status_code=400, detail="google_cse_id required") - if not req.topic and not req.file_paths: - raise HTTPException(status_code=400, detail="Topic or files required") - req.email = _canonical_user_email(user) - req.user_id = _canonical_user_id(user) - if req.file_paths: - req.file_paths = [str(_resolve_user_owned_output_path(path, user)) for path in req.file_paths] - service = _get_deepresearch_service() - return await service.run(req) - - -@router.post("/generate-report", response_model=KBReportResponse) -async def generate_report_from_kb( - req: KBReportRequest, - user: AuthUser = Depends(get_current_user), -): - """ - Generate a report with insights/analysis from KB documents (workflow). 
- """ - if not req.file_paths: - raise HTTPException(status_code=400, detail="No valid files provided") - req.email = _canonical_user_email(user) - req.user_id = _canonical_user_id(user) - req.file_paths = [str(_resolve_user_owned_output_path(path, user)) for path in req.file_paths] - service = _get_report_service() - return await service.run(req) - - -@router.post("/save-mindmap") -async def save_mindmap_to_file( - file_url: str = Body(..., embed=True), - content: str = Body(..., embed=True), - user: AuthUser = Depends(get_current_user), -): - """ - Save edited Mermaid mindmap code back to the output file. - """ - try: - if not file_url: - raise HTTPException(status_code=400, detail="File URL is required") - - local_path = _resolve_user_owned_output_path(file_url, user) - - if local_path.suffix.lower() not in {".mmd", ".mermaid", ".md"}: - raise HTTPException(status_code=400, detail="Invalid mindmap file type") - - local_path.parent.mkdir(parents=True, exist_ok=True) - local_path.write_text(content or "", encoding="utf-8") - - return { - "success": True, - "mindmap_path": _to_outputs_url(str(local_path)) - } - except HTTPException: - raise +# +# Knowledge-base workflow endpoints were moved to: +# fastapi_app/routers/kb_workflows.py +# This file now focuses on notebook / file-management / chat responsibilities. 
except Exception as e: import traceback traceback.print_exc() diff --git a/fastapi_app/routers/kb_workflows.py b/fastapi_app/routers/kb_workflows.py new file mode 100644 index 00000000..af80ad68 --- /dev/null +++ b/fastapi_app/routers/kb_workflows.py @@ -0,0 +1,512 @@ +from __future__ import annotations + +import time +from pathlib import Path +from typing import Any, Dict, List, Optional + +from fastapi import APIRouter, Body, Depends, HTTPException + +from dataflow_agent.state import ( + IntelligentQARequest, + IntelligentQAState, + KBMindMapRequest, + KBMindMapState, + KBPodcastRequest, + KBPodcastState, +) +from dataflow_agent.utils import get_project_root +from dataflow_agent.workflow import run_workflow +from fastapi_app.config import settings +from fastapi_app.dependencies import AuthUser, get_current_user +from fastapi_app.schemas import ( + DeepResearchRequest, + DeepResearchResponse, + KBReportRequest, + KBReportResponse, + Paper2PPTRequest, +) +from fastapi_app.services.managed_api_service import ( + is_free_billing_mode, + resolve_llm_credentials, + resolve_model_name, +) +from fastapi_app.utils import _to_outputs_url +from fastapi_app.routers.kb import ( + IMAGE_EXTENSIONS, + _canonical_user_email, + _canonical_user_id, + _convert_to_pdf, + _generated_dir, + _get_deepresearch_service, + _get_report_service, + _merge_pdfs, + _resolve_kb_identity, + _resolve_user_owned_output_path, + _vector_store_dir, +) + + +router = APIRouter(prefix="/kb", tags=["Knowledge Base Workflows"]) + + +@router.post("/generate-ppt") +async def generate_ppt_from_kb( + file_path: Optional[str] = Body(None, embed=True), + file_paths: Optional[List[str]] = Body(None, embed=True), + image_paths: Optional[List[str]] = Body(None, embed=True), + image_items: Optional[List[Dict[str, Any]]] = Body(None, embed=True), + query: Optional[str] = Body("", embed=True), + need_embedding: bool = Body(False, embed=True), + search_top_k: int = Body(8, embed=True), + user_id: Optional[str] = 
Body(None, embed=True), + email: Optional[str] = Body(None, embed=True), + notebook_id: Optional[str] = Body(None, embed=True), + api_url: Optional[str] = Body(None, embed=True), + api_key: Optional[str] = Body(None, embed=True), + style: str = Body("modern", embed=True), + language: str = Body("zh", embed=True), + page_count: int = Body(10, embed=True), + model: Optional[str] = Body(None, embed=True), + gen_fig_model: Optional[str] = Body(None, embed=True), + user: AuthUser = Depends(get_current_user), +): + """ + Generate PPT from KB documents. + """ + try: + api_url, api_key = resolve_llm_credentials(api_url, api_key, scope="kb") + resolved_model = resolve_model_name( + model, + managed_default=settings.KB_CHAT_MODEL, + fallback_default=settings.KB_CHAT_MODEL, + ) + resolved_image_model = resolve_model_name( + gen_fig_model, + managed_default=settings.PAPER2PPT_DEFAULT_IMAGE_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_IMAGE_MODEL, + ) + email, user_id = _resolve_kb_identity(user) + input_paths = file_paths or ([file_path] if file_path else []) + if not input_paths: + raise HTTPException(status_code=400, detail="No input files provided") + + project_root = get_project_root() + ts = int(time.time()) + if notebook_id: + output_dir = _generated_dir(email, notebook_id, "ppt", user_id) + else: + output_dir = project_root / "outputs" / "kb_outputs" / email / f"{ts}_ppt" + output_dir.mkdir(parents=True, exist_ok=True) + + doc_paths: List[Path] = [] + user_image_items: List[Dict[str, Any]] = [] + for p in input_paths: + local_path = _resolve_user_owned_output_path(p, user) + if not local_path.exists(): + raise HTTPException(status_code=404, detail=f"File not found: {p}") + ext = local_path.suffix.lower() + if ext in IMAGE_EXTENSIONS: + user_image_items.append({"path": str(local_path), "description": ""}) + elif ext in {".pdf", ".pptx", ".ppt", ".docx", ".doc"}: + doc_paths.append(local_path) + else: + raise HTTPException(status_code=400, detail=f"Unsupported 
file type for PPT: {local_path.name}") + + if not doc_paths: + raise HTTPException(status_code=400, detail="At least one document file is required for PPT generation") + + local_pdf_paths: List[Path] = [] + convert_dir = output_dir / "input" + convert_dir.mkdir(parents=True, exist_ok=True) + for p in doc_paths: + ext = p.suffix.lower() + if ext == ".pdf": + local_pdf_paths.append(p) + elif ext in {".pptx", ".ppt", ".docx", ".doc"}: + local_pdf_paths.append(_convert_to_pdf(p, convert_dir)) + else: + raise HTTPException(status_code=400, detail=f"Unsupported file type for PPT: {p.name}") + + if len(local_pdf_paths) > 1: + merged_pdf = convert_dir / "merged.pdf" + local_file_path = _merge_pdfs(local_pdf_paths, merged_pdf) + else: + local_file_path = local_pdf_paths[0] + + resolved_image_items: List[Dict[str, Any]] = [] + for item in image_items or []: + raw_path = item.get("path") or item.get("url") or "" + if not raw_path: + continue + img_path = _resolve_user_owned_output_path(str(raw_path), user) + if img_path.exists() and img_path.suffix.lower() in IMAGE_EXTENSIONS: + resolved_image_items.append( + { + "path": str(img_path), + "description": item.get("description") or item.get("desc") or "", + } + ) + + for img in image_paths or []: + img_path = _resolve_user_owned_output_path(img, user) + if img_path.exists() and img_path.suffix.lower() in IMAGE_EXTENSIONS: + resolved_image_items.append({"path": str(img_path), "description": ""}) + + resolved_image_items.extend(user_image_items) + + retrieval_text = "" + if need_embedding: + if notebook_id: + base_dir = _vector_store_dir(email, notebook_id, user_id) + else: + base_dir = project_root / "outputs" / "kb_data" / email / "vector_store" + embed_api_url = api_url + if "/embeddings" not in embed_api_url: + embed_api_url = embed_api_url.rstrip("/") + "/embeddings" + + files_for_embed = [{"path": str(p), "description": ""} for p in doc_paths] + from dataflow_agent.toolkits.ragtool.vector_store_tool import ( + 
VectorStoreManager, + process_knowledge_base_files, + ) + + manifest = await process_knowledge_base_files( + files_for_embed, + base_dir=str(base_dir), + api_url=embed_api_url, + api_key=api_key, + model_name=None, + multimodal_model=None, + ) + + manager = VectorStoreManager( + base_dir=str(base_dir), + embedding_api_url=embed_api_url, + api_key=api_key, + ) + + def _match_file_ids(m: Dict[str, Any], paths: List[Path]) -> List[str]: + ids: List[str] = [] + target = {str(p.resolve()) for p in paths} + for f in m.get("files", []): + try: + if str(Path(f.get("original_path", "")).resolve()) in target and f.get("id"): + ids.append(f["id"]) + except Exception: + continue + return ids + + file_ids = _match_file_ids(manifest or manager.manifest or {}, doc_paths) + if query and file_ids: + results = manager.search(query=query, top_k=search_top_k, file_ids=file_ids) + retrieval_text = "\n\n".join([r.get("content", "") for r in results if r.get("content")]) + + ppt_req = Paper2PPTRequest( + input_type="PDF", + input_content=str(local_file_path), + email=email, + chat_api_url=api_url, + chat_api_key=api_key, + api_key=api_key, + style=style, + language=language, + page_count=page_count, + model=resolved_model, + gen_fig_model=resolved_image_model, + aspect_ratio="16:9", + use_long_paper=False, + ) + + from fastapi_app.workflow_adapters.wa_paper2ppt import _init_state_from_request + + state_pc = _init_state_from_request(ppt_req, result_path=output_dir) + state_pc.kb_query = query or "" + state_pc.kb_retrieval_text = retrieval_text + state_pc.kb_user_images = resolved_image_items + state_pc = await run_workflow("kb_page_content", state_pc) + pagecontent = getattr(state_pc, "pagecontent", []) or [] + + state_pc.pagecontent = pagecontent + state_pp = await run_workflow("paper2ppt_parallel_consistent_style", state_pc) + + pdf_path = getattr(state_pp, "ppt_pdf_path", "") + pptx_path = getattr(state_pp, "ppt_pptx_path", "") + + return { + "success": True, + "result_path": 
str(output_dir), + "pdf_path": _to_outputs_url(pdf_path) if pdf_path else "", + "pptx_path": _to_outputs_url(pptx_path) if pptx_path else "", + "output_file_id": f"kb_ppt_{ts}", + } + except Exception as e: + import traceback + + traceback.print_exc() + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/generate-podcast") +async def generate_podcast_from_kb( + file_paths: List[str] = Body(..., embed=True), + user_id: Optional[str] = Body(None, embed=True), + email: Optional[str] = Body(None, embed=True), + notebook_id: Optional[str] = Body(None, embed=True), + api_url: Optional[str] = Body(None, embed=True), + api_key: Optional[str] = Body(None, embed=True), + model: Optional[str] = Body(None, embed=True), + tts_model: Optional[str] = Body(None, embed=True), + voice_name: str = Body("", embed=True), + voice_name_b: str = Body("Puck", embed=True), + podcast_mode: str = Body("monologue", embed=True), + podcast_length: str = Body("standard", embed=True), + language: str = Body("zh", embed=True), + user: AuthUser = Depends(get_current_user), +): + try: + api_url, api_key = resolve_llm_credentials(api_url, api_key, scope="kb") + resolved_model = resolve_model_name( + model, + managed_default=settings.KB_CHAT_MODEL, + fallback_default=settings.KB_CHAT_MODEL, + ) + resolved_tts_model = resolve_model_name( + tts_model, + managed_default=settings.PAPER2VIDEO_TTS_MODEL, + fallback_default=settings.PAPER2VIDEO_TTS_MODEL, + ) + email, user_id = _resolve_kb_identity(user) + project_root = get_project_root() + if notebook_id: + output_dir = _generated_dir(email, notebook_id, "podcast", user_id) + else: + ts = int(time.time()) + output_dir = project_root / "outputs" / "kb_outputs" / email / f"{ts}_podcast" + output_dir.mkdir(parents=True, exist_ok=True) + + if not file_paths: + raise HTTPException(status_code=400, detail="No valid files provided") + + local_paths: List[Path] = [] + for f in file_paths: + local_path = _resolve_user_owned_output_path(f, user) + 
if not local_path.exists(): + raise HTTPException(status_code=404, detail=f"File not found: {f}") + local_paths.append(local_path) + + if len(local_paths) > 1: + merge_dir = output_dir / "input" + merge_dir.mkdir(parents=True, exist_ok=True) + + pdf_paths: List[Path] = [] + for p in local_paths: + ext = p.suffix.lower() + if ext == ".pdf": + pdf_paths.append(p) + elif ext in {".docx", ".doc", ".pptx", ".ppt"}: + pdf_paths.append(_convert_to_pdf(p, merge_dir)) + else: + raise HTTPException(status_code=400, detail=f"Unsupported file type for podcast: {p.name}") + + merged_pdf = merge_dir / "merged.pdf" + local_file_paths = [str(_merge_pdfs(pdf_paths, merged_pdf))] + else: + local_file_paths = [str(local_paths[0])] + + podcast_req = KBPodcastRequest( + files=local_file_paths, + chat_api_url=api_url, + api_key=api_key, + model=resolved_model, + tts_model=resolved_tts_model, + voice_name=voice_name, + voice_name_b=voice_name_b, + podcast_mode=podcast_mode, + podcast_length=podcast_length, + language=language, + ) + podcast_req.email = email + + state = KBPodcastState(request=podcast_req, result_path=str(output_dir)) + result_state = await run_workflow("kb_podcast", state) + + audio_path = "" + result_path = "" + if isinstance(result_state, dict): + audio_path = result_state.get("audio_path", "") + result_path = result_state.get("result_path", "") + else: + audio_path = getattr(result_state, "audio_path", "") + result_path = getattr(result_state, "result_path", "") + + script_path = str(Path(result_path) / "script.txt") if result_path else "" + audio_error = "" + if not audio_path: + audio_error = "No audio path returned from workflow" + elif isinstance(audio_path, str) and audio_path.startswith("["): + audio_error = audio_path + else: + audio_file = Path(audio_path) + if not audio_file.is_absolute(): + audio_file = (get_project_root() / audio_file).resolve() + if not audio_file.exists(): + audio_error = f"Audio file not found: {audio_file}" + if audio_error: + raise 
HTTPException(status_code=500, detail=audio_error) + + return { + "success": True, + "result_path": _to_outputs_url(result_path) if result_path else "", + "audio_path": _to_outputs_url(audio_path) if audio_path else "", + "script_path": _to_outputs_url(script_path) if script_path else "", + "output_file_id": f"kb_podcast_{int(time.time())}", + } + except Exception as e: + import traceback + + traceback.print_exc() + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/generate-mindmap") +async def generate_mindmap_from_kb( + file_paths: List[str] = Body(..., embed=True), + user_id: Optional[str] = Body(None, embed=True), + email: Optional[str] = Body(None, embed=True), + notebook_id: Optional[str] = Body(None, embed=True), + api_url: Optional[str] = Body(None, embed=True), + api_key: Optional[str] = Body(None, embed=True), + model: Optional[str] = Body(None, embed=True), + mindmap_style: str = Body("default", embed=True), + max_depth: int = Body(3, embed=True), + language: str = Body("zh", embed=True), + user: AuthUser = Depends(get_current_user), +): + try: + api_url, api_key = resolve_llm_credentials(api_url, api_key, scope="kb") + resolved_model = resolve_model_name( + model, + managed_default=settings.MINDMAP_DEFAULT_MODEL, + fallback_default=settings.MINDMAP_DEFAULT_MODEL, + ) + email, user_id = _resolve_kb_identity(user) + + local_file_paths = [] + for f in file_paths: + local_path = _resolve_user_owned_output_path(f, user) + if not local_path.exists(): + raise HTTPException(status_code=404, detail=f"File not found: {f}") + local_file_paths.append(str(local_path)) + if not local_file_paths: + raise HTTPException(status_code=400, detail="No valid files provided") + + mindmap_req = KBMindMapRequest( + files=local_file_paths, + chat_api_url=api_url, + api_key=api_key, + model=resolved_model, + mindmap_style=mindmap_style, + max_depth=max_depth, + language=language, + ) + mindmap_req.email = email + + if notebook_id: + nb_output_dir = 
_generated_dir(email, notebook_id, "mindmap", user_id) + state = KBMindMapState(request=mindmap_req, result_path=str(nb_output_dir)) + else: + state = KBMindMapState(request=mindmap_req) + + result_state = await run_workflow("kb_mindmap", state) + mermaid_code = "" + result_path = "" + if isinstance(result_state, dict): + mermaid_code = result_state.get("mermaid_code", "") + result_path = result_state.get("result_path", "") + else: + mermaid_code = getattr(result_state, "mermaid_code", "") + result_path = getattr(result_state, "result_path", "") + + mindmap_path = "" + if result_path: + mmd_path = Path(result_path) / "mindmap.mmd" + if (not mmd_path.exists()) and mermaid_code: + try: + mmd_path.write_text(mermaid_code, encoding="utf-8") + except Exception: + pass + if mmd_path.exists(): + mindmap_path = _to_outputs_url(str(mmd_path)) + + return { + "success": True, + "result_path": _to_outputs_url(result_path) if result_path else "", + "mermaid_code": mermaid_code, + "mindmap_path": mindmap_path, + "output_file_id": f"kb_mindmap_{int(time.time())}", + } + except Exception as e: + import traceback + + traceback.print_exc() + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/deep-research", response_model=DeepResearchResponse) +async def deep_research_from_kb( + req: DeepResearchRequest, + user: AuthUser = Depends(get_current_user), +): + if req.mode == "web" and not (req.search_api_key or (is_free_billing_mode() and settings.DEFAULT_SEARCH_API_KEY)): + raise HTTPException(status_code=400, detail="Search API key required") + if req.mode == "web" and req.search_provider == "google_cse" and not ( + req.google_cse_id or (is_free_billing_mode() and settings.DEFAULT_GOOGLE_CSE_ID) + ): + raise HTTPException(status_code=400, detail="google_cse_id required") + if not req.topic and not req.file_paths: + raise HTTPException(status_code=400, detail="Topic or files required") + req.email = _canonical_user_email(user) + req.user_id = 
_canonical_user_id(user) + if req.file_paths: + req.file_paths = [str(_resolve_user_owned_output_path(path, user)) for path in req.file_paths] + service = _get_deepresearch_service() + return await service.run(req) + + +@router.post("/generate-report", response_model=KBReportResponse) +async def generate_report_from_kb( + req: KBReportRequest, + user: AuthUser = Depends(get_current_user), +): + if not req.file_paths: + raise HTTPException(status_code=400, detail="No valid files provided") + req.email = _canonical_user_email(user) + req.user_id = _canonical_user_id(user) + req.file_paths = [str(_resolve_user_owned_output_path(path, user)) for path in req.file_paths] + service = _get_report_service() + return await service.run(req) + + +@router.post("/save-mindmap") +async def save_mindmap_to_file( + file_url: str = Body(..., embed=True), + content: str = Body(..., embed=True), + user: AuthUser = Depends(get_current_user), +): + try: + if not file_url: + raise HTTPException(status_code=400, detail="File URL is required") + + local_path = _resolve_user_owned_output_path(file_url, user) + if local_path.suffix.lower() not in {".mmd", ".mermaid", ".md"}: + raise HTTPException(status_code=400, detail="Invalid mindmap file type") + + local_path.parent.mkdir(parents=True, exist_ok=True) + local_path.write_text(content or "", encoding="utf-8") + + return { + "success": True, + "mindmap_path": _to_outputs_url(str(local_path)), + } + except HTTPException: + raise diff --git a/fastapi_app/routers/mindmap.py b/fastapi_app/routers/mindmap.py index 45c20a9b..bf2547d7 100644 --- a/fastapi_app/routers/mindmap.py +++ b/fastapi_app/routers/mindmap.py @@ -15,10 +15,11 @@ from dataflow_agent.agentroles import create_agent from dataflow_agent.logger import get_logger from dataflow_agent.state import MainRequest, MainState +from fastapi_app.config import settings from fastapi_app.dependencies import AuthUser, get_optional_user, is_auth_configured from fastapi_app.config.pricing import 
estimate_mindmap_points from fastapi_app.services.billing_service import BillingService -from fastapi_app.services.managed_api_service import resolve_llm_credentials +from fastapi_app.services.managed_api_service import resolve_llm_credentials, resolve_model_name from fastapi_app.utils import _to_outputs_url, get_outputs_root, resolve_outputs_path router = APIRouter(prefix="/mindmap", tags=["mindmap"]) @@ -326,6 +327,11 @@ async def generate_mindmap( raise HTTPException(status_code=400, detail="Please provide at least one file or some text content") resolved_api_url, resolved_api_key = resolve_llm_credentials(chat_api_url, api_key, scope="kb") + resolved_model = resolve_model_name( + model, + managed_default=settings.MINDMAP_DEFAULT_MODEL, + fallback_default="gpt-4o", + ) owner = _owner_slug(user) run_id = f"{int(time.time())}_{uuid.uuid4().hex[:8]}" run_dir = (_mindmap_root_for_user(user) / run_id).resolve() @@ -353,7 +359,7 @@ async def generate_mindmap( text_blocks=parsed_files, chat_api_url=resolved_api_url, api_key=resolved_api_key, - model=(model or "gpt-5.4").strip() or "gpt-5.4", + model=resolved_model, max_depth=max(2, min(int(max_depth or 3), 6)), language=(language or "zh").strip() or "zh", style=(mindmap_style or "default").strip() or "default", @@ -371,7 +377,7 @@ async def generate_mindmap( "owner": owner, "style": (mindmap_style or "default").strip() or "default", "language": (language or "zh").strip() or "zh", - "model": (model or "gpt-5.4").strip() or "gpt-5.4", + "model": resolved_model, "source_count": len(local_paths), "estimated_points": charge_info["points"], "billing": charge_info, diff --git a/fastapi_app/routers/paper2any.py b/fastapi_app/routers/paper2any.py index b12b9b4a..c70d4b6a 100644 --- a/fastapi_app/routers/paper2any.py +++ b/fastapi_app/routers/paper2any.py @@ -1,9 +1,7 @@ from __future__ import annotations -from typing import Optional -from fastapi import APIRouter, Depends, File, Form, UploadFile, Request, Body -from 
fastapi.responses import FileResponse -from fastapi_app.schemas import Paper2FigureResponse, VerifyLlmRequest, VerifyLlmResponse +from fastapi import APIRouter, Body, Depends +from fastapi_app.schemas import VerifyLlmRequest, VerifyLlmResponse from dataflow_agent.logger import get_logger log = get_logger(__name__) @@ -26,109 +24,3 @@ async def verify_llm_connection( Verify LLM connection by sending a simple 'Hi' message from the backend. """ return await service.verify_llm_connection(req) - - -@router.get("/paper2figure/history") -async def list_paper2figure_history_files( - request: Request, - email: str, - service: Paper2AnyService = Depends(get_service), -): - """ - 根据邮箱,列出该用户目录中的所有历史输出文件(pptx/png/svg) - """ - return await service.list_history_files(email, request) - - -@router.post("/paper2figure/generate") -async def generate_paper2figure( - img_gen_model_name: str = Form(...), - chat_api_url: Optional[str] = Form(None), - api_key: Optional[str] = Form(None), - input_type: str = Form(...), - email: Optional[str] = Form(None), - file: Optional[UploadFile] = File(None), - file_kind: Optional[str] = Form(None), - text: Optional[str] = Form(None), - graph_type: str = Form("model_arch"), # 'model_arch' | 'tech_route' | 'exp_data' - language: str = Form("zh"), - figure_complex: str = Form("easy"), - style: str = Form("cartoon"), - service: Paper2AnyService = Depends(get_service), -): - """ - Paper2Graph 接口(带邀请码校验 + workflow 调用) - """ - ppt_path = await service.generate_paper2figure( - img_gen_model_name=img_gen_model_name, - chat_api_url=chat_api_url, - api_key=api_key, - input_type=input_type, - email=email, - file=file, - file_kind=file_kind, - text=text, - graph_type=graph_type, - language=language, - figure_complex=figure_complex, - style=style, - ) - - return FileResponse( - path=str(ppt_path), - media_type="application/vnd.openxmlformats-officedocument.presentationml.presentation", - filename=ppt_path.name, - ) - - -@router.post("/paper2figure/generate-json", 
response_model=Paper2FigureResponse) -async def generate_paper2figure_json( - request: Request, - img_gen_model_name: str = Form(...), - chat_api_url: Optional[str] = Form(None), - api_key: Optional[str] = Form(None), - input_type: str = Form(...), # 'file' | 'text' | 'image' - email: Optional[str] = Form(None), - file: Optional[UploadFile] = File(None), - file_kind: Optional[str] = Form(None), # 'pdf' | 'image' - text: Optional[str] = Form(None), - graph_type: str = Form("model_arch"), # 'model_arch' | 'tech_route' | 'exp_data' - language: str = Form("zh"), - style: str = Form("cartoon"), - figure_complex: str = Form("easy"), - resolution: str = Form("2K"), - edit_prompt: Optional[str] = Form(None), - tech_route_palette: str = Form(""), - tech_route_template: str = Form(""), - reference_image: Optional[UploadFile] = File(None), - tech_route_edit_prompt: Optional[str] = Form(None), - output_format: Optional[str] = Form(None), - service: Paper2AnyService = Depends(get_service), -): - """ - Paper2Graph JSON 接口 - """ - resp_data = await service.generate_paper2figure_json( - request=request, - img_gen_model_name=img_gen_model_name, - chat_api_url=chat_api_url, - api_key=api_key, - input_type=input_type, - email=email, - file=file, - file_kind=file_kind, - text=text, - graph_type=graph_type, - language=language, - style=style, - figure_complex=figure_complex, - resolution=resolution, - edit_prompt=edit_prompt, - tech_route_palette=tech_route_palette, - tech_route_template=tech_route_template, - reference_image=reference_image, - tech_route_edit_prompt=tech_route_edit_prompt, - output_format=output_format, - ) - - return Paper2FigureResponse(**resp_data) diff --git a/fastapi_app/routers/paper2drawio.py b/fastapi_app/routers/paper2drawio.py index 9aae103c..a54e2790 100644 --- a/fastapi_app/routers/paper2drawio.py +++ b/fastapi_app/routers/paper2drawio.py @@ -10,6 +10,7 @@ from dataflow_agent.logger import get_logger from fastapi_app.config.settings import settings +from 
fastapi_app.services.managed_api_service import resolve_model_name log = get_logger(__name__) router = APIRouter(prefix="/paper2drawio", tags=["paper2drawio"]) @@ -24,7 +25,7 @@ class ChatRequest(BaseModel): chat_history: List[Dict[str, str]] = [] chat_api_url: str = "" api_key: str = "" - model: str = "gpt-4o" + model: str = settings.PAPER2DRAWIO_DEFAULT_MODEL class ExportRequest(BaseModel): @@ -80,13 +81,19 @@ async def generate_diagram( request=request, chat_api_url=chat_api_url, api_key=api_key, - model=model or settings.PAPER2DRAWIO_DEFAULT_MODEL, + model=resolve_model_name( + model, + managed_default=settings.PAPER2DRAWIO_DEFAULT_MODEL, + ), enable_vlm_validation=( enable_vlm_validation if enable_vlm_validation is not None else settings.PAPER2DRAWIO_ENABLE_VLM_VALIDATION ), - vlm_model=vlm_model or settings.PAPER2DRAWIO_VLM_MODEL, + vlm_model=resolve_model_name( + vlm_model, + managed_default=settings.PAPER2DRAWIO_VLM_MODEL, + ), vlm_validation_max_retries=vlm_validation_max_retries, input_type=input_type, diagram_type=diagram_type, @@ -116,7 +123,10 @@ async def chat_edit_diagram( chat_history=body.chat_history, chat_api_url=body.chat_api_url, api_key=body.api_key, - model=body.model, + model=resolve_model_name( + body.model, + managed_default=settings.PAPER2DRAWIO_DEFAULT_MODEL, + ), ) diff --git a/fastapi_app/routers/paper2figure.py b/fastapi_app/routers/paper2figure.py new file mode 100644 index 00000000..048455a0 --- /dev/null +++ b/fastapi_app/routers/paper2figure.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +from typing import Optional + +from fastapi import APIRouter, Body, Depends, File, Form, Request, UploadFile +from fastapi.responses import FileResponse + +from fastapi_app.config import settings +from fastapi_app.schemas import Paper2FigureResponse +from fastapi_app.services.managed_api_service import resolve_model_name + + +router = APIRouter(prefix="/paper2figure", tags=["paper2figure"]) + + +def get_service() -> 
"Paper2AnyService": + from fastapi_app.services.paper2any_service import Paper2AnyService + + return Paper2AnyService() + + +@router.get("/history") +async def list_paper2figure_history_files( + request: Request, + email: str, + service: "Paper2AnyService" = Depends(get_service), +): + """ + 根据邮箱,列出该用户目录中的所有历史输出文件(pptx/png/svg)。 + """ + return await service.list_history_files(email, request) + + +@router.post("/generate") +async def generate_paper2figure( + img_gen_model_name: Optional[str] = Form(None), + chat_api_url: Optional[str] = Form(None), + api_key: Optional[str] = Form(None), + input_type: str = Form(...), + email: Optional[str] = Form(None), + file: Optional[UploadFile] = File(None), + file_kind: Optional[str] = Form(None), + text: Optional[str] = Form(None), + graph_type: str = Form("model_arch"), + language: str = Form("zh"), + figure_complex: str = Form("easy"), + style: str = Form("cartoon"), + service: "Paper2AnyService" = Depends(get_service), +): + """ + Paper2Figure 文件下载接口。 + """ + ppt_path = await service.generate_paper2figure( + img_gen_model_name=resolve_model_name( + img_gen_model_name, + managed_default=settings.PAPER2FIGURE_IMAGE_MODEL, + fallback_default=settings.PAPER2FIGURE_DEFAULT_IMAGE_MODEL, + ), + chat_api_url=chat_api_url, + api_key=api_key, + input_type=input_type, + email=email, + file=file, + file_kind=file_kind, + text=text, + graph_type=graph_type, + language=language, + figure_complex=figure_complex, + style=style, + ) + + return FileResponse( + path=str(ppt_path), + media_type="application/vnd.openxmlformats-officedocument.presentationml.presentation", + filename=ppt_path.name, + ) + + +@router.post("/generate-json", response_model=Paper2FigureResponse) +async def generate_paper2figure_json( + request: Request, + img_gen_model_name: Optional[str] = Form(None), + chat_api_url: Optional[str] = Form(None), + api_key: Optional[str] = Form(None), + input_type: str = Form(...), + email: Optional[str] = Form(None), + file: 
Optional[UploadFile] = File(None), + file_kind: Optional[str] = Form(None), + text: Optional[str] = Form(None), + graph_type: str = Form("model_arch"), + language: str = Form("zh"), + style: str = Form("cartoon"), + figure_complex: str = Form("easy"), + resolution: str = Form("2K"), + edit_prompt: Optional[str] = Form(None), + tech_route_palette: str = Form(""), + tech_route_template: str = Form(""), + reference_image: Optional[UploadFile] = File(None), + tech_route_edit_prompt: Optional[str] = Form(None), + output_format: Optional[str] = Form(None), + service: "Paper2AnyService" = Depends(get_service), +): + """ + Paper2Figure JSON 接口。 + """ + resp_data = await service.generate_paper2figure_json( + request=request, + img_gen_model_name=resolve_model_name( + img_gen_model_name, + managed_default=settings.PAPER2FIGURE_IMAGE_MODEL, + fallback_default=settings.PAPER2FIGURE_DEFAULT_IMAGE_MODEL, + ), + chat_api_url=chat_api_url, + api_key=api_key, + input_type=input_type, + email=email, + file=file, + file_kind=file_kind, + text=text, + graph_type=graph_type, + language=language, + style=style, + figure_complex=figure_complex, + resolution=resolution, + edit_prompt=edit_prompt, + tech_route_palette=tech_route_palette, + tech_route_template=tech_route_template, + reference_image=reference_image, + tech_route_edit_prompt=tech_route_edit_prompt, + output_format=output_format, + ) + return Paper2FigureResponse(**resp_data) diff --git a/fastapi_app/routers/paper2poster.py b/fastapi_app/routers/paper2poster.py index 0471e3d1..d241ee3b 100644 --- a/fastapi_app/routers/paper2poster.py +++ b/fastapi_app/routers/paper2poster.py @@ -4,6 +4,9 @@ from fastapi import APIRouter, Depends, File, Form, UploadFile +from fastapi_app.config import settings +from fastapi_app.services.managed_api_service import resolve_model_name + router = APIRouter() @@ -33,8 +36,16 @@ async def generate_paper2poster( paper_file=paper_file, chat_api_url=chat_api_url, api_key=api_key, - model=model, - 
vision_model=vision_model, + model=resolve_model_name( + model, + managed_default=settings.PAPER2POSTER_DEFAULT_MODEL, + fallback_default="gpt-4o", + ), + vision_model=resolve_model_name( + vision_model, + managed_default=settings.PAPER2POSTER_VISION_MODEL, + fallback_default="gpt-4o", + ), poster_width=poster_width, poster_height=poster_height, logo_file=logo_file, diff --git a/fastapi_app/routers/paper2ppt.py b/fastapi_app/routers/paper2ppt.py index 3df6df59..c9f59d97 100644 --- a/fastapi_app/routers/paper2ppt.py +++ b/fastapi_app/routers/paper2ppt.py @@ -9,6 +9,7 @@ from fastapi import APIRouter, Depends, File, Form, HTTPException, Request, UploadFile +from fastapi_app.config import settings from fastapi_app.schemas import ( ErrorResponse, FrontendPPTExportRequest, @@ -19,6 +20,7 @@ PageContentRequest, PPTGenerationRequest, ) +from fastapi_app.services.managed_api_service import resolve_model_name from dataflow_agent.utils.version_manager import ImageVersionManager from fastapi_app.services.billing_service import BillingService from fastapi_app.utils import _to_outputs_url, resolve_outputs_path @@ -207,6 +209,72 @@ def _consume_paper2ppt_frontend_charge(request: Request, req: FrontendPPTGenerat ) +def _build_ppt_generation_request( + *, + img_gen_model_name: str, + chat_api_url: Optional[str], + api_key: Optional[str], + credential_scope: Optional[str], + email: Optional[str], + style: str, + aspect_ratio: str, + language: str, + model: str, + get_down: str, + all_edited_down: str, + result_path: str, + pagecontent: Optional[str], + page_id: Optional[int], + edit_prompt: Optional[str], + regenerate_from_outline: str, + image_resolution: Optional[str], + skip_pages: Optional[str], +) -> PPTGenerationRequest: + return PPTGenerationRequest( + img_gen_model_name=resolve_model_name( + img_gen_model_name, + managed_default=settings.PAPER2PPT_IMAGE_GEN_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_IMAGE_MODEL, + ), + chat_api_url=chat_api_url, + api_key=api_key, 
+ credential_scope=credential_scope, + email=email, + style=style, + aspect_ratio=aspect_ratio, + language=language, + model=resolve_model_name( + model, + managed_default=settings.PAPER2PPT_CONTENT_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_MODEL, + ), + get_down=get_down, + all_edited_down=all_edited_down, + result_path=result_path, + pagecontent=pagecontent, + page_id=page_id, + edit_prompt=edit_prompt, + regenerate_from_outline=regenerate_from_outline, + image_resolution=image_resolution, + skip_pages=skip_pages, + ) + + +async def _execute_paper2ppt_generate( + *, + request: Request, + req: PPTGenerationRequest, + reference_img: Optional[UploadFile], + service: "Paper2PPTService", +) -> Dict[str, Any]: + _consume_paper2ppt_generate_charge(request, req) + return await service.generate_ppt( + req=req, + reference_img=reference_img, + request=request, + ) + + @router.post( "/paper2ppt/page-content", response_model=Dict[str, Any], @@ -223,11 +291,11 @@ async def paper2ppt_pagecontent_json( file: Optional[UploadFile] = File(None), text: Optional[str] = Form(None), # 可选控制参数(对 pagecontent 也可能有用) - model: str = Form("gpt-5.1"), + model: str = Form(settings.PAPER2PPT_OUTLINE_MODEL), language: str = Form("zh"), style: str = Form(""), reference_img: Optional[UploadFile] = File(None), - gen_fig_model: str = Form(...), + gen_fig_model: str = Form(settings.PAPER2PPT_IMAGE_GEN_MODEL), page_count: int = Form(...), use_long_paper: str = Form("false"), # 当 input_type=pdf 时,按“幻灯片图片”模式解析 @@ -247,10 +315,18 @@ async def paper2ppt_pagecontent_json( email=email, input_type=input_type, text=text, - model=model, + model=resolve_model_name( + model, + managed_default=settings.PAPER2PPT_OUTLINE_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_MODEL, + ), language=language, style=style, - gen_fig_model=gen_fig_model, + gen_fig_model=resolve_model_name( + gen_fig_model, + managed_default=settings.PAPER2PPT_IMAGE_GEN_MODEL, + 
fallback_default=settings.PAPER2PPT_DEFAULT_IMAGE_MODEL, + ), page_count=page_count, use_long_paper=use_long_paper, pdf_as_slides=pdf_as_slides, @@ -266,6 +342,176 @@ async def paper2ppt_pagecontent_json( return data +@router.post( + "/paper2ppt/slides/generate", + response_model=Dict[str, Any], + responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}}, +) +async def paper2ppt_generate_slides( + request: Request, + img_gen_model_name: str = Form(settings.PAPER2PPT_IMAGE_GEN_MODEL), + chat_api_url: Optional[str] = Form(None), + api_key: Optional[str] = Form(None), + credential_scope: Optional[str] = Form(None), + email: Optional[str] = Form(None), + style: str = Form(""), + reference_img: Optional[UploadFile] = File(None), + aspect_ratio: str = Form("16:9"), + language: str = Form("en"), + model: str = Form(settings.PAPER2PPT_CONTENT_MODEL), + image_resolution: Optional[str] = Form(None), + result_path: str = Form(...), + pagecontent: str = Form(...), + regenerate_from_outline: str = Form("false"), + skip_pages: Optional[str] = Form(None), + service: Paper2PPTService = Depends(get_service), +): + """ + 显式的批量页图生成接口: + - 根据 pagecontent 批量生成 / 增量生成页面 + - 不承担单页编辑和最终导出语义 + """ + req = _build_ppt_generation_request( + img_gen_model_name=img_gen_model_name, + chat_api_url=chat_api_url, + api_key=api_key, + credential_scope=credential_scope, + email=email, + style=style, + aspect_ratio=aspect_ratio, + language=language, + model=model, + get_down="false", + all_edited_down="false", + result_path=result_path, + pagecontent=pagecontent, + page_id=None, + edit_prompt=None, + regenerate_from_outline=regenerate_from_outline, + image_resolution=image_resolution, + skip_pages=skip_pages, + ) + return await _execute_paper2ppt_generate( + request=request, + req=req, + reference_img=reference_img, + service=service, + ) + + +@router.post( + "/paper2ppt/slides/{page_id}/edit", + response_model=Dict[str, Any], + responses={400: {"model": ErrorResponse}, 500: {"model": 
ErrorResponse}}, +) +async def paper2ppt_edit_slide( + page_id: int, + request: Request, + img_gen_model_name: str = Form(settings.PAPER2PPT_IMAGE_GEN_MODEL), + chat_api_url: Optional[str] = Form(None), + api_key: Optional[str] = Form(None), + credential_scope: Optional[str] = Form(None), + email: Optional[str] = Form(None), + style: str = Form(""), + reference_img: Optional[UploadFile] = File(None), + aspect_ratio: str = Form("16:9"), + language: str = Form("en"), + model: str = Form(settings.PAPER2PPT_CONTENT_MODEL), + image_resolution: Optional[str] = Form(None), + result_path: str = Form(...), + pagecontent: Optional[str] = Form(None), + edit_prompt: Optional[str] = Form(None), + regenerate_from_outline: str = Form("false"), + service: Paper2PPTService = Depends(get_service), +): + """ + 显式的单页编辑接口: + - edit_prompt: 文字编辑 / 局部改图 + - regenerate_from_outline=true: 按当前 outline 内容重生该页 + """ + req = _build_ppt_generation_request( + img_gen_model_name=img_gen_model_name, + chat_api_url=chat_api_url, + api_key=api_key, + credential_scope=credential_scope, + email=email, + style=style, + aspect_ratio=aspect_ratio, + language=language, + model=model, + get_down="true", + all_edited_down="false", + result_path=result_path, + pagecontent=pagecontent, + page_id=page_id, + edit_prompt=edit_prompt, + regenerate_from_outline=regenerate_from_outline, + image_resolution=image_resolution, + skip_pages=None, + ) + return await _execute_paper2ppt_generate( + request=request, + req=req, + reference_img=reference_img, + service=service, + ) + + +@router.post( + "/paper2ppt/finalize", + response_model=Dict[str, Any], + responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}}, +) +async def paper2ppt_finalize( + request: Request, + img_gen_model_name: str = Form(settings.PAPER2PPT_IMAGE_GEN_MODEL), + chat_api_url: Optional[str] = Form(None), + api_key: Optional[str] = Form(None), + credential_scope: Optional[str] = Form(None), + email: Optional[str] = Form(None), + 
style: str = Form(""), + reference_img: Optional[UploadFile] = File(None), + aspect_ratio: str = Form("16:9"), + language: str = Form("en"), + model: str = Form(settings.PAPER2PPT_CONTENT_MODEL), + image_resolution: Optional[str] = Form(None), + result_path: str = Form(...), + pagecontent: Optional[str] = Form(None), + service: Paper2PPTService = Depends(get_service), +): + """ + 显式的最终导出接口: + - 基于已有结果目录导出最终 PPTX / PDF + - 不承担批量生成和单页编辑语义 + """ + req = _build_ppt_generation_request( + img_gen_model_name=img_gen_model_name, + chat_api_url=chat_api_url, + api_key=api_key, + credential_scope=credential_scope, + email=email, + style=style, + aspect_ratio=aspect_ratio, + language=language, + model=model, + get_down="false", + all_edited_down="true", + result_path=result_path, + pagecontent=pagecontent, + page_id=None, + edit_prompt=None, + regenerate_from_outline="false", + image_resolution=image_resolution, + skip_pages=None, + ) + return await _execute_paper2ppt_generate( + request=request, + req=req, + reference_img=reference_img, + service=service, + ) + + @router.post( "/paper2ppt/generate", response_model=Dict[str, Any], @@ -273,7 +519,7 @@ async def paper2ppt_pagecontent_json( ) async def paper2ppt_ppt_json( request: Request, - img_gen_model_name: str = Form(...), + img_gen_model_name: str = Form(settings.PAPER2PPT_IMAGE_GEN_MODEL), chat_api_url: Optional[str] = Form(None), api_key: Optional[str] = Form(None), credential_scope: Optional[str] = Form(None), @@ -283,7 +529,7 @@ async def paper2ppt_ppt_json( reference_img: Optional[UploadFile] = File(None), aspect_ratio: str = Form("16:9"), language: str = Form("en"), - model: str = Form("gpt-5.1"), + model: str = Form(settings.PAPER2PPT_CONTENT_MODEL), image_resolution: Optional[str] = Form(None), # 关键:是否进入编辑,是否已经有了 nano 结果,现在要进入页面逐个页面编辑 get_down: str = Form("false"), # 字符串形式,需要手动转换 @@ -302,12 +548,16 @@ async def paper2ppt_ppt_json( service: Paper2PPTService = Depends(get_service), ): """ - 只跑 paper2ppt: + 
兼容旧接口,内部仍然分派到: + - /paper2ppt/slides/generate + - /paper2ppt/slides/{page_id}/edit + - /paper2ppt/finalize + + 旧语义: - get_down=false:生成模式(需要 pagecontent) - get_down=true:编辑模式(需要 page_id(0-based) + edit_prompt,pagecontent 可选) """ - - req = PPTGenerationRequest( + req = _build_ppt_generation_request( img_gen_model_name=img_gen_model_name, chat_api_url=chat_api_url, api_key=api_key, @@ -327,15 +577,104 @@ async def paper2ppt_ppt_json( image_resolution=image_resolution, skip_pages=skip_pages, ) - - _consume_paper2ppt_generate_charge(request, req) - - data = await service.generate_ppt( + return await _execute_paper2ppt_generate( + request=request, req=req, reference_img=reference_img, - request=request, + service=service, ) - return data + + +@router.post( + "/paper2ppt/slides/generate-task", + response_model=Dict[str, Any], + responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}}, +) +async def paper2ppt_generate_slides_task( + request: Request, + img_gen_model_name: str = Form(settings.PAPER2PPT_IMAGE_GEN_MODEL), + chat_api_url: Optional[str] = Form(None), + api_key: Optional[str] = Form(None), + credential_scope: Optional[str] = Form(None), + email: Optional[str] = Form(None), + style: str = Form(""), + reference_img: Optional[UploadFile] = File(None), + aspect_ratio: str = Form("16:9"), + language: str = Form("en"), + model: str = Form(settings.PAPER2PPT_CONTENT_MODEL), + image_resolution: Optional[str] = Form(None), + result_path: str = Form(...), + pagecontent: str = Form(...), + regenerate_from_outline: str = Form("false"), + skip_pages: Optional[str] = Form(None), + task_service: Paper2PPTTaskService = Depends(get_task_service), +): + req = _build_ppt_generation_request( + img_gen_model_name=img_gen_model_name, + chat_api_url=chat_api_url, + api_key=api_key, + credential_scope=credential_scope, + email=email, + style=style, + aspect_ratio=aspect_ratio, + language=language, + model=model, + get_down="false", + all_edited_down="false", + 
result_path=result_path, + pagecontent=pagecontent, + page_id=None, + edit_prompt=None, + regenerate_from_outline=regenerate_from_outline, + image_resolution=image_resolution, + skip_pages=skip_pages, + ) + return await task_service.submit_generate_task(req=req, reference_img=reference_img, request=request) + + +@router.post( + "/paper2ppt/finalize-task", + response_model=Dict[str, Any], + responses={400: {"model": ErrorResponse}, 500: {"model": ErrorResponse}}, +) +async def paper2ppt_finalize_task( + request: Request, + img_gen_model_name: str = Form(settings.PAPER2PPT_IMAGE_GEN_MODEL), + chat_api_url: Optional[str] = Form(None), + api_key: Optional[str] = Form(None), + credential_scope: Optional[str] = Form(None), + email: Optional[str] = Form(None), + style: str = Form(""), + reference_img: Optional[UploadFile] = File(None), + aspect_ratio: str = Form("16:9"), + language: str = Form("en"), + model: str = Form(settings.PAPER2PPT_CONTENT_MODEL), + image_resolution: Optional[str] = Form(None), + result_path: str = Form(...), + pagecontent: Optional[str] = Form(None), + task_service: Paper2PPTTaskService = Depends(get_task_service), +): + req = _build_ppt_generation_request( + img_gen_model_name=img_gen_model_name, + chat_api_url=chat_api_url, + api_key=api_key, + credential_scope=credential_scope, + email=email, + style=style, + aspect_ratio=aspect_ratio, + language=language, + model=model, + get_down="false", + all_edited_down="true", + result_path=result_path, + pagecontent=pagecontent, + page_id=None, + edit_prompt=None, + regenerate_from_outline="false", + image_resolution=image_resolution, + skip_pages=None, + ) + return await task_service.submit_generate_task(req=req, reference_img=reference_img, request=request) @router.post( @@ -345,7 +684,7 @@ async def paper2ppt_ppt_json( ) async def paper2ppt_generate_task( request: Request, - img_gen_model_name: str = Form(...), + img_gen_model_name: str = Form(settings.PAPER2PPT_IMAGE_GEN_MODEL), chat_api_url: 
Optional[str] = Form(None), api_key: Optional[str] = Form(None), credential_scope: Optional[str] = Form(None), @@ -354,7 +693,7 @@ async def paper2ppt_generate_task( reference_img: Optional[UploadFile] = File(None), aspect_ratio: str = Form("16:9"), language: str = Form("en"), - model: str = Form("gpt-5.1"), + model: str = Form(settings.PAPER2PPT_CONTENT_MODEL), image_resolution: Optional[str] = Form(None), get_down: str = Form("false"), all_edited_down: str = Form("false"), @@ -367,7 +706,11 @@ async def paper2ppt_generate_task( task_service: Paper2PPTTaskService = Depends(get_task_service), ): req = PPTGenerationRequest( - img_gen_model_name=img_gen_model_name, + img_gen_model_name=resolve_model_name( + img_gen_model_name, + managed_default=settings.PAPER2PPT_IMAGE_GEN_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_IMAGE_MODEL, + ), chat_api_url=chat_api_url, api_key=api_key, credential_scope=credential_scope, @@ -375,7 +718,11 @@ async def paper2ppt_generate_task( style=style, aspect_ratio=aspect_ratio, language=language, - model=model, + model=resolve_model_name( + model, + managed_default=settings.PAPER2PPT_CONTENT_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_MODEL, + ), get_down=get_down, all_edited_down=all_edited_down, result_path=result_path, @@ -415,7 +762,7 @@ async def paper2ppt_outline_refine( api_key: Optional[str] = Form(None), credential_scope: Optional[str] = Form(None), email: Optional[str] = Form(None), - model: str = Form("gpt-5.1"), + model: str = Form(settings.PAPER2PPT_OUTLINE_MODEL), language: str = Form("zh"), result_path: Optional[str] = Form(None), service: Paper2PPTService = Depends(get_service), @@ -426,7 +773,11 @@ async def paper2ppt_outline_refine( api_key=api_key, credential_scope=credential_scope, email=email, - model=model, + model=resolve_model_name( + model, + managed_default=settings.PAPER2PPT_OUTLINE_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_MODEL, + ), language=language, result_path=result_path, 
outline_feedback=outline_feedback, @@ -449,7 +800,7 @@ async def paper2ppt_frontend_generate( api_key: Optional[str] = Form(None), credential_scope: Optional[str] = Form(None), email: Optional[str] = Form(None), - model: str = Form("gpt-5.1"), + model: str = Form(settings.PAPER2PPT_CONTENT_MODEL), language: str = Form("zh"), style: str = Form(""), include_images: bool = Form(False), @@ -468,12 +819,20 @@ async def paper2ppt_frontend_generate( api_key=api_key, credential_scope=credential_scope, email=email, - model=model, + model=resolve_model_name( + model, + managed_default=settings.PAPER2PPT_CONTENT_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_MODEL, + ), language=language, style=style, include_images=include_images, image_style=image_style, - image_model=image_model, + image_model=resolve_model_name( + image_model, + managed_default=settings.PAPER2PPT_IMAGE_GEN_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_IMAGE_MODEL, + ), page_id=page_id, edit_prompt=edit_prompt, current_slide=current_slide, diff --git a/fastapi_app/routers/pdf2ppt.py b/fastapi_app/routers/pdf2ppt.py index 9f71d733..72e621b5 100644 --- a/fastapi_app/routers/pdf2ppt.py +++ b/fastapi_app/routers/pdf2ppt.py @@ -7,6 +7,7 @@ from dataflow_agent.logger import get_logger from fastapi_app.config import settings +from fastapi_app.services.managed_api_service import resolve_model_name log = get_logger(__name__) @@ -59,8 +60,14 @@ async def generate_pdf2ppt( api_key=api_key, email=email, use_ai_edit=use_ai_edit, - model=model, - gen_fig_model=gen_fig_model, + model=resolve_model_name( + model, + managed_default=settings.PDF2PPT_DEFAULT_MODEL, + ), + gen_fig_model=resolve_model_name( + gen_fig_model, + managed_default=settings.PDF2PPT_DEFAULT_IMAGE_MODEL, + ), language=language, style=style, page_count=page_count, diff --git a/fastapi_app/schemas.py b/fastapi_app/schemas.py index a849e454..246e7a0d 100644 --- a/fastapi_app/schemas.py +++ b/fastapi_app/schemas.py @@ -218,7 +218,7 @@ class 
PageContentRequest(BaseModel): language: str = "zh" style: str = "" reference_img: Optional[Any] = None - gen_fig_model: str = Field(...) + gen_fig_model: str = settings.PAPER2PPT_IMAGE_GEN_MODEL page_count: int = 5 use_long_paper: str = "false" # 当 input_type=pdf 时,是否按“幻灯片图片”模式解析(跳过 MinerU 解析) @@ -285,7 +285,7 @@ class DeepResearchRequest(BaseModel): file_paths: List[str] = [] api_url: Optional[str] = None api_key: Optional[str] = None - model: str = settings.MODEL_GPT_4O + model: str = settings.KB_CHAT_MODEL language: str = "zh" email: Optional[str] = None user_id: Optional[str] = None @@ -478,7 +478,7 @@ class KBReportRequest(BaseModel): file_paths: List[str] = [] api_url: Optional[str] = None api_key: Optional[str] = None - model: str = "gpt-5.1" + model: str = settings.KB_CHAT_MODEL language: str = "zh" report_style: Literal["insight", "analysis"] = "insight" length: Literal["short", "standard", "long"] = "standard" @@ -613,3 +613,4 @@ class Paper2PPTResponse(BaseModel): pagecontent: List[Dict[str, Any]] = [] result_path: str = "" all_output_files: List[str] = [] + error: str = "" diff --git a/fastapi_app/services/image2ppt_service.py b/fastapi_app/services/image2ppt_service.py index ee6a30c5..c9362d16 100644 --- a/fastapi_app/services/image2ppt_service.py +++ b/fastapi_app/services/image2ppt_service.py @@ -5,11 +5,13 @@ from typing import Optional from fastapi import File, UploadFile, HTTPException +from fastapi_app.config import settings from fastapi_app.schemas import Paper2PPTRequest from fastapi_app.interprocess_lock import AsyncInterProcessSemaphore from fastapi_app.services.managed_api_service import ( resolve_image_generation_credentials, resolve_llm_credentials, + resolve_model_name, ) from fastapi_app.workflow_adapters.wa_pdf2ppt import run_pdf2ppt_wf_api from dataflow_agent.utils import get_project_root @@ -64,6 +66,14 @@ async def generate_ppt( api_key, scope="image2ppt", ) + model = resolve_model_name( + model, + 
managed_default=settings.IMAGE2PPT_DEFAULT_MODEL, + ) + gen_fig_model = resolve_model_name( + gen_fig_model, + managed_default=settings.IMAGE2PPT_DEFAULT_IMAGE_MODEL, + ) # 0.5 如果启用 AI 增强,必须校验 API 配置 if use_ai_edit: if not resolved_chat_api_url or not resolved_api_key: diff --git a/fastapi_app/services/managed_api_service.py b/fastapi_app/services/managed_api_service.py index 37b1be3f..20e1e811 100644 --- a/fastapi_app/services/managed_api_service.py +++ b/fastapi_app/services/managed_api_service.py @@ -41,6 +41,10 @@ def _normalize_api_key(value: str | None) -> str: return (value or "").strip() +def _normalize_model_name(value: str | None) -> str: + return (value or "").strip() + + def _get_default_managed_llm_credentials() -> tuple[str, str]: return ( _normalize_api_url(settings.DF_API_URL or settings.DEFAULT_LLM_API_URL), @@ -222,6 +226,27 @@ def resolve_image_generation_credentials( return _normalize_api_url(chat_api_url), _normalize_api_key(api_key) +def resolve_model_name( + requested_model: str | None, + *, + managed_default: str | None, + fallback_default: str | None = None, +) -> str: + """ + Resolve a workflow model name under the current billing mode. + + In free/managed mode we intentionally ignore any client-provided model and + always use the backend-managed default from .env. In paid mode we still + respect the client model and only fall back when it is empty. 
+ """ + managed_value = _normalize_model_name(managed_default) + fallback_value = _normalize_model_name(fallback_default) + if is_free_billing_mode(): + return managed_value or fallback_value + requested_value = _normalize_model_name(requested_model) + return requested_value or managed_value or fallback_value + + def get_runtime_billing_config() -> dict: pricing = get_pricing_config() managed_api_url, managed_api_key = _get_any_configured_managed_llm_credentials() @@ -229,6 +254,7 @@ def get_runtime_billing_config() -> dict: return { "billing_mode": get_billing_mode(), "user_api_config_required": is_user_api_config_required(), + "model_selection_locked": is_free_billing_mode(), "managed_api_enabled": bool(managed_api_url and managed_api_key), "managed_api_url": managed_api_url, "server_side_billing_enforced": True, diff --git a/fastapi_app/services/paper2any_service.py b/fastapi_app/services/paper2any_service.py index e8338820..28a27f5e 100644 --- a/fastapi_app/services/paper2any_service.py +++ b/fastapi_app/services/paper2any_service.py @@ -17,6 +17,7 @@ from fastapi_app.services.managed_api_service import ( resolve_image_generation_credentials, resolve_llm_credentials, + resolve_model_name, ) from dataflow_agent.utils import get_project_root from dataflow_agent.logger import get_logger @@ -261,11 +262,29 @@ async def generate_paper2figure( # paper2figure 前端历史上把 tech_route 的文本模型塞在 img_gen_model_name 里。 # 这里按 graph_type 分流,避免技术路线图仍被固定到 gpt-4o。 selected_text_model = ( - _normalize_tech_route_text_model(img_gen_model_name) + resolve_model_name( + _normalize_tech_route_text_model(img_gen_model_name), + managed_default=settings.PAPER2FIGURE_TECHNICAL_MODEL, + fallback_default=settings.PAPER2FIGURE_TEXT_MODEL, + ) + if graph_type == "tech_route" + else resolve_model_name( + settings.PAPER2FIGURE_TEXT_MODEL, + managed_default=settings.PAPER2FIGURE_TEXT_MODEL, + ) + ) + selected_image_model = ( + resolve_model_name( + settings.PAPER2FIGURE_IMAGE_MODEL, + 
managed_default=settings.PAPER2FIGURE_IMAGE_MODEL, + ) if graph_type == "tech_route" - else settings.PAPER2FIGURE_TEXT_MODEL + else resolve_model_name( + img_gen_model_name, + managed_default=settings.PAPER2FIGURE_IMAGE_MODEL, + fallback_default=settings.PAPER2FIGURE_DEFAULT_IMAGE_MODEL, + ) ) - selected_image_model = settings.PAPER2FIGURE_IMAGE_MODEL if graph_type == "tech_route" else img_gen_model_name # 4. 构造 Request p2f_req = Paper2FigureRequest( @@ -382,11 +401,29 @@ async def generate_paper2figure_json( # paper2figure 前端历史上把 tech_route 的文本模型塞在 img_gen_model_name 里。 # 这里按 graph_type 分流,避免技术路线图仍被固定到 gpt-4o。 selected_text_model = ( - _normalize_tech_route_text_model(img_gen_model_name) + resolve_model_name( + _normalize_tech_route_text_model(img_gen_model_name), + managed_default=settings.PAPER2FIGURE_TECHNICAL_MODEL, + fallback_default=settings.PAPER2FIGURE_TEXT_MODEL, + ) + if graph_type == "tech_route" + else resolve_model_name( + settings.PAPER2FIGURE_TEXT_MODEL, + managed_default=settings.PAPER2FIGURE_TEXT_MODEL, + ) + ) + selected_image_model = ( + resolve_model_name( + settings.PAPER2FIGURE_IMAGE_MODEL, + managed_default=settings.PAPER2FIGURE_IMAGE_MODEL, + ) if graph_type == "tech_route" - else settings.PAPER2FIGURE_TEXT_MODEL + else resolve_model_name( + img_gen_model_name, + managed_default=settings.PAPER2FIGURE_IMAGE_MODEL, + fallback_default=settings.PAPER2FIGURE_DEFAULT_IMAGE_MODEL, + ) ) - selected_image_model = settings.PAPER2FIGURE_IMAGE_MODEL if graph_type == "tech_route" else img_gen_model_name # 4. 
构造 Request p2f_req = Paper2FigureRequest( diff --git a/fastapi_app/services/paper2drawio_service.py b/fastapi_app/services/paper2drawio_service.py index f7a052cd..35be658c 100644 --- a/fastapi_app/services/paper2drawio_service.py +++ b/fastapi_app/services/paper2drawio_service.py @@ -19,7 +19,7 @@ from dataflow_agent.logger import get_logger from fastapi_app.config.settings import settings from fastapi_app.interprocess_lock import AsyncInterProcessSemaphore -from fastapi_app.services.managed_api_service import resolve_llm_credentials +from fastapi_app.services.managed_api_service import resolve_llm_credentials, resolve_model_name log = get_logger(__name__) @@ -163,6 +163,14 @@ async def generate_diagram( api_key, scope="paper2drawio", ) + model = resolve_model_name( + model, + managed_default=settings.PAPER2DRAWIO_DEFAULT_MODEL, + ) + vlm_model = resolve_model_name( + vlm_model, + managed_default=settings.PAPER2DRAWIO_VLM_MODEL, + ) run_dir = self._create_run_dir("paper2drawio", email) input_dir = run_dir / "input" @@ -320,6 +328,10 @@ async def chat_edit( api_key, scope="paper2drawio", ) + model = resolve_model_name( + model, + managed_default=settings.PAPER2DRAWIO_DEFAULT_MODEL, + ) current_cells = ( extract_cells(current_xml) if (" Optional[Path]: + raw = (path_value or "").strip() + if not raw: + return None + + candidate = Path(raw).expanduser() + if not candidate.is_absolute(): + candidate = (PROJECT_ROOT / candidate).resolve() + + if candidate.is_file(): + return candidate + + log.warning("[paper2poster] output file missing on disk: %s", raw) + return None + @staticmethod def _validate_poster_dimensions(width: float, height: float) -> None: if width <= 0 or height <= 0: @@ -85,6 +102,16 @@ async def generate( api_key, scope="paper2poster", ) + model = resolve_model_name( + model, + managed_default=settings.PAPER2POSTER_DEFAULT_MODEL, + fallback_default="gpt-4o", + ) + vision_model = resolve_model_name( + vision_model, + 
managed_default=settings.PAPER2POSTER_VISION_MODEL, + fallback_default="gpt-4o", + ) self._validate_poster_dimensions(poster_width, poster_height) run_dir = self._create_run_dir(email) @@ -138,13 +165,16 @@ async def generate( raise HTTPException(status_code=500, detail=result.get("message") or "Failed to generate poster") pptx_path = (result.get("output_pptx_path") or "").strip() - if not pptx_path: - raise HTTPException(status_code=500, detail="Poster workflow finished without a PPTX output") + pptx_file = self._resolve_existing_output_file(pptx_path) + if pptx_file is None: + detail = result.get("message") or "Poster workflow finished without a valid PPTX output" + raise HTTPException(status_code=500, detail=detail) png_path = (result.get("output_png_path") or "").strip() + png_file = self._resolve_existing_output_file(png_path) if png_path else None return { "success": True, - "pptx_url": _to_outputs_url(pptx_path), - "png_url": _to_outputs_url(png_path) if png_path else None, + "pptx_url": _to_outputs_url(str(pptx_file)), + "png_url": _to_outputs_url(str(png_file)) if png_file else None, "message": "Poster generated successfully", } diff --git a/fastapi_app/services/paper2ppt_frontend_service.py b/fastapi_app/services/paper2ppt_frontend_service.py index 61b6e421..7315835a 100644 --- a/fastapi_app/services/paper2ppt_frontend_service.py +++ b/fastapi_app/services/paper2ppt_frontend_service.py @@ -30,6 +30,7 @@ from fastapi_app.services.managed_api_service import ( resolve_image_generation_credentials, resolve_llm_credentials, + resolve_model_name, ) from fastapi_app.utils import _from_outputs_url, _to_outputs_url, resolve_outputs_path @@ -100,7 +101,11 @@ async def generate_slides( pagecontent=pagecontent, chat_api_url=resolved_chat_api_url, api_key=resolved_api_key, - model=req.model, + model=resolve_model_name( + req.model, + managed_default=settings.PAPER2PPT_CONTENT_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_MODEL, + ), language=req.language, 
style=req.style, ) @@ -116,12 +121,20 @@ async def generate_slides( slide_index=req.page_id, chat_api_url=resolved_chat_api_url, api_key=resolved_api_key, - model=req.model, + model=resolve_model_name( + req.model, + managed_default=settings.PAPER2PPT_CONTENT_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_MODEL, + ), language=req.language, style=req.style, include_images=req.include_images, image_style=req.image_style, - image_model=req.image_model, + image_model=resolve_model_name( + req.image_model, + managed_default=settings.PAPER2PPT_IMAGE_GEN_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_IMAGE_MODEL, + ), image_api_url=resolved_image_api_url, image_api_key=resolved_image_api_key, edit_prompt=req.edit_prompt, @@ -178,12 +191,20 @@ async def generate_slides( slide_index=index, chat_api_url=resolved_chat_api_url, api_key=resolved_api_key, - model=req.model, + model=resolve_model_name( + req.model, + managed_default=settings.PAPER2PPT_CONTENT_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_MODEL, + ), language=req.language, style=req.style, include_images=req.include_images, image_style=req.image_style, - image_model=req.image_model, + image_model=resolve_model_name( + req.image_model, + managed_default=settings.PAPER2PPT_IMAGE_GEN_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_IMAGE_MODEL, + ), image_api_url=resolved_image_api_url, image_api_key=resolved_image_api_key, edit_prompt=None, @@ -846,16 +867,12 @@ def _build_visual_asset_prompt( "sci_fi": "restrained sci-fi research visual with clean lighting", "flat_infographic": "flat infographic-style illustration with simple shapes", } - key_points = [ - str(item).strip() - for item in (outline_item.get("key_points") or []) - if str(item).strip() - ][:4] + key_points = self._normalize_outline_points(outline_item.get("key_points"), limit=4, item_limit=120) palette = theme.get("palette") or {} return ( "Create one supporting image for an academic presentation slide. 
" - f"Page topic: {str(outline_item.get('title') or f'Slide {slide_index + 1}').strip()}. " - f"Layout intent: {str(outline_item.get('layout_description') or '').strip()}. " + f"Page topic: {self._clean_text_content(outline_item.get('title'), f'Slide {slide_index + 1}', 220)}. " + f"Layout intent: {self._clean_text_content(outline_item.get('layout_description'), '', 220)}. " f"Key points: {'; '.join(key_points) if key_points else 'keep it concise and presentation-friendly'}. " f"Visual style: {style_map.get(image_style, image_style or 'academic illustration')}. " f"Preferred palette anchors: background {palette.get('bg', '#0b1020')}, accent {palette.get('accent', '#f59e0b')}, text contrast {palette.get('text', '#e2e8f0')}. " @@ -1145,6 +1162,7 @@ def _build_theme_messages( { "theme_name": "short id", "visual_mood": "one sentence", + "style_family": "modern | business | academic | creative", "palette": { "bg": "#0b1020", "panel": "rgba(15,23,42,0.92)", @@ -1183,6 +1201,7 @@ def _build_theme_messages( 6. The theme_lock must be concrete enough to prevent per-slide drift during later regeneration. 7. If style_prompt contains explicit color or material directions, translate them into the palette instead of ignoring them. 8. Do not default to cyan/teal accents unless the style_prompt clearly asks for them. +9. style_family must be one of modern, business, academic, creative and should match the tone implied by style_prompt. """.strip() user_payload = { @@ -1218,39 +1237,51 @@ def _build_messages( visual_assets: List[Dict[str, Any]], ) -> List[Dict[str, Any]]: system_prompt = """ -You are an expert academic slide frontend engineer. -Generate a single 16:9 presentation slide as HTML/CSS for a browser-based PPT editor. +You are an expert academic presentation designer. +Generate one strictly structured 16:9 slide for a browser PPT editor and true editable PPT export. Hard requirements: 1. Return JSON only. No markdown fences. No explanation. 2. 
Output schema: { "title": "short string", - "html_template": "HTML string", - "css_code": "CSS string", - "editable_fields": [ - {"key": "title", "label": "Title", "type": "text", "value": "..."}, - {"key": "summary", "label": "Summary", "type": "textarea", "value": "..."}, - {"key": "key_points", "label": "Key Points", "type": "list", "items": ["...", "..."]} - ], + "layout_type": "cover | section | bullets | two_column | cards_2x2 | image_focus | comparison | timeline", + "content": { + "...": "layout-specific content" + }, "generation_note": "one short sentence" } -3. Every visible text in html_template must come from placeholders only: - - text/textarea fields: {{field:key}} - - list fields: {{list:key}} - - controlled images, when required: {{image:key}} -4. css_code must only target .slide-root and its descendants. -5. Do not use external assets, remote fonts, raw image URLs, svg, canvas, script, iframe, video or img tags. -6. The slide must fit inside a 1600x900 canvas with safe margins and no overflow. -7. Use the supplied deck theme so every page looks like the same presentation family. -8. Treat theme_lock as non-negotiable. Do not invent a new palette family, component language, or typography system. -9. Keep titles within 2 lines, with title font 42-60px and body text 18-28px. -10. Prefer grid/flex layouts over brittle absolute positioning. -11. If visual_assets are supplied, reserve layout space and place them using {{image:key}} placeholders. Never write a raw tag yourself. -12. If visual_assets are empty, build a text-first slide using editable text blocks and CSS decoration only. -13. The HTML must contain a single .slide-root root element. -14. If reference deck slides are provided, preserve their shared component grammar, spacing rhythm, and card treatment. -15. Never put {{field:...}}, {{list:...}}, or {{image:...}} placeholders inside HTML attributes like aria-label, title, alt, data-*, href, or style. 
Placeholders may only appear in element content. +3. Never return HTML, CSS, SVG, coordinates, raw style code, or arbitrary DOM. +4. Use only the allowed layout_type values. +5. Keep the slide strictly editable: + - all visible text must live in `content` + - images must be referenced only through the provided visual_assets slots +6. Use the supplied deck theme so every page looks like the same presentation family. +7. Treat theme_lock as non-negotiable. Do not invent a new palette family, component language, or typography system. +8. Keep titles within 2 lines, body content concise, and list lengths <= 6. +9. Use `image_focus` only when the slide genuinely benefits from a dominant supporting visual. If no visual_assets are present, do not choose `image_focus`. +10. `cards_2x2` must contain exactly 4 cards. +11. `timeline` must contain 3 to 5 items. +12. `comparison` must contain left and right sections with short bullet lists. +13. Do not overuse one layout type across the deck. Reuse the shared theme, but vary page structure according to the content. + +Layout content schema: +- cover: + eyebrow, title, subtitle, presenter, footer +- section: + eyebrow, title, summary, quote, footer +- bullets: + eyebrow, title, summary, bullets[], takeaway, footer +- two_column: + eyebrow, title, summary, left_heading, left_body, left_points[], right_heading, right_body, right_points[], footer +- cards_2x2: + eyebrow, title, summary, cards[{title, body} x4], footer +- image_focus: + eyebrow, title, summary, bullets[], visual_caption, footer +- comparison: + eyebrow, title, summary, left_title, left_points[], right_title, right_points[], footer +- timeline: + eyebrow, title, summary, timeline[{label, body}], footer """.strip() outline_payload = { @@ -1309,7 +1340,7 @@ def _build_messages( user_sections.append(f"Revision request: {edit_prompt}") user_sections.append( - "Ensure the editable_fields fully cover all meaningful visible text shown on the slide." 
+ "Return a compact structured slide. Do not emit arbitrary layout code." ) return [ @@ -1387,13 +1418,9 @@ def _summarize_slide_for_review(self, slide: Dict[str, Any]) -> Dict[str, Any]: "type": field_type, } if field_type == "list": - entry["items"] = [ - str(item).strip() - for item in (field.get("items") or []) - if str(item).strip() - ][:5] + entry["items"] = self._normalize_outline_points(field.get("items"), limit=5, item_limit=140) else: - entry["value"] = str(field.get("value") or "").strip()[:280] + entry["value"] = self._clean_text_content(field.get("value"), "", 280) summarized_fields.append(entry) visual_assets = slide.get("visual_assets") or slide.get("visualAssets") or [] @@ -1434,63 +1461,305 @@ def _normalize_slide_payload( theme=theme, visual_assets=visual_assets, ) - html_template = payload.get("html_template") or payload.get("html") or "" - css_code = payload.get("css_code") or payload.get("css") or "" - if not isinstance(html_template, str) or not isinstance(css_code, str): - return fallback_slide - if len(html_template) > 16000 or len(css_code) > 20000: + layout_type = str(payload.get("layout_type") or payload.get("layoutType") or "").strip() + content = payload.get("content") or {} + if not isinstance(content, dict): return fallback_slide - if _FORBIDDEN_HTML_RE.search(html_template) or _FORBIDDEN_CSS_RE.search(css_code): + if layout_type not in { + "cover", + "section", + "bullets", + "two_column", + "cards_2x2", + "image_focus", + "comparison", + "timeline", + }: return fallback_slide - - normalized_html = self._sanitize_html_template(html_template) - normalized_css = self._sanitize_css(css_code, theme=theme) - editable_fields = self._normalize_fields( - payload.get("editable_fields"), - outline_item=outline_item, - slide_index=slide_index, - ) - if not editable_fields: + if not visual_assets and layout_type == "image_focus": return fallback_slide - normalized_html, attribute_warnings = self._sanitize_attribute_placeholders( - 
normalized_html, - editable_fields, - ) - if attribute_warnings: + try: + return self._build_structured_slide( + layout_type=layout_type, + content=content, + outline_item=outline_item, + slide_index=slide_index, + slide_count=slide_count, + theme=theme, + visual_assets=visual_assets, + generation_note=str(payload.get("generation_note") or "").strip(), + ) + except Exception as exc: # noqa: BLE001 log.warning( - "[Paper2PPTFrontendService] Sanitized attribute placeholders for page %s: %s", + "[Paper2PPTFrontendService] Failed to normalize structured slide payload for page %s: %s", slide_index + 1, - ", ".join(attribute_warnings), + exc, ) - - field_keys = {field["key"] for field in editable_fields} - placeholders = set(_FIELD_PLACEHOLDER_RE.findall(normalized_html)) - image_placeholders = set(_IMAGE_PLACEHOLDER_RE.findall(normalized_html)) - asset_keys = {str(asset.get("key") or "").strip() for asset in visual_assets if str(asset.get("key") or "").strip()} - if not placeholders: - return fallback_slide - if not placeholders.issubset(field_keys): - return fallback_slide - if image_placeholders and not image_placeholders.issubset(asset_keys): - return fallback_slide - if visual_assets and not image_placeholders: return fallback_slide - title_value = ( - self._find_field_value(editable_fields, "title") - or outline_item.get("title") - or f"Slide {slide_index + 1}" + def _field_entry( + self, + *, + key: str, + label: str, + field_type: str, + value: str = "", + items: Optional[List[str]] = None, + ) -> Dict[str, Any]: + return { + "key": key, + "label": label, + "type": field_type, + "value": value, + "items": items or [], + } + + def _clean_text_content(self, value: Any, default: str = "", limit: int = 280) -> str: + text = self._extract_outline_text(value) + text = re.sub(r"\s+", " ", text) + return (text or default)[:limit] + + def _extract_outline_text(self, value: Any) -> str: + if value is None: + return "" + if isinstance(value, str): + return value.strip() + 
if isinstance(value, (int, float, bool)): + return str(value).strip() + if isinstance(value, dict): + preferred_keys = ( + "text", + "value", + "content", + "summary", + "title", + "label", + "body", + "description", + "reason", + "point", + ) + for key in preferred_keys: + extracted = self._extract_outline_text(value.get(key)) + if extracted: + return extracted + parts = [self._extract_outline_text(item) for item in value.values()] + joined = " ".join(part for part in parts if part) + return joined.strip() + if isinstance(value, list): + parts = [self._extract_outline_text(item) for item in value] + joined = " ".join(part for part in parts if part) + return joined.strip() + return str(value).strip() + + def _normalize_outline_points( + self, + value: Any, + *, + limit: int = 6, + item_limit: int = 120, + ) -> List[str]: + normalized: List[str] = [] + + def _append(item: Any) -> None: + text = self._clean_text_content(item, "", item_limit) + if text and text not in normalized: + normalized.append(text) + + if isinstance(value, list): + for item in value: + if isinstance(item, list): + for nested in item: + _append(nested) + else: + _append(item) + elif value is not None: + _append(value) + return normalized[:limit] + + def _clean_list_content( + self, + value: Any, + *, + defaults: Optional[List[str]] = None, + limit: int = 6, + item_limit: int = 120, + ) -> List[str]: + cleaned: List[str] = [] + if isinstance(value, list): + for item in value: + text = self._clean_text_content(item, "", item_limit) + if text: + cleaned.append(text) + elif isinstance(value, str) and value.strip(): + cleaned = [self._clean_text_content(value, "", item_limit)] + if cleaned: + return cleaned[:limit] + return (defaults or [])[:limit] + + def _build_structured_slide( + self, + *, + layout_type: str, + content: Dict[str, Any], + outline_item: Dict[str, Any], + slide_index: int, + slide_count: int, + theme: Dict[str, Any], + visual_assets: List[Dict[str, Any]], + generation_note: str, + ) 
-> Dict[str, Any]: + fallback_title = str(outline_item.get("title") or f"Slide {slide_index + 1}").strip() + section_template = str(theme.get("section_label_template") or "Slide {page_num:02d}/{slide_count:02d}") + try: + default_eyebrow = section_template.format(page_num=slide_index + 1, slide_count=slide_count) + except Exception: # noqa: BLE001 + default_eyebrow = f"Slide {slide_index + 1:02d}/{slide_count:02d}" + key_points = self._normalize_outline_points(outline_item.get("key_points"), limit=6, item_limit=120) + default_summary = key_points[0] if key_points else self._clean_text_content( + outline_item.get("layout_description"), + "", + 280, + ) + default_footer = str(theme.get("footer_text") or "Paper2Any Structured PPT").strip() + + editable_fields: List[Dict[str, Any]] = [] + layout_data: Dict[str, Any] = {"type": layout_type} + + def add_text(key: str, label: str, default: str, *, field_type: str = "text", limit: int = 280) -> str: + value = self._clean_text_content(content.get(key), default, limit) + editable_fields.append( + self._field_entry( + key=key, + label=label, + field_type="textarea" if field_type == "textarea" else "text", + value=value, + ) + ) + return key + + def add_list(key: str, label: str, default_items: List[str], *, limit: int = 6, item_limit: int = 120) -> str: + items = self._clean_list_content( + content.get(key), + defaults=default_items, + limit=limit, + item_limit=item_limit, + ) + editable_fields.append( + self._field_entry( + key=key, + label=label, + field_type="list", + items=items, + ) + ) + return key + + layout_data["eyebrow_key"] = add_text("eyebrow", "Eyebrow", default_eyebrow) + layout_data["title_key"] = add_text("title", "Title", fallback_title, limit=120) + layout_data["footer_key"] = add_text("footer", "Footer", default_footer, limit=80) + + if layout_type == "cover": + layout_data["subtitle_key"] = add_text("subtitle", "Subtitle", default_summary, field_type="textarea", limit=220) + layout_data["presenter_key"] = 
add_text("presenter", "Presenter", "Presenter / Team", limit=80) + elif layout_type == "section": + layout_data["summary_key"] = add_text("summary", "Summary", default_summary, field_type="textarea", limit=220) + layout_data["quote_key"] = add_text("quote", "Quote", key_points[1] if len(key_points) > 1 else default_summary, field_type="textarea", limit=200) + elif layout_type == "bullets": + layout_data["summary_key"] = add_text("summary", "Summary", default_summary, field_type="textarea", limit=220) + layout_data["bullets_key"] = add_list("bullets", "Bullets", key_points[:5] or ["Add key points"]) + layout_data["takeaway_key"] = add_text("takeaway", "Takeaway", key_points[-1] if key_points else default_summary, field_type="textarea", limit=180) + elif layout_type == "two_column": + layout_data["summary_key"] = add_text("summary", "Summary", default_summary, field_type="textarea", limit=220) + layout_data["left_heading_key"] = add_text("left_heading", "Left Heading", "Core Idea", limit=80) + layout_data["left_body_key"] = add_text("left_body", "Left Body", key_points[0] if key_points else default_summary, field_type="textarea", limit=180) + layout_data["left_points_key"] = add_list("left_points", "Left Points", key_points[:3], limit=4) + layout_data["right_heading_key"] = add_text("right_heading", "Right Heading", "Implication", limit=80) + layout_data["right_body_key"] = add_text("right_body", "Right Body", key_points[1] if len(key_points) > 1 else default_summary, field_type="textarea", limit=180) + layout_data["right_points_key"] = add_list("right_points", "Right Points", key_points[2:5] or key_points[:2], limit=4) + elif layout_type == "cards_2x2": + layout_data["summary_key"] = add_text("summary", "Summary", default_summary, field_type="textarea", limit=200) + raw_cards = content.get("cards") + cards = raw_cards if isinstance(raw_cards, list) else [] + card_refs: List[Dict[str, str]] = [] + for index in range(4): + item = cards[index] if index < len(cards) and 
isinstance(cards[index], dict) else {} + title_key = f"card_{index + 1}_title" + body_key = f"card_{index + 1}_body" + editable_fields.append(self._field_entry( + key=title_key, + label=f"Card {index + 1} Title", + field_type="text", + value=self._clean_text_content(item.get("title"), f"Point {index + 1}", 80), + )) + editable_fields.append(self._field_entry( + key=body_key, + label=f"Card {index + 1} Body", + field_type="textarea", + value=self._clean_text_content( + item.get("body"), + key_points[index] if index < len(key_points) else default_summary, + 140, + ), + )) + card_refs.append({"title_key": title_key, "body_key": body_key}) + layout_data["cards"] = card_refs + elif layout_type == "image_focus": + layout_data["summary_key"] = add_text("summary", "Summary", default_summary, field_type="textarea", limit=180) + layout_data["bullets_key"] = add_list("bullets", "Bullets", key_points[:4], limit=4) + layout_data["visual_caption_key"] = add_text("visual_caption", "Visual Caption", "Supporting visual", limit=90) + layout_data["visual_key"] = str((visual_assets[0].get("key") if visual_assets else _DEFAULT_VISUAL_KEY) or _DEFAULT_VISUAL_KEY) + elif layout_type == "comparison": + layout_data["summary_key"] = add_text("summary", "Summary", default_summary, field_type="textarea", limit=180) + layout_data["left_title_key"] = add_text("left_title", "Left Title", "Track A", limit=80) + layout_data["left_points_key"] = add_list("left_points", "Left Points", key_points[:3], limit=4) + layout_data["right_title_key"] = add_text("right_title", "Right Title", "Track B", limit=80) + layout_data["right_points_key"] = add_list("right_points", "Right Points", key_points[3:6] or key_points[:3], limit=4) + elif layout_type == "timeline": + layout_data["summary_key"] = add_text("summary", "Summary", default_summary, field_type="textarea", limit=180) + raw_timeline = content.get("timeline") + timeline_items = raw_timeline if isinstance(raw_timeline, list) else [] + timeline_refs: 
List[Dict[str, str]] = [] + count = max(3, min(5, len(timeline_items) or 3)) + for index in range(count): + item = timeline_items[index] if index < len(timeline_items) and isinstance(timeline_items[index], dict) else {} + label_key = f"timeline_{index + 1}_label" + body_key = f"timeline_{index + 1}_body" + editable_fields.append(self._field_entry( + key=label_key, + label=f"Timeline {index + 1} Label", + field_type="text", + value=self._clean_text_content(item.get("label"), f"Phase {index + 1}", 60), + )) + editable_fields.append(self._field_entry( + key=body_key, + label=f"Timeline {index + 1} Body", + field_type="textarea", + value=self._clean_text_content( + item.get("body"), + key_points[index] if index < len(key_points) else default_summary, + 120, + ), + )) + timeline_refs.append({"label_key": label_key, "body_key": body_key}) + layout_data["timeline"] = timeline_refs + else: + raise ValueError(f"unsupported layout_type: {layout_type}") + + title_value = next( + (field.get("value") for field in editable_fields if field.get("key") == "title"), + fallback_title, ) return { - "slide_id": str(payload.get("slide_id") or slide_index + 1), + "slide_id": str(slide_index + 1), "page_num": slide_index + 1, - "title": str(payload.get("title") or title_value), - "html_template": normalized_html, - "css_code": normalized_css, + "title": str(title_value or fallback_title), + "layout_type": layout_type, + "layout_data": layout_data, "editable_fields": editable_fields, "visual_assets": visual_assets, - "generation_note": str(payload.get("generation_note") or ""), + "generation_note": generation_note or "Structured slide generated", "status": "done", } @@ -1501,11 +1770,7 @@ def _normalize_review_payload( slide: Dict[str, Any], local_layout_issues: List[str], ) -> Dict[str, Any]: - issues = [ - str(item).strip() - for item in (payload.get("issues") or []) - if str(item).strip() - ] + issues = self._normalize_outline_points(payload.get("issues"), limit=12, item_limit=220) 
combined_issues: List[str] = [] for issue in [*local_layout_issues, *issues]: if issue and issue not in combined_issues: @@ -1542,11 +1807,7 @@ def _normalize_fields( ) -> List[Dict[str, Any]]: normalized: List[Dict[str, Any]] = [] seen_keys: set[str] = set() - outline_points = [ - str(item).strip() - for item in (outline_item.get("key_points") or []) - if str(item).strip() - ] + outline_points = self._normalize_outline_points(outline_item.get("key_points"), limit=6, item_limit=120) if isinstance(raw_fields, list): for raw_field in raw_fields: @@ -1560,11 +1821,7 @@ def _normalize_fields( field_type = "text" label = str(raw_field.get("label") or key.replace("_", " ").title()) if field_type == "list": - items = [ - str(item).strip() - for item in (raw_field.get("items") or []) - if str(item).strip() - ] + items = self._normalize_outline_points(raw_field.get("items"), limit=8, item_limit=140) if not items: items = outline_points[:4] normalized.append( @@ -1577,7 +1834,7 @@ def _normalize_fields( } ) else: - value = str(raw_field.get("value") or "").strip() + value = self._clean_text_content(raw_field.get("value"), "", 280) normalized.append( { "key": key, @@ -1595,7 +1852,7 @@ def _normalize_fields( "key": "title", "label": "Title", "type": "text", - "value": str(outline_item.get("title") or f"Slide {slide_index + 1}"), + "value": self._clean_text_content(outline_item.get("title"), f"Slide {slide_index + 1}", 220), "items": [], } ) @@ -1605,9 +1862,11 @@ def _normalize_fields( "key": "summary", "label": "Summary", "type": "textarea", - "value": str( - (outline_points[0] if outline_points else outline_item.get("layout_description") or "") - ).strip(), + "value": self._clean_text_content( + outline_points[0] if outline_points else outline_item.get("layout_description"), + "", + 280, + ), "items": [], } ) @@ -1624,6 +1883,7 @@ def _normalize_fields( return normalized def _build_fallback_theme(self, *, language: str, style: str) -> Dict[str, Any]: + style_family = 
self._infer_style_family(style) footer_text = "Paper2Any Frontend PPT" section_label_template = ( "第 {page_num:02d}/{slide_count:02d} 页" @@ -1637,9 +1897,11 @@ def _build_fallback_theme(self, *, language: str, style: str) -> Dict[str, Any]: ) ) palette = self._resolve_palette_from_style(style) + family_rules = self._build_family_rules(style_family) return { "theme_name": "scholarly_signal", "visual_mood": visual_mood, + "style_family": style_family, "palette": palette, "typography": { "title_font_stack": 'Georgia, "Times New Roman", serif', @@ -1649,40 +1911,130 @@ def _build_fallback_theme(self, *, language: str, style: str) -> Dict[str, Any]: "summary_size": 26, "body_size": 24, }, - "layout_rules": [ - "Keep 72px+ safe margins around major content.", - "Prefer one dominant text area plus one supporting card or metrics block.", - "Avoid more than two visual columns in a single slide.", - "Reserve a quiet footer area for page identity or takeaway.", - ], - "component_rules": [ - "Use rounded cards with subtle borders and a restrained glow.", - "Use one accent color only for emphasis, not for large fills.", - "Keep text hierarchy clear with title, summary, and supporting bullets.", - ], + "layout_rules": family_rules["layout_rules"], + "component_rules": family_rules["component_rules"], "theme_lock": { "must_keep": [ "Use only the deck palette colors for fills, borders, and emphasis.", "Keep the same serif title style and sans body style across the deck.", - "Keep rounded translucent cards and a quiet footer treatment.", - ], - "preferred_layout_patterns": [ - "hero_with_side_card", - "split_insight_grid", - "stacked_cards", - "timeline_overview", + family_rules["must_keep"], ], - "component_signature": "Rounded refined cards, restrained accent usage, thin borders, and quiet academic spacing.", + "preferred_layout_patterns": family_rules["preferred_layout_patterns"], + "component_signature": family_rules["component_signature"], "avoid": [ "Do not introduce 
unrelated bright color families.", "Do not use more than two main columns.", - "Do not use oversized billboard titles or poster-like full-bleed blocks.", + family_rules["avoid"], ], }, "footer_text": footer_text, "section_label_template": section_label_template, } + def _infer_style_family(self, style: str) -> str: + style_text = (style or "").strip().lower() + if any(keyword in style_text for keyword in ("academic", "report", "paper", "research", "严谨", "学术", "报告")): + return "academic" + if any(keyword in style_text for keyword in ("business", "brand", "corporate", "executive", "商务", "商业", "品牌")): + return "business" + if any(keyword in style_text for keyword in ("creative", "illustration", "warm", "friendly", "playful", "soft", "创意", "插画", "柔和")): + return "creative" + return "modern" + + def _build_family_rules(self, style_family: str) -> Dict[str, Any]: + family = (style_family or "modern").strip().lower() + if family == "academic": + return { + "layout_rules": [ + "Keep generous white or paper-like breathing room and stable reading rhythm.", + "Prefer section, bullets, two-column, and comparison layouts over showy hero frames.", + "Use image_focus only for genuinely visual pages.", + "Reserve a quiet footer for provenance or page identity.", + ], + "component_rules": [ + "Use restrained panels, subtle dividers, and report-like hierarchy.", + "Keep decoration secondary to text structure and evidence density.", + "Avoid billboard marketing blocks or exaggerated hero cards.", + ], + "preferred_layout_patterns": [ + "section_break", + "split_report_grid", + "comparison_columns", + "timeline_overview", + ], + "component_signature": "Refined report-style panels, paper-like spacing, and calm academic hierarchy.", + "must_keep": "Keep the visual language rigorous, airy, and report-like rather than glossy.", + "avoid": "Do not use neon glow, oversized promo badges, or playful sticker motifs.", + } + if family == "business": + return { + "layout_rules": [ + "Favor crisp 
comparison, KPI-card, and executive-summary patterns.", + "Use strong alignment and clear block grouping with moderate density.", + "Keep image_focus to showcase slides only.", + "Prefer horizontal momentum and strong title anchoring.", + ], + "component_rules": [ + "Use sharp, decisive cards with stronger contrast and cleaner edges.", + "Accent color should be used sparingly for strategic emphasis.", + "Make hierarchy feel presentation-room ready rather than article-like.", + ], + "preferred_layout_patterns": [ + "executive_hero", + "split_insight_grid", + "kpi_cards", + "decision_comparison", + ], + "component_signature": "Crisp executive cards, strong title anchors, and controlled business contrast.", + "must_keep": "Keep the deck polished, decisive, and boardroom-oriented.", + "avoid": "Do not use whimsical illustration accents or soft scrapbook styling.", + } + if family == "creative": + return { + "layout_rules": [ + "Allow more asymmetry, softer framing, and stronger hero moments.", + "Alternate between image_focus, section, cards, and timeline layouts to keep the deck lively.", + "Use comparison and two-column layouts only when the content clearly calls for them.", + "Let accent shapes support the narrative without overpowering text.", + ], + "component_rules": [ + "Use soft panels, expressive color accents, and warmer visual transitions.", + "Preserve readability, but allow more character in backgrounds and separators.", + "Favor friendly, presentation-forward composition over report density.", + ], + "preferred_layout_patterns": [ + "hero_spotlight", + "soft_cards", + "story_timeline", + "image_caption_feature", + ], + "component_signature": "Warm expressive panels, softer geometry, and more atmospheric deck motion.", + "must_keep": "Keep the deck warm, expressive, and visibly more playful than academic or business presets.", + "avoid": "Do not collapse the deck back into a uniform report grid on every page.", + } + return { + "layout_rules": [ + "Keep 
72px+ safe margins around major content.", + "Prefer one dominant text area plus one supporting card or metrics block.", + "Avoid more than two visual columns in a single slide.", + "Reserve a quiet footer area for page identity or takeaway.", + ], + "component_rules": [ + "Use refined rounded cards with controlled glow and layered depth.", + "Use one accent color only for emphasis, not for large fills.", + "Keep text hierarchy clear with title, summary, and supporting bullets.", + ], + "preferred_layout_patterns": [ + "hero_with_side_card", + "split_insight_grid", + "stacked_cards", + "timeline_overview", + ], + "component_signature": "Modern layered cards, restrained glow, and polished presentation spacing.", + "must_keep": "Keep the deck sleek, layered, and contemporary without drifting into plain report style.", + "avoid": "Do not flatten everything into plain white report blocks unless the prompt explicitly asks for that.", + } + def _resolve_palette_from_style(self, style: str) -> Dict[str, str]: style_text = (style or "").strip().lower() @@ -1802,27 +2154,26 @@ def _clean_int(value: Any, default: int, min_value: int, max_value: int) -> int: return default return max(min_value, min(max_value, parsed)) + def _clean_style_family(value: Any, default: str) -> str: + candidate = str(value or "").strip().lower() + if candidate in {"modern", "business", "academic", "creative"}: + return candidate + return default + def _clean_list(value: Any, defaults: List[str], limit: int = 6) -> List[str]: if isinstance(value, list): - cleaned = [str(item).strip() for item in value if str(item).strip()] + cleaned = self._normalize_outline_points(value, limit=limit, item_limit=140) if cleaned: return cleaned[:limit] return defaults[:limit] - layout_rules = [ - str(item).strip() - for item in (payload.get("layout_rules") or []) - if str(item).strip() - ][:6] - component_rules = [ - str(item).strip() - for item in (payload.get("component_rules") or []) - if str(item).strip() - ][:6] 
+ layout_rules = self._normalize_outline_points(payload.get("layout_rules"), limit=6, item_limit=180) + component_rules = self._normalize_outline_points(payload.get("component_rules"), limit=6, item_limit=180) return { "theme_name": _clean_text(payload.get("theme_name"), fallback["theme_name"]), "visual_mood": _clean_text(payload.get("visual_mood"), fallback["visual_mood"]), + "style_family": _clean_style_family(payload.get("style_family"), fallback["style_family"]), "palette": { "bg": _clean_color(palette_raw.get("bg"), fallback["palette"]["bg"]), "panel": _clean_color(palette_raw.get("panel"), fallback["palette"]["panel"]), @@ -1898,25 +2249,25 @@ def _build_theme_lock(self, theme: Dict[str, Any]) -> Dict[str, Any]: theme_lock = theme.get("theme_lock") if isinstance(theme_lock, dict): return { - "must_keep": [ - str(item).strip() - for item in (theme_lock.get("must_keep") or []) - if str(item).strip() - ] or fallback["theme_lock"]["must_keep"], - "preferred_layout_patterns": [ - str(item).strip() - for item in (theme_lock.get("preferred_layout_patterns") or []) - if str(item).strip() - ] or fallback["theme_lock"]["preferred_layout_patterns"], + "must_keep": self._normalize_outline_points( + theme_lock.get("must_keep"), + limit=8, + item_limit=180, + ) or fallback["theme_lock"]["must_keep"], + "preferred_layout_patterns": self._normalize_outline_points( + theme_lock.get("preferred_layout_patterns"), + limit=8, + item_limit=180, + ) or fallback["theme_lock"]["preferred_layout_patterns"], "component_signature": str( theme_lock.get("component_signature") or fallback["theme_lock"]["component_signature"] ).strip(), - "avoid": [ - str(item).strip() - for item in (theme_lock.get("avoid") or []) - if str(item).strip() - ] or fallback["theme_lock"]["avoid"], + "avoid": self._normalize_outline_points( + theme_lock.get("avoid"), + limit=8, + item_limit=180, + ) or fallback["theme_lock"]["avoid"], } return fallback["theme_lock"] @@ -1927,6 +2278,7 @@ def 
_build_deck_identity_summary(self, theme: Dict[str, Any]) -> Dict[str, Any]: return { "theme_name": str(theme.get("theme_name") or "deck_theme").strip(), "visual_mood": str(theme.get("visual_mood") or "").strip(), + "style_family": str(theme.get("style_family") or "modern").strip(), "palette_anchor": { "bg": str(palette.get("bg") or "").strip(), "primary": str(palette.get("primary") or "").strip(), @@ -2146,20 +2498,21 @@ def _load_reference_slides( return [self._summarize_reference_slide(slide) for slide in references] def _summarize_reference_slide(self, slide: Dict[str, Any]) -> Dict[str, Any]: - html_template = str(slide.get("html_template") or "") - css_code = str(slide.get("css_code") or "") editable_fields = slide.get("editable_fields") or [] return { "page_num": int(slide.get("page_num") or 0), "title": str(slide.get("title") or "").strip(), + "layout_type": str(slide.get("layout_type") or slide.get("layoutType") or "").strip(), "field_keys": [ str(field.get("key") or "").strip() for field in editable_fields if isinstance(field, dict) and str(field.get("key") or "").strip() ][:10], - "html_outline": self._extract_html_outline(html_template), - "component_classes": self._extract_component_classes(html_template, css_code), - "css_selectors": self._extract_css_selectors(css_code), + "visual_asset_keys": [ + str(asset.get("key") or "").strip() + for asset in (slide.get("visual_assets") or []) + if isinstance(asset, dict) and str(asset.get("key") or "").strip() + ][:4], } def _extract_html_outline(self, html_template: str, limit: int = 12) -> List[str]: @@ -2216,6 +2569,42 @@ def _extract_css_selectors(self, css_code: str, limit: int = 8) -> List[str]: break return cleaned + def _choose_fallback_layout_type( + self, + *, + outline_item: Dict[str, Any], + slide_index: int, + theme: Dict[str, Any], + visual_assets: Sequence[Dict[str, Any]], + ) -> str: + style_family = str(theme.get("style_family") or "modern").strip().lower() + layout_hint = 
str(outline_item.get("layout_description") or "").lower() + title = str(outline_item.get("title") or "").lower() + key_points = self._normalize_outline_points(outline_item.get("key_points"), limit=6, item_limit=120) + bullet_count = len(key_points) + + if slide_index == 0: + return "cover" + if any(keyword in title for keyword in ("overview", "agenda", "outline", "introduction", "background", "summary")): + return "section" + if any(keyword in layout_hint for keyword in ("compare", "contrast", "trade-off", "versus", "vs", "对比", "比较")): + return "comparison" + if any(keyword in layout_hint for keyword in ("timeline", "process", "workflow", "loop", "pipeline", "流程", "时间线")): + return "timeline" + if any(keyword in layout_hint for keyword in ("card", "grid", "domain", "application", "industry", "module", "模块", "领域")): + return "cards_2x2" + if bullet_count >= 5 and style_family in {"business", "modern"}: + return "two_column" + if visual_assets and style_family in {"creative", "modern"} and slide_index % 3 == 1: + return "image_focus" + if bullet_count >= 4 and style_family == "academic": + return "bullets" + if style_family == "business": + return "two_column" + if style_family == "creative": + return "cards_2x2" + return "bullets" + def _build_fallback_slide( self, *, @@ -2225,315 +2614,46 @@ def _build_fallback_slide( theme: Dict[str, Any], visual_assets: Optional[List[Dict[str, Any]]] = None, ) -> Dict[str, Any]: - palette = theme.get("palette") or self._build_fallback_theme(language="zh", style="")["palette"] - typography = theme.get("typography") or {} visual_assets = (visual_assets or [])[:_MAX_INLINE_VISUAL_ASSETS] - has_visual = bool(visual_assets) - has_multi_visual = len(visual_assets) > 1 - key_points = [ - str(item).strip() - for item in (outline_item.get("key_points") or []) - if str(item).strip() - ][:4] - summary = key_points[0] if key_points else str(outline_item.get("layout_description") or "").strip() + key_points = 
self._normalize_outline_points(outline_item.get("key_points"), limit=4, item_limit=120) + summary = key_points[0] if key_points else self._clean_text_content( + outline_item.get("layout_description"), + "", + 280, + ) takeaway = key_points[-1] if key_points else "Refine the narrative in the editor" section_template = str(theme.get("section_label_template") or "Slide {page_num:02d}/{slide_count:02d}") try: eyebrow = section_template.format(page_num=slide_index + 1, slide_count=slide_count) except Exception: # noqa: BLE001 eyebrow = f"Slide {slide_index + 1:02d}/{slide_count:02d}" - - visual_markup = "" - if has_visual: - visual_markup = "\n".join( - f'
{{{{image:{asset.get("key") or self._build_visual_asset_key(asset_index)}}}}}
' - for asset_index, asset in enumerate(visual_assets) - ) - - html_template = """ -
-
-
-
-
-
{{field:eyebrow}}
-

{{field:title}}

-

{{field:summary}}

- """ + ( - """ -
    {{list:key_points}}
-""" - if has_visual - else "" - ) + """ -
- """ + ( - """ -
-""" + visual_markup + """ -
-""" - if has_visual - else """ -
-
{{field:points_label}}
-
    {{list:key_points}}
-
-""" - ) + """ -
- -
-
-""".strip() - - css_code = f""" -.slide-root {{ - width: 100%; - height: 100%; - background: - radial-gradient(circle at top right, {palette["secondary"]}33 0%, transparent 28%), - radial-gradient(circle at bottom left, {palette["accent"]}22 0%, transparent 32%), - {palette["bg"]}; - color: {palette["text"]}; - overflow: hidden; -}} -.slide-root * {{ - box-sizing: border-box; -}} -.slide-shell {{ - position: relative; - width: 100%; - height: 100%; - padding: 68px 72px; -}} -.grid-layer {{ - position: absolute; - inset: 0; - background-image: - linear-gradient(rgba(148, 163, 184, 0.08) 1px, transparent 1px), - linear-gradient(90deg, rgba(148, 163, 184, 0.08) 1px, transparent 1px); - background-size: 48px 48px; - opacity: 0.22; -}} -.hero {{ - position: relative; - z-index: 1; - display: grid; - grid-template-columns: {'1.08fr 0.92fr' if has_visual else '1.5fr 0.95fr'}; - gap: 28px; - height: calc(100% - 120px); -}} -.hero-copy {{ - display: flex; - flex-direction: column; - justify-content: center; - gap: 20px; -}} -.eyebrow {{ - display: inline-flex; - align-self: flex-start; - padding: 8px 14px; - border-radius: 999px; - background: {palette["secondary"]}22; - border: 1px solid {palette["primary"]}55; - color: {palette["primary"]}; - font-size: {int(typography.get("eyebrow_size") or 18)}px; - font-weight: 700; - letter-spacing: 0.08em; - text-transform: uppercase; -}} -.title {{ - margin: 0; - max-width: 880px; - font-size: {int(typography.get("title_size") or 56)}px; - line-height: 1.04; - letter-spacing: -0.04em; - font-family: {typography.get("title_font_stack") or 'Georgia, "Times New Roman", serif'}; -}} -.summary {{ - margin: 0; - max-width: 840px; - font-size: {int(typography.get("summary_size") or 26)}px; - line-height: 1.42; - color: {palette["muted"]}; - white-space: pre-wrap; - font-family: {typography.get("body_font_stack") or '"Segoe UI", "PingFang SC", "Microsoft YaHei", sans-serif'}; -}} -.stat-card, .takeaway-card, .visual-card {{ - 
border-radius: 28px; - border: 1px solid {palette["primary"]}30; - background: {palette["panel"]}; - box-shadow: 0 30px 60px rgba(15, 23, 42, 0.35); - backdrop-filter: blur(10px); -}} -.stat-card {{ - align-self: center; - padding: 28px; -}} -.visual-card {{ - padding: 18px; - min-height: 420px; - display: flex; - flex-direction: column; - gap: 14px; -}} -.visual-card.visual-card-grid {{ - justify-content: stretch; -}} -.visual-shell {{ - width: 100%; - height: 100%; - min-height: 384px; - border-radius: 22px; - overflow: hidden; -}} -.visual-card.visual-card-grid .visual-shell {{ - flex: 1 1 0; - min-height: 160px; -}} -.visual-card.visual-card-grid .visual-shell-1 {{ - min-height: 236px; -}} -.card-label, .takeaway-label {{ - font-size: {int(typography.get("eyebrow_size") or 18)}px; - letter-spacing: 0.08em; - text-transform: uppercase; - color: {palette["primary"]}; - margin-bottom: 14px; - font-weight: 700; -}} -.bullet-list {{ - margin: 0; - padding-left: 26px; - display: grid; - gap: 14px; - font-size: {int(typography.get("body_size") or 24)}px; - line-height: 1.35; - font-family: {typography.get("body_font_stack") or '"Segoe UI", "PingFang SC", "Microsoft YaHei", sans-serif'}; -}} -.bullet-list li {{ - color: {palette["text"]}; -}} -.bullet-list.compact {{ - max-width: 720px; - gap: 10px; - font-size: {max(18, int(typography.get("body_size") or 24) - 2)}px; -}} -.footer-row {{ - position: relative; - z-index: 1; - display: grid; - grid-template-columns: 1.4fr auto; - align-items: end; - gap: 18px; -}} -.takeaway-card {{ - padding: 24px 28px; -}} -.takeaway-text {{ - margin: 0; - font-size: {int(typography.get("body_size") or 24)}px; - line-height: 1.4; - color: {palette["text"]}; - white-space: pre-wrap; - font-family: {typography.get("body_font_stack") or '"Segoe UI", "PingFang SC", "Microsoft YaHei", sans-serif'}; -}} -.footer-tag {{ - display: inline-flex; - align-items: center; - justify-content: center; - min-width: 220px; - padding: 14px 18px; - 
border-radius: 999px; - border: 1px solid {palette["accent"]}55; - color: {palette["accent"]}; - font-size: {int(typography.get("eyebrow_size") or 18)}px; - font-weight: 700; - background: rgba(15, 23, 42, 0.45); -}} -""".strip() - - editable_fields = [ - { - "key": "eyebrow", - "label": "Eyebrow", - "type": "text", - "value": eyebrow, - "items": [], - }, - { - "key": "title", - "label": "Title", - "type": "text", - "value": str(outline_item.get("title") or f"Slide {slide_index + 1}"), - "items": [], - }, - { - "key": "summary", - "label": "Summary", - "type": "textarea", - "value": summary, - "items": [], - }, - { - "key": "key_points", - "label": "Key Points", - "type": "list", - "value": "", - "items": key_points or ["Summarize the page content here"], - }, - { - "key": "takeaway_label", - "label": "Takeaway Label", - "type": "text", - "value": "Takeaway", - "items": [], - }, - { - "key": "takeaway", - "label": "Takeaway", - "type": "textarea", - "value": takeaway, - "items": [], - }, - { - "key": "footer", - "label": "Footer", - "type": "text", - "value": str(theme.get("footer_text") or "Paper2Any Frontend PPT"), - "items": [], - }, - ] - if not has_visual: - editable_fields.insert( - 3, - { - "key": "points_label", - "label": "Points Label", - "type": "text", - "value": "Key Points", - "items": [], - }, - ) - - return { - "slide_id": str(slide_index + 1), - "page_num": slide_index + 1, + layout_type = self._choose_fallback_layout_type( + outline_item=outline_item, + slide_index=slide_index, + theme=theme, + visual_assets=visual_assets, + ) + content = { + "eyebrow": eyebrow, "title": str(outline_item.get("title") or f"Slide {slide_index + 1}"), - "html_template": html_template, - "css_code": css_code, - "editable_fields": editable_fields, - "visual_assets": visual_assets, - "generation_note": "Built-in fallback template", - "status": "done", + "summary": summary, + "bullets": key_points or ["Summarize the page content here"], + "takeaway": takeaway, + 
"footer": str(theme.get("footer_text") or "Paper2Any Structured PPT"), + "visual_caption": "Supporting visual", } + slide = self._build_structured_slide( + layout_type=layout_type, + content=content, + outline_item=outline_item, + slide_index=slide_index, + slide_count=slide_count, + theme=theme, + visual_assets=visual_assets, + generation_note="Built-in fallback structured slide", + ) + slide["generation_note"] = "Built-in fallback structured slide" + return slide def _sanitize_html_template(self, html_template: str) -> str: cleaned = re.sub(r"<\s*/?\s*(html|head|body)\b[^>]*>", "", html_template, flags=re.IGNORECASE) @@ -2567,13 +2687,9 @@ def _replace_field(token_match: re.Match[str]) -> str: if field is None: return "" if str(field.get("type") or "") == "list": - raw_value = " • ".join( - str(item).strip() - for item in (field.get("items") or []) - if str(item).strip() - ) + raw_value = " • ".join(self._normalize_outline_points(field.get("items"), limit=12, item_limit=180)) else: - raw_value = str(field.get("value") or "") + raw_value = self._extract_outline_text(field.get("value")) return html.escape(" ".join(raw_value.split()), quote=True) next_value = re.sub(r"\{\{field:([a-zA-Z0-9_]+)\}\}", _replace_field, next_value) diff --git a/fastapi_app/services/paper2ppt_service.py b/fastapi_app/services/paper2ppt_service.py index f2747960..086af256 100644 --- a/fastapi_app/services/paper2ppt_service.py +++ b/fastapi_app/services/paper2ppt_service.py @@ -98,13 +98,16 @@ import copy import hashlib +import json import os from pathlib import Path from typing import Any, Dict, List, Optional from fastapi import HTTPException, Request, UploadFile +import httpx from PIL import Image, ImageOps, UnidentifiedImageError +from fastapi_app.config import settings from fastapi_app.schemas import ( FullPipelineRequest, OutlineRefineRequest, @@ -114,6 +117,7 @@ from fastapi_app.services.managed_api_service import ( resolve_image_generation_credentials, resolve_llm_credentials, + 
resolve_model_name, ) from fastapi_app.utils import ( _to_outputs_url, @@ -122,12 +126,13 @@ ) from fastapi_app.workflow_adapters.wa_paper2ppt import ( run_paper2page_content_wf_api, - run_paper2page_content_refine_wf_api, run_paper2ppt_full_pipeline, run_paper2ppt_wf_api, ) +from dataflow_agent.promptstemplates import PromptsTemplateGenerator from dataflow_agent.logger import get_logger from dataflow_agent.utils import get_project_root +from dataflow_agent.utils_common import robust_parse_json log = get_logger(__name__) @@ -138,6 +143,10 @@ _PREVIEW_JPEG_QUALITY = 82 _PIL_RESAMPLING = getattr(Image, "Resampling", Image) _PIL_LANCZOS = _PIL_RESAMPLING.LANCZOS +_OUTLINE_PATCH_CHUNK_SIZE = 4 +_OUTLINE_PLAN_SOURCE_CHAR_LIMIT = 12000 +_OUTLINE_PLAN_PAGE_DIGEST_LIMIT = 22000 +_OUTLINE_REWRITE_SOURCE_CHAR_LIMIT = 9000 class Paper2PPTService: @@ -234,7 +243,11 @@ async def get_page_content( credential_scope=credential_scope, chat_api_key=resolved_api_key, api_key=resolved_api_key, - model=req.model, + model=resolve_model_name( + req.model, + managed_default=settings.PAPER2PPT_OUTLINE_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_MODEL, + ), gen_fig_model="", input_type=wf_input_type, input_content=wf_input_content, @@ -249,6 +262,21 @@ async def get_page_content( resp_model = await run_paper2page_content_wf_api(p2ppt_req, result_path=run_dir) resp_dict = resp_model.model_dump() + resp_dict["pagecontent"] = self._normalize_pagecontent_items(resp_dict.get("pagecontent", [])) + if not resp_dict["pagecontent"]: + backend_error = str(resp_dict.get("error") or "").strip() + if backend_error: + raise HTTPException(status_code=502, detail=backend_error) + raw_text = (req.text or "").strip() + if str(req.input_type).lower() == "text" and use_long_paper_bool and req.page_count > 20: + raise HTTPException( + status_code=400, + detail=( + f"当前为文本模式,输入内容仅 {len(raw_text)} 个字符,不足以稳定生成 {req.page_count} 页长文大纲。" + "请提供更完整的正文,或改用 Topic 模式。" + ), + ) + raise 
HTTPException(status_code=502, detail="后端未生成有效大纲,请稍后重试") if request is not None: resp_dict["pagecontent"] = self._convert_pagecontent_paths_to_urls( resp_dict.get("pagecontent", []), request, resp_model.result_path @@ -286,7 +314,11 @@ async def refine_outline( credential_scope=credential_scope, chat_api_key=resolved_api_key, api_key=resolved_api_key, - model=req.model, + model=resolve_model_name( + req.model, + managed_default=settings.PAPER2PPT_OUTLINE_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_MODEL, + ), gen_fig_model="", input_type="TEXT", input_content="", @@ -299,20 +331,25 @@ async def refine_outline( if req.result_path: result_root = self.resolve_result_path(req.result_path) - resp_model = await run_paper2page_content_refine_wf_api( - p2ppt_req, + source_text, mineru_output = self._load_outline_refine_source_context(result_root) + refined_pagecontent = await self._refine_outline_with_patch_plan( + req=p2ppt_req, pagecontent=pc, outline_feedback=req.outline_feedback, - result_path=result_root, + source_text=source_text, + mineru_output=mineru_output, ) - - resp_dict = resp_model.model_dump() + resp_dict: Dict[str, Any] = { + "success": True, + "pagecontent": self._normalize_pagecontent_items(refined_pagecontent), + "result_path": str(result_root) if result_root is not None else (req.result_path or ""), + } if request is not None: resp_dict["pagecontent"] = self._convert_pagecontent_paths_to_urls( - resp_dict.get("pagecontent", []), request, resp_model.result_path + resp_dict.get("pagecontent", []), request, resp_dict.get("result_path") ) - if request is not None: - resp_dict["all_output_files"] = self._collect_output_files_as_urls(resp_model.result_path, request) + if request is not None and resp_dict.get("result_path"): + resp_dict["all_output_files"] = self._collect_output_files_as_urls(resp_dict["result_path"], request) else: resp_dict["all_output_files"] = [] @@ -381,8 +418,16 @@ async def generate_ppt( api_key=resolved_api_key, 
image_api_url=resolved_image_api_url, image_api_key=resolved_image_api_key, - model=req.model, - gen_fig_model=req.img_gen_model_name, + model=resolve_model_name( + req.model, + managed_default=settings.PAPER2PPT_CONTENT_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_MODEL, + ), + gen_fig_model=resolve_model_name( + req.img_gen_model_name, + managed_default=settings.PAPER2PPT_IMAGE_GEN_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_IMAGE_MODEL, + ), input_type="PDF", input_content="", aspect_ratio=req.aspect_ratio, @@ -446,8 +491,16 @@ async def run_full_pipeline( api_key=resolved_api_key, image_api_url=resolved_image_api_url, image_api_key=resolved_image_api_key, - model=req.model, - gen_fig_model=req.img_gen_model_name, + model=resolve_model_name( + req.model, + managed_default=settings.PAPER2PPT_CONTENT_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_MODEL, + ), + gen_fig_model=resolve_model_name( + req.img_gen_model_name, + managed_default=settings.PAPER2PPT_IMAGE_GEN_MODEL, + fallback_default=settings.PAPER2PPT_DEFAULT_IMAGE_MODEL, + ), input_type=wf_input_type, input_content=wf_input_content, aspect_ratio=req.aspect_ratio, @@ -833,8 +886,6 @@ def _collect_output_files_as_urls(self, result_path: str, request: Request) -> l def _parse_pagecontent_json(self, pagecontent_json: str) -> List[Dict[str, Any]]: try: - import json - obj = json.loads(pagecontent_json) except Exception as e: # noqa: BLE001 raise HTTPException(status_code=400, detail=f"invalid pagecontent json: {e}") from e @@ -845,4 +896,568 @@ def _parse_pagecontent_json(self, pagecontent_json: str) -> List[Dict[str, Any]] for i, it in enumerate(obj): if not isinstance(it, dict): raise HTTPException(status_code=400, detail=f"pagecontent[{i}] must be an object(dict)") - return obj + return self._normalize_pagecontent_items(obj) + + def _extract_outline_text(self, value: Any) -> str: + if value is None: + return "" + if isinstance(value, str): + return " ".join(value.strip().split()) + if 
isinstance(value, (int, float, bool)): + return str(value) + if isinstance(value, dict): + preferred_keys = ( + "text", + "value", + "content", + "summary", + "title", + "label", + "body", + "description", + "reason", + "point", + ) + for key in preferred_keys: + text = self._extract_outline_text(value.get(key)) + if text: + return text + for item in value.values(): + text = self._extract_outline_text(item) + if text: + return text + return "" + if isinstance(value, list): + parts = [self._extract_outline_text(item) for item in value] + return " ".join(part for part in parts if part) + return " ".join(str(value).strip().split()) + + def _normalize_outline_points(self, value: Any) -> List[str]: + if isinstance(value, list): + items = [self._extract_outline_text(item) for item in value] + else: + items = [self._extract_outline_text(value)] + return [item for item in items if item] + + def _normalize_pagecontent_items(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + normalized: List[Dict[str, Any]] = [] + for raw in items: + item = copy.deepcopy(raw) + if "title" in item: + item["title"] = self._extract_outline_text(item.get("title")) + if "layout_description" in item: + item["layout_description"] = self._extract_outline_text(item.get("layout_description")) + if "key_points" in item: + item["key_points"] = self._normalize_outline_points(item.get("key_points")) + if "asset_ref" in item: + asset_ref = item.get("asset_ref") + if isinstance(asset_ref, list): + normalized_refs = self._normalize_outline_points(asset_ref) + item["asset_ref"] = normalized_refs[0] if normalized_refs else None + else: + normalized_ref = self._extract_outline_text(asset_ref) + item["asset_ref"] = normalized_ref or None + normalized.append(item) + return normalized + + def _load_outline_refine_source_context(self, result_root: Path | None) -> tuple[str, str]: + if result_root is None: + return "", "" + + input_dir = result_root / "input" + text_candidates = [ + input_dir / 
"input.txt", + input_dir / "input_topic.txt", + ] + source_text = "" + for candidate in text_candidates: + if candidate.exists(): + try: + source_text = candidate.read_text(encoding="utf-8") + if source_text.strip(): + break + except Exception: + continue + + mineru_output = "" + try: + markdown_candidates = list(result_root.glob("*/auto/*.md")) + except Exception: + markdown_candidates = [] + for candidate in markdown_candidates: + try: + mineru_output = candidate.read_text(encoding="utf-8") + if mineru_output.strip(): + break + except Exception: + continue + + return source_text, mineru_output + + async def _refine_outline_with_patch_plan( + self, + *, + req: Any, + pagecontent: List[Dict[str, Any]], + outline_feedback: str, + source_text: str, + mineru_output: str, + ) -> List[Dict[str, Any]]: + original_pages = self._normalize_pagecontent_items(pagecontent) + if not original_pages: + return [] + + plan = await self._plan_outline_refine_operations( + req=req, + pagecontent=original_pages, + outline_feedback=outline_feedback, + source_text=source_text, + mineru_output=mineru_output, + ) + working_pages, rewrite_targets = self._apply_outline_patch_plan(original_pages, plan) + rewritten_pages = await self._rewrite_outline_pages_by_chunks( + req=req, + pagecontent=working_pages, + rewrite_targets=rewrite_targets, + global_instruction=str(plan.get("global_instruction") or "").strip(), + outline_feedback=outline_feedback, + source_text=source_text, + mineru_output=mineru_output, + ) + return self._normalize_pagecontent_items(rewritten_pages) + + async def _plan_outline_refine_operations( + self, + *, + req: Any, + pagecontent: List[Dict[str, Any]], + outline_feedback: str, + source_text: str, + mineru_output: str, + ) -> Dict[str, Any]: + prompt_generator = PromptsTemplateGenerator(output_language=req.language) + outline_digest = self._build_outline_digest(pagecontent, max_chars=_OUTLINE_PLAN_PAGE_DIGEST_LIMIT) + source_excerpt = self._build_source_excerpt( + 
source_text=source_text, + mineru_output=mineru_output, + max_chars=_OUTLINE_PLAN_SOURCE_CHAR_LIMIT, + ) + system_prompt = prompt_generator.render( + "system_prompt_for_paper2ppt_outline_edit_planner_agent", + language=req.language, + ) + task_prompt = prompt_generator.render( + "task_prompt_for_paper2ppt_outline_edit_planner_agent", + page_count=len(pagecontent), + outline_feedback=outline_feedback, + outline_digest=outline_digest, + source_excerpt=source_excerpt, + language=req.language, + ) + + try: + raw_plan = await self._invoke_json_llm( + chat_api_url=req.chat_api_url, + api_key=req.api_key, + model_name=req.model, + system_prompt=system_prompt, + task_prompt=task_prompt, + max_tokens=4096, + ) + except Exception as exc: + log.warning("[paper2ppt][outline-refine] planner failed: %s", exc) + raw_plan = {} + + return self._normalize_outline_patch_plan( + raw_plan, + page_count=len(pagecontent), + outline_feedback=outline_feedback, + ) + + async def _rewrite_outline_pages_by_chunks( + self, + *, + req: Any, + pagecontent: List[Dict[str, Any]], + rewrite_targets: set[int], + global_instruction: str, + outline_feedback: str, + source_text: str, + mineru_output: str, + ) -> List[Dict[str, Any]]: + normalized_pages = self._normalize_pagecontent_items(pagecontent) + if not normalized_pages: + return [] + + if not rewrite_targets: + return normalized_pages + + prompt_generator = PromptsTemplateGenerator(output_language=req.language) + source_excerpt = self._build_source_excerpt( + source_text=source_text, + mineru_output=mineru_output, + max_chars=_OUTLINE_REWRITE_SOURCE_CHAR_LIMIT, + ) + chunks = self._build_rewrite_chunks(sorted(rewrite_targets), len(normalized_pages), _OUTLINE_PATCH_CHUNK_SIZE) + rewritten_pages = copy.deepcopy(normalized_pages) + + for chunk_start, chunk_end in chunks: + chunk_pages = copy.deepcopy(normalized_pages[chunk_start : chunk_end + 1]) + page_instruction_lines: List[str] = [] + for offset, page in enumerate(chunk_pages, start=chunk_start 
+ 1): + specific_instruction = str(page.pop("_patch_instruction", "") or "").strip() + if specific_instruction: + page_instruction_lines.append(f"- Page {offset}: {specific_instruction}") + previous_title = rewritten_pages[chunk_start - 1]["title"] if chunk_start > 0 else "" + next_title = rewritten_pages[chunk_end + 1]["title"] if chunk_end + 1 < len(rewritten_pages) else "" + + system_prompt = prompt_generator.render( + "system_prompt_for_paper2ppt_outline_patch_rewriter_agent", + language=req.language, + ) + task_prompt = prompt_generator.render( + "task_prompt_for_paper2ppt_outline_patch_rewriter_agent", + page_count=len(chunk_pages), + chunk_start=chunk_start + 1, + chunk_end=chunk_end + 1, + outline_feedback=outline_feedback, + global_instruction=global_instruction or outline_feedback, + page_specific_instructions="\n".join(page_instruction_lines) or "- None", + previous_title=previous_title or "None", + next_title=next_title or "None", + source_excerpt=source_excerpt, + pagecontent=json.dumps(chunk_pages, ensure_ascii=False, indent=2), + language=req.language, + ) + + try: + raw_rewrite = await self._invoke_json_llm( + chat_api_url=req.chat_api_url, + api_key=req.api_key, + model_name=req.model, + system_prompt=system_prompt, + task_prompt=task_prompt, + max_tokens=4096, + ) + except Exception as exc: + log.warning( + "[paper2ppt][outline-refine] chunk rewrite failed for %s-%s: %s", + chunk_start + 1, + chunk_end + 1, + exc, + ) + continue + + if not isinstance(raw_rewrite, list) or len(raw_rewrite) != len(chunk_pages): + log.warning( + "[paper2ppt][outline-refine] chunk rewrite length mismatch for %s-%s, keep original", + chunk_start + 1, + chunk_end + 1, + ) + continue + + normalized_chunk = self._normalize_pagecontent_items( + [item for item in raw_rewrite if isinstance(item, dict)] + ) + if len(normalized_chunk) != len(chunk_pages): + log.warning( + "[paper2ppt][outline-refine] chunk rewrite invalid payload for %s-%s, keep original", + chunk_start + 1, + 
chunk_end + 1, + ) + continue + + merged_chunk: List[Dict[str, Any]] = [] + for original_page, rewritten_page in zip(chunk_pages, normalized_chunk): + merged_page = copy.deepcopy(original_page) + merged_page["title"] = rewritten_page.get("title") or original_page.get("title") or "" + merged_page["layout_description"] = ( + rewritten_page.get("layout_description") + or original_page.get("layout_description") + or "" + ) + rewritten_points = self._normalize_outline_points(rewritten_page.get("key_points", [])) + merged_page["key_points"] = rewritten_points or original_page.get("key_points") or [] + if rewritten_page.get("asset_ref") is not None: + merged_page["asset_ref"] = rewritten_page.get("asset_ref") + merged_chunk.append(merged_page) + + rewritten_pages[chunk_start : chunk_end + 1] = merged_chunk + + for page in rewritten_pages: + if isinstance(page, dict): + page.pop("_origin_page_number", None) + page.pop("_patch_instruction", None) + return rewritten_pages + + def _normalize_outline_patch_plan( + self, + raw_plan: Any, + *, + page_count: int, + outline_feedback: str, + ) -> Dict[str, Any]: + base_plan: Dict[str, Any] = { + "global_instruction": outline_feedback.strip(), + "apply_global_rewrite": True, + "operations": [], + } + if not isinstance(raw_plan, dict): + return base_plan + + global_instruction = self._extract_outline_text(raw_plan.get("global_instruction")) or outline_feedback.strip() + apply_global_rewrite = bool(raw_plan.get("apply_global_rewrite")) + operations: List[Dict[str, Any]] = [] + raw_operations = raw_plan.get("operations") + if isinstance(raw_operations, list): + for raw_operation in raw_operations: + if not isinstance(raw_operation, dict): + continue + op_type = str(raw_operation.get("type") or "").strip().lower() + if op_type not in {"update", "delete", "insert_after", "move"}: + continue + normalized_op: Dict[str, Any] = {"type": op_type} + if op_type in {"update", "delete", "move"}: + page_numbers = 
self._normalize_plan_page_numbers(raw_operation.get("page_numbers"), page_count) + if not page_numbers: + continue + normalized_op["page_numbers"] = page_numbers + if op_type == "update": + instruction = self._extract_outline_text(raw_operation.get("instruction")) + if not instruction: + continue + normalized_op["instruction"] = instruction + if op_type == "insert_after": + after_page = self._normalize_single_page_number(raw_operation.get("page_number"), page_count) + count = self._normalize_insert_count(raw_operation.get("count")) + instruction = self._extract_outline_text(raw_operation.get("instruction")) + if after_page is None or count <= 0 or not instruction: + continue + normalized_op["page_number"] = after_page + normalized_op["count"] = count + normalized_op["instruction"] = instruction + if op_type == "move": + after_page = self._normalize_single_page_number(raw_operation.get("after_page_number"), page_count) + if after_page is None: + continue + normalized_op["after_page_number"] = after_page + operations.append(normalized_op) + + if operations: + explicit_restructure = any(op["type"] in {"insert_after", "delete", "move"} for op in operations) + apply_global_rewrite = bool(raw_plan.get("apply_global_rewrite")) if raw_plan.get("apply_global_rewrite") is not None else not explicit_restructure + + return { + "global_instruction": global_instruction, + "apply_global_rewrite": apply_global_rewrite or not operations, + "operations": operations, + } + + def _normalize_plan_page_numbers(self, value: Any, page_count: int) -> List[int]: + if not isinstance(value, list): + return [] + numbers: set[int] = set() + for item in value: + normalized = self._normalize_single_page_number(item, page_count) + if normalized is not None: + numbers.add(normalized) + return sorted(numbers) + + def _normalize_single_page_number(self, value: Any, page_count: int) -> Optional[int]: + try: + number = int(str(value).strip()) + except Exception: + return None + if 1 <= number <= 
page_count: + return number + return None + + def _normalize_insert_count(self, value: Any) -> int: + try: + count = int(str(value).strip()) + except Exception: + return 0 + return max(0, min(count, 8)) + + def _apply_outline_patch_plan( + self, + pagecontent: List[Dict[str, Any]], + plan: Dict[str, Any], + ) -> tuple[List[Dict[str, Any]], set[int]]: + working_pages: List[Dict[str, Any]] = [] + for idx, page in enumerate(self._normalize_pagecontent_items(pagecontent), start=1): + page_copy = copy.deepcopy(page) + page_copy["_origin_page_number"] = idx + working_pages.append(page_copy) + + rewrite_targets: set[int] = set() + for operation in plan.get("operations", []): + op_type = operation.get("type") + if op_type == "delete": + delete_numbers = set(operation.get("page_numbers", [])) + working_pages = [ + page for page in working_pages + if page.get("_origin_page_number") not in delete_numbers + ] + continue + + if op_type == "move": + move_numbers = operation.get("page_numbers", []) + after_page_number = operation.get("after_page_number") + moving_pages = [ + page for page in working_pages + if page.get("_origin_page_number") in move_numbers + ] + if not moving_pages: + continue + working_pages = [ + page for page in working_pages + if page.get("_origin_page_number") not in move_numbers + ] + insert_index = self._find_insert_index_after_origin(working_pages, after_page_number) + working_pages[insert_index:insert_index] = moving_pages + continue + + if op_type == "insert_after": + insert_after = operation.get("page_number") + insert_index = self._find_insert_index_after_origin(working_pages, insert_after) + for offset in range(operation.get("count", 0)): + placeholder = { + "title": "", + "layout_description": "", + "key_points": [], + "asset_ref": None, + "_origin_page_number": None, + "_patch_instruction": operation.get("instruction", ""), + } + working_pages.insert(insert_index + offset, placeholder) + rewrite_targets.add(insert_index + offset) + continue + + if 
op_type == "update": + update_numbers = set(operation.get("page_numbers", [])) + for idx, page in enumerate(working_pages): + if page.get("_origin_page_number") in update_numbers: + existing_instruction = str(page.get("_patch_instruction") or "").strip() + new_instruction = str(operation.get("instruction") or "").strip() + if existing_instruction and new_instruction: + page["_patch_instruction"] = f"{existing_instruction}\n{new_instruction}" + elif new_instruction: + page["_patch_instruction"] = new_instruction + rewrite_targets.add(idx) + + if plan.get("apply_global_rewrite"): + rewrite_targets.update(range(len(working_pages))) + + return working_pages, rewrite_targets + + def _find_insert_index_after_origin(self, pages: List[Dict[str, Any]], after_page_number: Optional[int]) -> int: + if after_page_number is None: + return len(pages) + for idx, page in enumerate(pages): + if page.get("_origin_page_number") == after_page_number: + return idx + 1 + return len(pages) + + def _build_rewrite_chunks(self, target_indices: List[int], total_pages: int, max_chunk_size: int) -> List[tuple[int, int]]: + if not target_indices or total_pages <= 0: + return [] + + chunks: List[tuple[int, int]] = [] + group_start = target_indices[0] + previous = target_indices[0] + for index in target_indices[1:]: + group_size = previous - group_start + 1 + if index != previous + 1 or group_size >= max_chunk_size: + chunks.append((group_start, previous)) + group_start = index + previous = index + chunks.append((group_start, previous)) + return chunks + + def _build_outline_digest(self, pagecontent: List[Dict[str, Any]], *, max_chars: int) -> str: + parts: List[str] = [] + current_len = 0 + for idx, page in enumerate(pagecontent, start=1): + bullet_preview = "; ".join((page.get("key_points") or [])[:3]) + line = ( + f"Page {idx}: title={page.get('title') or ''} | " + f"layout={page.get('layout_description') or ''} | " + f"bullets={bullet_preview}" + ).strip() + if not line: + continue + if 
current_len + len(line) > max_chars: + break + parts.append(line) + current_len += len(line) + 1 + return "\n".join(parts) + + def _build_source_excerpt( + self, + *, + source_text: str, + mineru_output: str, + max_chars: int, + ) -> str: + combined = (source_text or "").strip() + if mineru_output and mineru_output.strip(): + combined = f"{combined}\n\n{mineru_output.strip()}".strip() + if len(combined) <= max_chars: + return combined + return combined[:max_chars] + + async def _invoke_json_llm( + self, + *, + chat_api_url: str, + api_key: str, + model_name: str, + system_prompt: str, + task_prompt: str, + max_tokens: int = 4096, + ) -> Any: + base_url = str(chat_api_url or "").rstrip("/") + if not base_url: + raise ValueError("chat_api_url is required for outline refine") + endpoint = f"{base_url}/chat/completions" + + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + payload = { + "model": model_name, + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": task_prompt}, + ], + "temperature": 0.0, + "max_tokens": max_tokens, + } + + proxy = None + for key in ("HTTPS_PROXY", "https_proxy", "HTTP_PROXY", "http_proxy"): + value = str(os.getenv(key) or "").strip() + if value.startswith(("http://", "https://")): + proxy = value + break + + client_kwargs: Dict[str, Any] = { + "timeout": httpx.Timeout(300.0), + "trust_env": False, + } + if proxy: + client_kwargs["proxy"] = proxy + + async with httpx.AsyncClient(**client_kwargs) as client: + response = await client.post(endpoint, headers=headers, json=payload) + response.raise_for_status() + body = response.json() + + raw_text = self._extract_outline_text( + (((body.get("choices") or [{}])[0]).get("message") or {}).get("content") + ) + if not raw_text.strip(): + raise ValueError("LLM returned empty content for outline refine") + return robust_parse_json(raw_text, strip_double_braces=True) diff --git 
a/fastapi_app/services/paper2video_service.py b/fastapi_app/services/paper2video_service.py index 4d7c2182..fc8a9298 100644 --- a/fastapi_app/services/paper2video_service.py +++ b/fastapi_app/services/paper2video_service.py @@ -22,8 +22,9 @@ from fastapi import HTTPException, Request, UploadFile +from fastapi_app.config import settings from fastapi_app.schemas import GenerateSubtitleResponse, GenerateVideoResponse -from fastapi_app.services.managed_api_service import resolve_llm_credentials +from fastapi_app.services.managed_api_service import resolve_llm_credentials, resolve_model_name from fastapi_app.utils import _to_outputs_url, get_outputs_root, resolve_outputs_path from fastapi_app.workflow_adapters.wa_paper2video import ( run_paper2video_generate_subtitle_wf_api, @@ -141,6 +142,16 @@ async def run_generate_subtitle( api_key, scope="paper2video", ) + model = resolve_model_name( + model, + managed_default=settings.PAPER2VIDEO_DEFAULT_MODEL, + fallback_default="gpt-4o", + ) + tts_model = resolve_model_name( + tts_model, + managed_default=settings.PAPER2VIDEO_TTS_MODEL, + fallback_default="cosyvoice-v3-flash", + ) run_dir = self._create_timestamp_run_dir(email) input_dir = run_dir / "input" @@ -175,6 +186,11 @@ async def run_generate_subtitle( else: pdf_path = input_path log.info("[Paper2VideoService] using PDF for workflow: %s", pdf_path) + talking_model = resolve_model_name( + talking_model, + managed_default=settings.PAPER2VIDEO_TALKING_MODEL, + fallback_default="liveportrait", + ) or "liveportrait" talking_model = self._normalize_talking_model(talking_model) # 可选:数字人头像(上传文件优先;否则使用系统预设 avatar_preset) diff --git a/fastapi_app/services/pdf2ppt_service.py b/fastapi_app/services/pdf2ppt_service.py index fb689ba8..c38448c5 100644 --- a/fastapi_app/services/pdf2ppt_service.py +++ b/fastapi_app/services/pdf2ppt_service.py @@ -6,11 +6,13 @@ import fitz from fastapi import File, UploadFile, HTTPException +from fastapi_app.config import settings from 
fastapi_app.schemas import Paper2PPTRequest from fastapi_app.interprocess_lock import AsyncInterProcessSemaphore from fastapi_app.services.managed_api_service import ( resolve_image_generation_credentials, resolve_llm_credentials, + resolve_model_name, ) from fastapi_app.workflow_adapters.wa_pdf2ppt import run_pdf2ppt_wf_api from dataflow_agent.utils import get_project_root @@ -73,6 +75,14 @@ async def generate_ppt( api_key, scope="pdf2ppt", ) + model = resolve_model_name( + model, + managed_default=settings.PDF2PPT_DEFAULT_MODEL, + ) + gen_fig_model = resolve_model_name( + gen_fig_model, + managed_default=settings.PDF2PPT_DEFAULT_IMAGE_MODEL, + ) # 0.5 如果启用 AI 增强,必须校验 API 配置 if use_ai_edit: if not resolved_chat_api_url or not resolved_api_key: diff --git a/fastapi_app/services/rebuttal_service.py b/fastapi_app/services/rebuttal_service.py index 547691b0..d9b64acb 100644 --- a/fastapi_app/services/rebuttal_service.py +++ b/fastapi_app/services/rebuttal_service.py @@ -19,7 +19,8 @@ _fix_json_escapes, ) from dataflow_agent.logger import get_logger -from fastapi_app.services.managed_api_service import resolve_llm_credentials +from fastapi_app.config import settings +from fastapi_app.services.managed_api_service import resolve_llm_credentials, resolve_model_name _CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) @@ -54,6 +55,11 @@ def init_llm_client(api_key: str, chat_api_url: str = None, provider: str = None """ global llm_client chat_api_url, api_key = resolve_llm_credentials(chat_api_url, api_key, scope="paper2rebuttal") + model = resolve_model_name( + model, + managed_default=settings.PAPER2REBUTTAL_DEFAULT_MODEL, + fallback_default="gpt-4o", + ) if not chat_api_url: raise ValueError("chat_api_url is required; URL and API key are passed from frontend.") diff --git a/fastapi_app/workflow_adapters/wa_paper2ppt.py b/fastapi_app/workflow_adapters/wa_paper2ppt.py index f9254195..ebbf0595 100644 --- a/fastapi_app/workflow_adapters/wa_paper2ppt.py +++ 
b/fastapi_app/workflow_adapters/wa_paper2ppt.py @@ -248,6 +248,15 @@ async def run_paper2page_content_wf_api(req: Paper2PPTRequest, result_path: Path final_state: Paper2FigureState = await run_workflow(workflow_name, state) # 提取结果 pagecontent = _state_get(final_state, "pagecontent", []) or [] + outline_generation_error = _state_get(final_state, "outline_generation_error", "") or "" + if not isinstance(pagecontent, list): + log.warning( + "[paper2page_content_wf_api] invalid pagecontent payload type=%s, coercing to empty list", + type(pagecontent).__name__, + ) + if not outline_generation_error and isinstance(pagecontent, dict): + outline_generation_error = _state_get(final_state, "error", "") or pagecontent.get("error", "") + pagecontent = [] log.critical(f"[paper2page_content_wf_api] pagecontent={pagecontent}") result_path = _state_get(final_state, "result_path", "") or str(result_root) @@ -256,6 +265,7 @@ async def run_paper2page_content_wf_api(req: Paper2PPTRequest, result_path: Path "success": True, "pagecontent": pagecontent, "result_path": result_path, + "error": outline_generation_error, } return Paper2PPTResponse(**resp_data) diff --git a/fastapi_app/workflow_adapters/wa_pdf2ppt.py b/fastapi_app/workflow_adapters/wa_pdf2ppt.py index 771ce59c..1659d1d6 100644 --- a/fastapi_app/workflow_adapters/wa_pdf2ppt.py +++ b/fastapi_app/workflow_adapters/wa_pdf2ppt.py @@ -7,8 +7,7 @@ - 调用 dataflow_agent.workflow.run_workflow("pdf2ppt_with_sam", state) - 输出:生成的 PPT 路径 -当前直接复用 Paper2FigureState / Paper2FigureRequest, -逻辑与 tests/test_pdf2ppt.py 中保持一致。 +当前直接复用 Paper2FigureState / Paper2FigureRequest。 """ from pathlib import Path diff --git a/frontend-workflow/.env.example b/frontend-workflow/.env.example index eec91647..0215334c 100644 --- a/frontend-workflow/.env.example +++ b/frontend-workflow/.env.example @@ -1,6 +1,9 @@ # =========================================== -# Internal API Configuration +# Frontend Example (Advanced / Fine-grained) # 
=========================================== +# 如果你只想要少量默认源和少量模型,请优先使用: +# frontend-workflow/.env.simple.example +# 当前这个 .env.example 是“细粒度 / 高级模式”示例。 # 本文件只负责前端可见配置。 # 不要把后端业务 key(例如 OCR / MinerU / Supabase service role / 各类第三方服务密钥) # 再重复写进 deploy profile。 diff --git a/frontend-workflow/.env.simple.example b/frontend-workflow/.env.simple.example new file mode 100644 index 00000000..e02a6876 --- /dev/null +++ b/frontend-workflow/.env.simple.example @@ -0,0 +1,40 @@ +# =========================================== +# Paper2Any Frontend - Simple Mode Example +# =========================================== +# 目标: +# - 前端只展示少量默认源和少量默认模型 +# - 用户不需要理解几十个模型名 + +VITE_API_KEY=your-backend-api-key +VITE_API_BASE_URL= + +# 前端默认展示的主源 +VITE_DEFAULT_LLM_API_URL=http://123.129.219.111:3000/v1 +VITE_LLM_API_URLS=http://123.129.219.111:3000/v1,https://api.ikuncode.cc/v1 +VITE_DEFAULT_LLM_MODEL=gpt-4o +VITE_LLM_VERIFY_TIMEOUT_MS=30000 + +# 各页面只保留少量推荐值 +VITE_PAPER2FIGURE_MODEL_MODEL_ARCH=gemini-3-pro-image-preview +VITE_PAPER2FIGURE_MODEL_EXP_DATA=gemini-3-pro-image-preview +VITE_PAPER2FIGURE_MODEL_TECH_ROUTE=gpt-4o + +VITE_PAPER2PPT_MODEL=gpt-4o,deepseek-v3.2 +VITE_PAPER2PPT_GEN_FIG_MODEL=gemini-3-pro-image-preview + +VITE_PDF2PPT_GEN_FIG_MODEL=gemini-3-pro-image-preview +VITE_IMAGE2PPT_GEN_FIG_MODEL=gemini-3-pro-image-preview +VITE_IMAGE2PPT_USE_AI_EDIT_DEFAULT=false + +VITE_PPT2POLISH_MODEL=gpt-4o,deepseek-v3.2 +VITE_PPT2POLISH_GEN_FIG_MODEL=gemini-3-pro-image-preview + +VITE_PAPER2DRAWIO_MODEL=gpt-4o +VITE_PAPER2DRAWIO_IMAGE_MODEL=gemini-3-pro-image-preview +VITE_IMAGE2DRAWIO_GEN_FIG_MODEL=gemini-3-pro-image-preview +VITE_IMAGE2DRAWIO_VLM_MODEL=qwen-vl-ocr-2025-11-20 + +VITE_PAPER2REBUTTAL_MODEL=gpt-4o + +VITE_SUPABASE_URL=https://your-project.supabase.co +VITE_SUPABASE_ANON_KEY=your-anon-key diff --git a/frontend-workflow/Dockerfile b/frontend-workflow/Dockerfile index 49acbd98..e1f2236c 100644 --- a/frontend-workflow/Dockerfile +++ b/frontend-workflow/Dockerfile @@ -1,18 +1,40 @@ 
-FROM node:20-alpine AS build +ARG NODE_BASE_IMAGE=node:20-alpine +FROM ${NODE_BASE_IMAGE} AS build + +ARG VITE_API_KEY= +ARG VITE_API_BASE_URL= +ARG VITE_DEFAULT_LLM_API_URL= +ARG VITE_LLM_API_URLS= +ARG VITE_SUPABASE_URL= +ARG VITE_SUPABASE_ANON_KEY= WORKDIR /app COPY frontend-workflow/package.json frontend-workflow/package-lock.json ./ RUN npm ci -# 复制前端源码(包含 .env 文件,Vite build 时会自动读取) +# 复制前端源码;若存在 frontend-workflow/.env,Vite build 也会读取。 COPY frontend-workflow/ ./ +RUN cp -f .env.example .env.production || true && \ + rm -f .env.production.local && \ + touch .env.production.local && \ + if [ -n "$VITE_API_KEY" ]; then echo "VITE_API_KEY=$VITE_API_KEY" >> .env.production.local; fi && \ + if [ -n "$VITE_API_BASE_URL" ]; then echo "VITE_API_BASE_URL=$VITE_API_BASE_URL" >> .env.production.local; fi && \ + if [ -n "$VITE_DEFAULT_LLM_API_URL" ]; then echo "VITE_DEFAULT_LLM_API_URL=$VITE_DEFAULT_LLM_API_URL" >> .env.production.local; fi && \ + if [ -n "$VITE_LLM_API_URLS" ]; then echo "VITE_LLM_API_URLS=$VITE_LLM_API_URLS" >> .env.production.local; fi && \ + if [ -n "$VITE_SUPABASE_URL" ]; then echo "VITE_SUPABASE_URL=$VITE_SUPABASE_URL" >> .env.production.local; fi && \ + if [ -n "$VITE_SUPABASE_ANON_KEY" ]; then echo "VITE_SUPABASE_ANON_KEY=$VITE_SUPABASE_ANON_KEY" >> .env.production.local; fi + RUN npm run build -FROM nginx:alpine +ARG NGINX_BASE_IMAGE=nginx:alpine +FROM ${NGINX_BASE_IMAGE} -COPY frontend-workflow/nginx.conf /etc/nginx/conf.d/default.conf +COPY frontend-workflow/nginx.conf.template /etc/nginx/templates/default.conf.template +COPY frontend-workflow/docker-nginx-start.sh /docker-nginx-start.sh +RUN chmod +x /docker-nginx-start.sh COPY --from=build /app/dist /usr/share/nginx/html EXPOSE 80 +CMD ["/docker-nginx-start.sh"] diff --git a/frontend-workflow/docker-nginx-start.sh b/frontend-workflow/docker-nginx-start.sh new file mode 100644 index 00000000..9b845d09 --- /dev/null +++ b/frontend-workflow/docker-nginx-start.sh @@ -0,0 +1,12 @@ +#!/bin/sh +set 
-eu + +LISTEN_PORT="${NGINX_LISTEN_PORT:-80}" +BACKEND_URL="${BACKEND_UPSTREAM_URL:-http://paper2any-backend:8000}" + +sed \ + -e "s|__NGINX_LISTEN_PORT__|${LISTEN_PORT}|g" \ + -e "s|__BACKEND_UPSTREAM_URL__|${BACKEND_URL}|g" \ + /etc/nginx/templates/default.conf.template > /etc/nginx/conf.d/default.conf + +exec nginx -g 'daemon off;' diff --git a/frontend-workflow/index.html b/frontend-workflow/index.html index 3da8207b..f3bf7a22 100644 --- a/frontend-workflow/index.html +++ b/frontend-workflow/index.html @@ -2,7 +2,9 @@ - + + + Paper2Any diff --git a/frontend-workflow/nginx.conf b/frontend-workflow/nginx.conf.template similarity index 87% rename from frontend-workflow/nginx.conf rename to frontend-workflow/nginx.conf.template index 86646a2e..c95d7e3f 100644 --- a/frontend-workflow/nginx.conf +++ b/frontend-workflow/nginx.conf.template @@ -1,10 +1,10 @@ server { - listen 80; + listen __NGINX_LISTEN_PORT__; server_name _; client_max_body_size 200m; location /api/ { - proxy_pass http://paper2any-backend:8000; + proxy_pass __BACKEND_UPSTREAM_URL__; proxy_http_version 1.1; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; @@ -16,7 +16,7 @@ server { } location /paper2video/ { - proxy_pass http://paper2any-backend:8000; + proxy_pass __BACKEND_UPSTREAM_URL__; proxy_http_version 1.1; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; @@ -25,7 +25,7 @@ server { } location /outputs/ { - proxy_pass http://paper2any-backend:8000; + proxy_pass __BACKEND_UPSTREAM_URL__; proxy_http_version 1.1; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; diff --git a/frontend-workflow/package-lock.json b/frontend-workflow/package-lock.json index 2589b264..acf7682d 100644 --- a/frontend-workflow/package-lock.json +++ b/frontend-workflow/package-lock.json @@ -15,6 +15,7 @@ "i18next-browser-languagedetector": "^8.2.0", "lucide-react": "^0.294.0", "mermaid": "^10.9.5", + "pptxgenjs": "^4.0.1", "react": "^18.2.0", "react-dom": 
"^18.2.0", "react-drawio": "^1.0.7", @@ -30,6 +31,7 @@ "autoprefixer": "^10.4.16", "postcss": "^8.4.32", "tailwindcss": "^3.3.6", + "tsx": "^4.20.6", "typescript": "^5.2.2", "vite": "^5.0.8" } @@ -591,6 +593,22 @@ "node": ">=12" } }, + "node_modules/@esbuild/netbsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.7.tgz", + "integrity": "sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, "node_modules/@esbuild/netbsd-x64": { "version": "0.21.5", "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.21.5.tgz", @@ -608,6 +626,22 @@ "node": ">=12" } }, + "node_modules/@esbuild/openbsd-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.7.tgz", + "integrity": "sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, "node_modules/@esbuild/openbsd-x64": { "version": "0.21.5", "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.21.5.tgz", @@ -625,6 +659,22 @@ "node": ">=12" } }, + "node_modules/@esbuild/openharmony-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.7.tgz", + "integrity": "sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "openharmony" + ], + "engines": { + "node": ">=18" + } + }, "node_modules/@esbuild/sunos-x64": { "version": "0.21.5", "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.21.5.tgz", @@ -2262,6 +2312,11 @@ 
"dev": true, "license": "MIT" }, + "node_modules/core-util-is": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz", + "integrity": "sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==" + }, "node_modules/cose-base": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/cose-base/-/cose-base-1.0.3.tgz", @@ -3002,6 +3057,18 @@ "node": ">=6.9.0" } }, + "node_modules/get-tsconfig": { + "version": "4.13.7", + "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.7.tgz", + "integrity": "sha512-7tN6rFgBlMgpBML5j8typ92BKFi2sFQvIdpAqLA2beia5avZDrMs0FLZiM5etShWq5irVyGcGMEA1jcDaK7A/Q==", + "dev": true, + "dependencies": { + "resolve-pkg-maps": "^1.0.0" + }, + "funding": { + "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + } + }, "node_modules/glob-parent": { "version": "6.0.2", "dev": true, @@ -3089,6 +3156,11 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/https": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/https/-/https-1.0.0.tgz", + "integrity": "sha512-4EC57ddXrkaF0x83Oj8sM6SLQHAWXw90Skqu2M4AEWENZ3F02dFJE/GARA8igO79tcgYqGrD7ae4f5L3um2lgg==" + }, "node_modules/i18next": { "version": "25.7.4", "resolved": "https://registry.npmjs.org/i18next/-/i18next-25.7.4.tgz", @@ -3150,6 +3222,30 @@ "node": ">=0.10.0" } }, + "node_modules/image-size": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/image-size/-/image-size-1.2.1.tgz", + "integrity": "sha512-rH+46sQJ2dlwfjfhCyNx5thzrv+dtmBIhPHk0zgRUukHzZ/kRueTJXoYYsclBaKcSMBWuGbOFXtioLpzTb5euw==", + "dependencies": { + "queue": "6.0.2" + }, + "bin": { + "image-size": "bin/image-size.js" + }, + "engines": { + "node": ">=16.x" + } + }, + "node_modules/immediate": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", + "integrity": 
"sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==" + }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, "node_modules/inline-style-parser": { "version": "0.2.7", "resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.2.7.tgz", @@ -3273,6 +3369,11 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==" + }, "node_modules/jiti": { "version": "1.21.7", "dev": true, @@ -3307,6 +3408,17 @@ "node": ">=6" } }, + "node_modules/jszip": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", + "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "dependencies": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + } + }, "node_modules/katex": { "version": "0.16.28", "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.28.tgz", @@ -3352,6 +3464,14 @@ "integrity": "sha512-8h2oVEZNktL4BH2JCOI90iD1yXwL6iNW7KcCKT2QZgQJR2vbqDsldCTPRU9NifTCqHZci57XvQQ15YTu+sTYPg==", "license": "MIT" }, + "node_modules/lie": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", + "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "dependencies": { + "immediate": "~3.0.5" + } + }, "node_modules/lilconfig": { "version": "3.1.3", "dev": true, @@ -5905,6 +6025,11 @@ "node": ">= 6" } }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": 
"https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==" + }, "node_modules/parse-entities": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/parse-entities/-/parse-entities-4.0.2.tgz", @@ -6110,6 +6235,35 @@ "dev": true, "license": "MIT" }, + "node_modules/pptxgenjs": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/pptxgenjs/-/pptxgenjs-4.0.1.tgz", + "integrity": "sha512-TeJISr8wouAuXw4C1F/mC33xbZs/FuEG6nH9FG1Zj+nuPcGMP5YRHl6X+j3HSUnS1f3at6k75ZZXPMZlA5Lj9A==", + "dependencies": { + "@types/node": "^22.8.1", + "https": "^1.0.0", + "image-size": "^1.2.1", + "jszip": "^3.10.1" + } + }, + "node_modules/pptxgenjs/node_modules/@types/node": { + "version": "22.19.17", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.17.tgz", + "integrity": "sha512-wGdMcf+vPYM6jikpS/qhg6WiqSV/OhG+jeeHT/KlVqxYfD40iYJf9/AE1uQxVWFvU7MipKRkRv8NSHiCGgPr8Q==", + "dependencies": { + "undici-types": "~6.21.0" + } + }, + "node_modules/pptxgenjs/node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==" + }, + "node_modules/process-nextick-args": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", + "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==" + }, "node_modules/property-information": { "version": "7.1.0", "resolved": "https://registry.npmjs.org/property-information/-/property-information-7.1.0.tgz", @@ -6120,6 +6274,14 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/queue": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/queue/-/queue-6.0.2.tgz", + "integrity": 
"sha512-iHZWu+q3IdFZFX36ro/lKBkSvfkztY5Y7HMiPlOUjhupPcG2JMfst2KKEpu5XndviX/3UhFbRngUPNKtgvtZiA==", + "dependencies": { + "inherits": "~2.0.3" + } + }, "node_modules/queue-microtask": { "version": "1.2.3", "dev": true, @@ -6264,6 +6426,20 @@ "pify": "^2.3.0" } }, + "node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, "node_modules/readdirp": { "version": "3.6.0", "dev": true, @@ -6843,6 +7019,15 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/resolve-pkg-maps": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", + "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, + "funding": { + "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" + } + }, "node_modules/reusify": { "version": "1.1.0", "dev": true, @@ -6938,6 +7123,11 @@ "node": ">=6" } }, + "node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, "node_modules/safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", @@ -6959,6 +7149,11 @@ "semver": "bin/semver.js" } }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": 
"sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==" + }, "node_modules/source-map-js": { "version": "1.2.1", "dev": true, @@ -6977,6 +7172,14 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "dependencies": { + "safe-buffer": "~5.1.0" + } + }, "node_modules/stringify-entities": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz", @@ -7195,6 +7398,434 @@ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "license": "0BSD" }, + "node_modules/tsx": { + "version": "4.21.0", + "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz", + "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", + "dev": true, + "dependencies": { + "esbuild": "~0.27.0", + "get-tsconfig": "^4.7.5" + }, + "bin": { + "tsx": "dist/cli.mjs" + }, + "engines": { + "node": ">=18.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.3" + } + }, + "node_modules/tsx/node_modules/@esbuild/aix-ppc64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz", + "integrity": "sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "aix" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/android-arm": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.7.tgz", + "integrity": 
"sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/android-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz", + "integrity": "sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/android-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.7.tgz", + "integrity": "sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/darwin-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.7.tgz", + "integrity": "sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/darwin-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.7.tgz", + "integrity": "sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/freebsd-arm64": { + "version": "0.27.7", + "resolved": 
"https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.7.tgz", + "integrity": "sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/freebsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.7.tgz", + "integrity": "sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-arm": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.7.tgz", + "integrity": "sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==", + "cpu": [ + "arm" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz", + "integrity": "sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==", + "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-ia32": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.7.tgz", + "integrity": "sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + 
"node_modules/tsx/node_modules/@esbuild/linux-loong64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.7.tgz", + "integrity": "sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==", + "cpu": [ + "loong64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-mips64el": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.7.tgz", + "integrity": "sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==", + "cpu": [ + "mips64el" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-ppc64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.7.tgz", + "integrity": "sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==", + "cpu": [ + "ppc64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-riscv64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.7.tgz", + "integrity": "sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==", + "cpu": [ + "riscv64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-s390x": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.7.tgz", + "integrity": "sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==", + "cpu": [ + "s390x" + ], + "dev": true, 
+ "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/linux-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.7.tgz", + "integrity": "sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/netbsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.7.tgz", + "integrity": "sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "netbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/openbsd-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.7.tgz", + "integrity": "sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "openbsd" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/sunos-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.7.tgz", + "integrity": "sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "sunos" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/win32-arm64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.7.tgz", + "integrity": "sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==", 
+ "cpu": [ + "arm64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/win32-ia32": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.7.tgz", + "integrity": "sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==", + "cpu": [ + "ia32" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/@esbuild/win32-x64": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.7.tgz", + "integrity": "sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==", + "cpu": [ + "x64" + ], + "dev": true, + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">=18" + } + }, + "node_modules/tsx/node_modules/esbuild": { + "version": "0.27.7", + "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz", + "integrity": "sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==", + "dev": true, + "hasInstallScript": true, + "bin": { + "esbuild": "bin/esbuild" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "@esbuild/aix-ppc64": "0.27.7", + "@esbuild/android-arm": "0.27.7", + "@esbuild/android-arm64": "0.27.7", + "@esbuild/android-x64": "0.27.7", + "@esbuild/darwin-arm64": "0.27.7", + "@esbuild/darwin-x64": "0.27.7", + "@esbuild/freebsd-arm64": "0.27.7", + "@esbuild/freebsd-x64": "0.27.7", + "@esbuild/linux-arm": "0.27.7", + "@esbuild/linux-arm64": "0.27.7", + "@esbuild/linux-ia32": "0.27.7", + "@esbuild/linux-loong64": "0.27.7", + "@esbuild/linux-mips64el": "0.27.7", + "@esbuild/linux-ppc64": "0.27.7", + "@esbuild/linux-riscv64": "0.27.7", + "@esbuild/linux-s390x": "0.27.7", + "@esbuild/linux-x64": "0.27.7", + "@esbuild/netbsd-arm64": "0.27.7", + 
"@esbuild/netbsd-x64": "0.27.7", + "@esbuild/openbsd-arm64": "0.27.7", + "@esbuild/openbsd-x64": "0.27.7", + "@esbuild/openharmony-arm64": "0.27.7", + "@esbuild/sunos-x64": "0.27.7", + "@esbuild/win32-arm64": "0.27.7", + "@esbuild/win32-ia32": "0.27.7", + "@esbuild/win32-x64": "0.27.7" + } + }, "node_modules/typescript": { "version": "5.9.3", "devOptional": true, @@ -7368,7 +7999,6 @@ }, "node_modules/util-deprecate": { "version": "1.0.2", - "dev": true, "license": "MIT" }, "node_modules/uuid": { diff --git a/frontend-workflow/package.json b/frontend-workflow/package.json index a1b65fce..cbc19892 100644 --- a/frontend-workflow/package.json +++ b/frontend-workflow/package.json @@ -5,7 +5,8 @@ "scripts": { "dev": "vite", "build": "tsc && vite build", - "preview": "vite preview" + "preview": "vite preview", + "export:structured-ppt": "tsx scripts/run_paper2ppt_structured_export_cli.ts" }, "dependencies": { "@supabase/supabase-js": "^2.89.0", @@ -15,6 +16,7 @@ "i18next-browser-languagedetector": "^8.2.0", "lucide-react": "^0.294.0", "mermaid": "^10.9.5", + "pptxgenjs": "^4.0.1", "react": "^18.2.0", "react-dom": "^18.2.0", "react-drawio": "^1.0.7", @@ -30,6 +32,7 @@ "autoprefixer": "^10.4.16", "postcss": "^8.4.32", "tailwindcss": "^3.3.6", + "tsx": "^4.20.6", "typescript": "^5.2.2", "vite": "^5.0.8" } diff --git a/frontend-workflow/public/paper2any-favicon.png b/frontend-workflow/public/paper2any-favicon.png new file mode 100644 index 00000000..a7876acf Binary files /dev/null and b/frontend-workflow/public/paper2any-favicon.png differ diff --git a/frontend-workflow/scripts/run_paper2ppt_structured_export_cli.ts b/frontend-workflow/scripts/run_paper2ppt_structured_export_cli.ts new file mode 100644 index 00000000..8c5e294d --- /dev/null +++ b/frontend-workflow/scripts/run_paper2ppt_structured_export_cli.ts @@ -0,0 +1,206 @@ +#!/usr/bin/env node + +import { mkdir, readFile, writeFile } from 'node:fs/promises'; +import path from 'node:path'; +import process from 
'node:process'; +import { exportStructuredSlidesToPptx } from '../src/components/paper2ppt/exportStructuredSlides.ts'; +import type { FrontendDeckTheme, FrontendSlide } from '../src/components/paper2ppt/types.ts'; + +interface CliArgs { + slidesJson: string; + themeJson?: string; + output: string; + assetBaseUrl?: string; +} + +const normalizeLayoutData = (layoutData: any) => { + if (!layoutData || typeof layoutData !== 'object') { + return { type: 'bullets', titleKey: 'title' }; + } + return { + ...layoutData, + eyebrowKey: layoutData.eyebrow_key || layoutData.eyebrowKey, + titleKey: layoutData.title_key || layoutData.titleKey, + footerKey: layoutData.footer_key || layoutData.footerKey, + summaryKey: layoutData.summary_key || layoutData.summaryKey, + subtitleKey: layoutData.subtitle_key || layoutData.subtitleKey, + presenterKey: layoutData.presenter_key || layoutData.presenterKey, + quoteKey: layoutData.quote_key || layoutData.quoteKey, + bulletsKey: layoutData.bullets_key || layoutData.bulletsKey, + takeawayKey: layoutData.takeaway_key || layoutData.takeawayKey, + leftHeadingKey: layoutData.left_heading_key || layoutData.leftHeadingKey, + leftBodyKey: layoutData.left_body_key || layoutData.leftBodyKey, + leftPointsKey: layoutData.left_points_key || layoutData.leftPointsKey, + rightHeadingKey: layoutData.right_heading_key || layoutData.rightHeadingKey, + rightBodyKey: layoutData.right_body_key || layoutData.rightBodyKey, + rightPointsKey: layoutData.right_points_key || layoutData.rightPointsKey, + visualKey: layoutData.visual_key || layoutData.visualKey, + visualCaptionKey: layoutData.visual_caption_key || layoutData.visualCaptionKey, + leftTitleKey: layoutData.left_title_key || layoutData.leftTitleKey, + rightTitleKey: layoutData.right_title_key || layoutData.rightTitleKey, + cards: Array.isArray(layoutData.cards) + ? 
layoutData.cards.map((card: any) => ({ + titleKey: card.title_key || card.titleKey, + bodyKey: card.body_key || card.bodyKey, + })) + : [], + timeline: Array.isArray(layoutData.timeline) + ? layoutData.timeline.map((item: any) => ({ + labelKey: item.label_key || item.labelKey, + bodyKey: item.body_key || item.bodyKey, + })) + : [], + }; +}; + +const normalizeThemeLock = (themeLock: any) => ({ + mustKeep: Array.isArray(themeLock?.must_keep || themeLock?.mustKeep) + ? (themeLock.must_keep || themeLock.mustKeep).map((item: unknown) => String(item || '')).filter(Boolean) + : [], + preferredLayoutPatterns: Array.isArray(themeLock?.preferred_layout_patterns || themeLock?.preferredLayoutPatterns) + ? (themeLock.preferred_layout_patterns || themeLock.preferredLayoutPatterns) + .map((item: unknown) => String(item || '')) + .filter(Boolean) + : [], + componentSignature: String(themeLock?.component_signature || themeLock?.componentSignature || ''), + avoid: Array.isArray(themeLock?.avoid) + ? themeLock.avoid.map((item: unknown) => String(item || '')).filter(Boolean) + : [], +}); + +const normalizeTypography = (typography: any) => ({ + titleFontStack: String(typography?.title_font_stack || typography?.titleFontStack || ''), + bodyFontStack: String(typography?.body_font_stack || typography?.bodyFontStack || ''), + eyebrowSize: Number(typography?.eyebrow_size || typography?.eyebrowSize || 18), + titleSize: Number(typography?.title_size || typography?.titleSize || 56), + summarySize: Number(typography?.summary_size || typography?.summarySize || 26), + bodySize: Number(typography?.body_size || typography?.bodySize || 24), +}); + +const normalizeFrontendSlides = (slides: any[]): FrontendSlide[] => + slides.map((slide: any, index: number) => ({ + slideId: String(slide.slide_id || slide.slideId || index + 1), + pageNum: Number(slide.page_num || slide.pageNum || index + 1), + title: slide.title || `Slide ${index + 1}`, + layoutType: slide.layout_type || slide.layoutType || 'bullets', 
+ layoutData: normalizeLayoutData(slide.layout_data || slide.layoutData || {}), + editableFields: Array.isArray(slide.editable_fields || slide.editableFields) + ? (slide.editable_fields || slide.editableFields).map((field: any) => ({ + key: String(field.key || ''), + label: String(field.label || field.key || ''), + type: field.type === 'list' || field.type === 'textarea' ? field.type : 'text', + value: String(field.value || ''), + items: Array.isArray(field.items) ? field.items.map((item: any) => String(item || '')) : [], + })) + : [], + visualAssets: Array.isArray(slide.visual_assets || slide.visualAssets) + ? (slide.visual_assets || slide.visualAssets).map((asset: any, assetIndex: number) => ({ + key: String(asset.key || `main_visual_${assetIndex + 1}`), + label: String(asset.label || asset.key || `Image ${assetIndex + 1}`), + src: String(asset.src || ''), + previewSrc: String(asset.preview_src || asset.previewSrc || asset.src || ''), + originalSrc: String(asset.original_src || asset.originalSrc || asset.storage_path || asset.storagePath || asset.src || ''), + alt: String(asset.alt || asset.label || asset.key || ''), + sourceType: asset.source_type === 'paper_asset' || asset.sourceType === 'paper_asset' + ? 'paper_asset' + : asset.source_type === 'upload' || asset.sourceType === 'upload' + ? 'upload' + : 'generated', + storagePath: asset.storage_path || asset.storagePath || undefined, + previewStoragePath: asset.preview_storage_path || asset.previewStoragePath || undefined, + prompt: asset.prompt || undefined, + style: asset.style || undefined, + })) + : [], + generationNote: slide.generation_note || slide.generationNote || '', + status: slide.status === 'processing' || slide.status === 'pending' ? 
slide.status : 'done', + review: { + status: 'idle', + summary: '', + issues: [], + }, + })); + +const normalizeFrontendDeckTheme = (theme: any): FrontendDeckTheme | undefined => { + if (!theme || typeof theme !== 'object') return undefined; + const themeLock = theme.theme_lock || theme.themeLock || {}; + return { + themeName: String(theme.theme_name || theme.themeName || 'locked_deck_theme'), + visualMood: String(theme.visual_mood || theme.visualMood || ''), + styleFamily: String(theme.style_family || theme.styleFamily || 'modern') as FrontendDeckTheme['styleFamily'], + footerText: String(theme.footer_text || theme.footerText || ''), + sectionLabelTemplate: String(theme.section_label_template || theme.sectionLabelTemplate || ''), + palette: { + bg: String(theme.palette?.bg || '#0b1020'), + panel: String(theme.palette?.panel || 'rgba(15, 23, 42, 0.92)'), + primary: String(theme.palette?.primary || '#7dd3fc'), + secondary: String(theme.palette?.secondary || '#38bdf8'), + accent: String(theme.palette?.accent || '#f59e0b'), + text: String(theme.palette?.text || '#e2e8f0'), + muted: String(theme.palette?.muted || '#94a3b8'), + }, + typography: normalizeTypography(theme.typography || {}), + themeLock: normalizeThemeLock(themeLock), + }; +}; + +const usage = () => { + console.log(`Usage: + npm run export:structured-ppt -- --slides-json /path/to/frontend_slides.json --theme-json /path/to/frontend_theme.json --output /path/to/out.pptx [--asset-base-url http://127.0.0.1:8000] +`); +}; + +const parseArgs = (): CliArgs => { + const args = process.argv.slice(2); + const read = (name: string) => { + const index = args.indexOf(name); + if (index === -1 || index + 1 >= args.length) return ''; + return args[index + 1]; + }; + + const slidesJson = read('--slides-json'); + const output = read('--output'); + const themeJson = read('--theme-json') || undefined; + const assetBaseUrl = read('--asset-base-url') || undefined; + + if (!slidesJson || !output) { + usage(); + throw new 
Error('Missing required --slides-json or --output'); + } + + return { slidesJson, themeJson, output, assetBaseUrl }; +}; + +const main = async () => { + const args = parseArgs(); + const slides = normalizeFrontendSlides(JSON.parse(await readFile(args.slidesJson, 'utf-8'))); + const theme = args.themeJson + ? normalizeFrontendDeckTheme(JSON.parse(await readFile(args.themeJson, 'utf-8'))) + : undefined; + + const result = await exportStructuredSlidesToPptx({ + slides, + deckTheme: theme, + fileName: path.basename(args.output), + assetBaseUrl: args.assetBaseUrl, + outputType: 'nodebuffer', + }); + + if (!('buffer' in result)) { + throw new Error('Expected nodebuffer export result'); + } + + await mkdir(path.dirname(args.output), { recursive: true }); + await writeFile(args.output, Buffer.from(result.buffer)); + console.log(JSON.stringify({ + success: true, + output: path.resolve(args.output), + slide_count: slides.length, + asset_base_url: args.assetBaseUrl || '', + })); +}; + +main().catch((error) => { + console.error(String(error?.stack || error?.message || error)); + process.exit(1); +}); diff --git a/frontend-workflow/src/components/Image2DrawioPage.tsx b/frontend-workflow/src/components/Image2DrawioPage.tsx index 02ff7c38..be021343 100644 --- a/frontend-workflow/src/components/Image2DrawioPage.tsx +++ b/frontend-workflow/src/components/Image2DrawioPage.tsx @@ -204,9 +204,9 @@ const Image2DrawioPage = () => { if (userApiConfigRequired) { formData.append('chat_api_url', apiUrl.trim()); formData.append('api_key', apiKey.trim()); + formData.append('gen_fig_model', genFigModel); + formData.append('vlm_model', vlmModel); } - formData.append('gen_fig_model', genFigModel); - formData.append('vlm_model', vlmModel); formData.append('email', user?.id || user?.email || ''); setStatusMessage(t('status.processing')); @@ -611,12 +611,16 @@ const Image2DrawioPage = () => { + {!userApiConfigRequired && ( +

Free 模式下由后端统一选择 DrawIO 转换使用的视觉模型。

+ )} diff --git a/frontend-workflow/src/components/Image2PptPage.tsx b/frontend-workflow/src/components/Image2PptPage.tsx index 1230b844..47762405 100644 --- a/frontend-workflow/src/components/Image2PptPage.tsx +++ b/frontend-workflow/src/components/Image2PptPage.tsx @@ -221,7 +221,9 @@ const Image2PptPage = () => { try { setIsValidating(true); setError(null); - await verifyLlmConnection(llmApiUrl, apiKey, import.meta.env.VITE_DEFAULT_LLM_MODEL || 'deepseek-v3.2'); + if (userApiConfigRequired) { + await verifyLlmConnection(llmApiUrl, apiKey, import.meta.env.VITE_DEFAULT_LLM_MODEL || 'deepseek-v3.2'); + } setIsValidating(false); } catch (err) { setIsValidating(false); @@ -268,8 +270,8 @@ const Image2PptPage = () => { if (userApiConfigRequired) { formData.append('chat_api_url', llmApiUrl.trim()); formData.append('api_key', apiKey.trim()); + formData.append('gen_fig_model', genFigModel); } - formData.append('gen_fig_model', genFigModel); } else { formData.append('use_ai_edit', 'false'); } @@ -553,7 +555,8 @@ const Image2PptPage = () => { setModel(event.target.value)} + disabled={!userApiConfigRequired} className="w-full rounded-2xl border border-white/10 bg-white/5 px-3 py-3 text-sm text-white outline-none focus:border-cyan-300/35" > {MINDMAP_MODELS.map((item) => ( @@ -546,6 +545,9 @@ export default function MindMapPage() { ))} + {!userApiConfigRequired ? ( +

Free 模式下由后端统一选择思维导图模型。

+ ) : null}
diff --git a/frontend-workflow/src/components/Pdf2PptPage.tsx b/frontend-workflow/src/components/Pdf2PptPage.tsx index fe819f0a..abec48b5 100644 --- a/frontend-workflow/src/components/Pdf2PptPage.tsx +++ b/frontend-workflow/src/components/Pdf2PptPage.tsx @@ -222,7 +222,9 @@ const Pdf2PptPage = () => { try { setIsValidating(true); setError(null); - await verifyLlmConnection(llmApiUrl, apiKey, import.meta.env.VITE_DEFAULT_LLM_MODEL || 'deepseek-v3.2'); + if (userApiConfigRequired) { + await verifyLlmConnection(llmApiUrl, apiKey, import.meta.env.VITE_DEFAULT_LLM_MODEL || 'deepseek-v3.2'); + } setIsValidating(false); } catch (err) { setIsValidating(false); @@ -266,11 +268,11 @@ const Pdf2PptPage = () => { if (useAiEdit) { formData.append('use_ai_edit', 'true'); - if (userApiConfigRequired) { - formData.append('chat_api_url', llmApiUrl.trim()); - formData.append('api_key', apiKey.trim()); - } - formData.append('gen_fig_model', genFigModel); + if (userApiConfigRequired) { + formData.append('chat_api_url', llmApiUrl.trim()); + formData.append('api_key', apiKey.trim()); + formData.append('gen_fig_model', genFigModel); + } } else { formData.append('use_ai_edit', 'false'); } @@ -579,7 +581,8 @@ const Pdf2PptPage = () => { setModel(e.target.value)} - className="w-full rounded-lg border border-white/20 bg-black/40 px-4 py-2.5 text-sm text-gray-100 outline-none focus:ring-2 focus:ring-teal-500" + disabled={!userApiConfigRequired} + className="w-full rounded-lg border border-white/20 bg-black/40 px-4 py-2.5 text-sm text-gray-100 outline-none focus:ring-2 focus:ring-teal-500 disabled:opacity-50 disabled:cursor-not-allowed" > {modelOptions.map((option) => ( @@ -1692,13 +1678,17 @@ const Ppt2PolishPage = () => { value={model} onChange={(e) => setModel(e.target.value)} placeholder="自定义模型" - className="w-full rounded-lg border border-white/20 bg-black/40 px-4 py-2.5 text-sm text-gray-100 outline-none focus:ring-2 focus:ring-teal-500" + disabled={!userApiConfigRequired} + 
className="w-full rounded-lg border border-white/20 bg-black/40 px-4 py-2.5 text-sm text-gray-100 outline-none focus:ring-2 focus:ring-teal-500 disabled:opacity-50 disabled:cursor-not-allowed" />
{t('upload.config.customModelTip')}
+ {!userApiConfigRequired && ( +

Free 模式下由后端统一选择文本模型。

+ )}
@@ -1706,7 +1696,7 @@ const Ppt2PolishPage = () => { setApiUrl(e.target.value)} - className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-emerald-500" - > - {[apiUrl, ...API_URL_OPTIONS].filter((v, i, a) => a.indexOf(v) === i).map((url: string) => ( - - ))} - -
+ {userApiConfigRequired ? ( + <> +
+ + +
-
- - setApiKey(e.target.value)} - placeholder="sk-..." - className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-emerald-500 font-mono" - /> -
+
+ + setApiKey(e.target.value)} + placeholder="sk-..." + className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-emerald-500 font-mono" + /> +
-
- - setModel(e.target.value)} - className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-emerald-500" - /> -
+
+ + setModel(e.target.value)} + className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-emerald-500" + /> +
+ + ) : ( +
+ Free 模式下由后端统一选择深度研究模型、搜索凭证与接口配置。 +
+ )}
-
- - setSearchApiKey(e.target.value)} - placeholder="search_api_key" - className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-emerald-500 font-mono" - /> -
- {searchProvider === 'google_cse' && ( + {userApiConfigRequired && ( +
+ + setSearchApiKey(e.target.value)} + placeholder="search_api_key" + className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-emerald-500 font-mono" + /> +
+ )} + {userApiConfigRequired && searchProvider === 'google_cse' && (
{ const { user } = useAuthStore(); + const { userApiConfigRequired } = useRuntimeBilling(); const [mindmapGenerating, setMindmapGenerating] = useState(false); const [generatedMermaidCode, setGeneratedMermaidCode] = useState(''); const [showPreview, setShowPreview] = useState(false); @@ -49,7 +51,7 @@ export const MindMapTool = ({ files = [], selectedIds, onGenerateSuccess }: Mind return; } - if (!mindmapParams.api_key) { + if (userApiConfigRequired && !mindmapParams.api_key) { alert('请输入 API Key'); return; } @@ -75,12 +77,16 @@ export const MindMapTool = ({ files = [], selectedIds, onGenerateSuccess }: Mind file_paths: filePaths, user_id: user.id, email: user.email, - api_url: mindmapParams.api_url, - api_key: mindmapParams.api_key, - model: mindmapParams.model, mindmap_style: mindmapParams.mindmap_style, max_depth: mindmapParams.max_depth, - language: mindmapParams.language + language: mindmapParams.language, + ...(userApiConfigRequired + ? { + api_url: mindmapParams.api_url, + api_key: mindmapParams.api_key, + model: mindmapParams.model, + } + : {}) }) }); @@ -148,29 +154,37 @@ export const MindMapTool = ({ files = [], selectedIds, onGenerateSuccess }: Mind {/* Configuration */}
-
- - setMindmapParams({...mindmapParams, api_key: e.target.value})} - placeholder="sk-..." - className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-cyan-500 font-mono" - /> -
+ {userApiConfigRequired ? ( + <> +
+ + setMindmapParams({...mindmapParams, api_key: e.target.value})} + placeholder="sk-..." + className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-cyan-500 font-mono" + /> +
-
- - -
+
+ + +
+ + ) : ( +
+ Free 模式下由后端统一选择思维导图模型与接口配置。 +
+ )}
@@ -178,7 +192,8 @@ export const MindMapTool = ({ files = [], selectedIds, onGenerateSuccess }: Mind setPodcastParams({...podcastParams, api_key: e.target.value})} - placeholder="sk-..." - className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-green-500 font-mono" - /> -
- -
- - -
+ {userApiConfigRequired ? ( + <> +
+ + setPodcastParams({...podcastParams, api_key: e.target.value})} + placeholder="sk-..." + className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-green-500 font-mono" + /> +
+ +
+ + +
+ + ) : ( +
+ Free 模式下由后端统一选择播客脚本模型、TTS 模型和接口配置。 +
+ )}
@@ -253,7 +267,8 @@ export const PodcastTool = ({ files = [], selectedIds, onGenerateSuccess }: Podc setPptParams({...pptParams, api_key: e.target.value})} - placeholder="sk-..." - className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-purple-500 font-mono" - /> -
+ {userApiConfigRequired ? ( + <> +
+ + setPptParams({...pptParams, api_key: e.target.value})} + placeholder="sk-..." + className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-purple-500 font-mono" + /> +
+
+ + +
+ + ) : ( +
+ Free 模式下由后端统一选择 PPT 生成使用的文本与生图模型。 +
+ )}
@@ -198,27 +231,6 @@ export const PptTool = ({ files, selectedIds, onGenerateSuccess }: PptToolProps) 需要向量入库并基于检索生成大纲 -
- - -
-
@@ -226,7 +238,8 @@ export const PptTool = ({ files, selectedIds, onGenerateSuccess }: PptToolProps) setPptParams({...pptParams, gen_fig_model: e.target.value})} - disabled={pptParams.api_url === 'http://123.129.219.111:3000/v1'} + disabled={!userApiConfigRequired || pptParams.api_url === 'http://123.129.219.111:3000/v1'} className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-purple-500 disabled:opacity-50" > diff --git a/frontend-workflow/src/components/knowledge-base/tools/ReportTool.tsx b/frontend-workflow/src/components/knowledge-base/tools/ReportTool.tsx index 9c8bfcde..8ad99200 100644 --- a/frontend-workflow/src/components/knowledge-base/tools/ReportTool.tsx +++ b/frontend-workflow/src/components/knowledge-base/tools/ReportTool.tsx @@ -7,6 +7,7 @@ import { getApiSettings } from '../../../services/apiSettingsService'; import { backendFetch } from '../../../services/backendClient'; import { useAuthStore } from '../../../stores/authStore'; import { MarkdownViewerModal } from './MarkdownViewerModal'; +import { useRuntimeBilling } from '../../../hooks/useRuntimeBilling'; interface ReportToolProps { files: KnowledgeFile[]; @@ -16,6 +17,7 @@ interface ReportToolProps { export const ReportTool = ({ files = [], selectedIds, onGenerateSuccess }: ReportToolProps) => { const { user } = useAuthStore(); + const { userApiConfigRequired } = useRuntimeBilling(); const [apiUrl, setApiUrl] = useState('https://api.apiyi.com/v1'); const [apiKey, setApiKey] = useState(''); const [model, setModel] = useState('gpt-5.1'); @@ -46,7 +48,7 @@ export const ReportTool = ({ files = [], selectedIds, onGenerateSuccess }: Repor alert('请先登录后再生成报告。'); return; } - if (!apiKey.trim()) { + if (userApiConfigRequired && !apiKey.trim()) { alert('请输入 API Key'); return; } @@ -67,14 +69,18 @@ export const ReportTool = ({ files = [], selectedIds, onGenerateSuccess }: Repor }, body: JSON.stringify({ file_paths: filePaths, - api_url: 
apiUrl, - api_key: apiKey, - model, language, report_style: style, length, email: user.email, - user_id: user.id + user_id: user.id, + ...(userApiConfigRequired + ? { + api_url: apiUrl, + api_key: apiKey, + model, + } + : {}) }) }); @@ -131,39 +137,47 @@ export const ReportTool = ({ files = [], selectedIds, onGenerateSuccess }: Repor
-
- - -
+ {userApiConfigRequired ? ( + <> +
+ + +
-
- - setApiKey(e.target.value)} - placeholder="sk-..." - className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-fuchsia-500 font-mono" - /> -
+
+ + setApiKey(e.target.value)} + placeholder="sk-..." + className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-fuchsia-500 font-mono" + /> +
-
- - setModel(e.target.value)} - className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-fuchsia-500" - /> -
+
+ + setModel(e.target.value)} + className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-fuchsia-500" + /> +
+ + ) : ( +
+ Free 模式下由后端统一选择报告生成模型与接口配置。 +
+ )}
@@ -236,29 +243,37 @@ export const SearchTool = ({ files = [], selectedIds = new Set(), knowledgeBases
-
- - -
- -
- - setApiKey(e.target.value)} - placeholder="sk-..." - className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-blue-500 font-mono" - /> -
+ {userApiConfigRequired ? ( + <> +
+ + +
+ +
+ + setApiKey(e.target.value)} + placeholder="sk-..." + className="w-full bg-black/40 border border-white/10 rounded-lg px-3 py-2.5 text-sm text-gray-200 outline-none focus:border-blue-500 font-mono" + /> +
+ + ) : ( +
+ Free 模式下由后端统一选择向量检索使用的嵌入模型与接口配置。 +
+ )}
diff --git a/frontend-workflow/src/components/paper2drawio/index.tsx b/frontend-workflow/src/components/paper2drawio/index.tsx index 0d20914f..ef3d75f0 100644 --- a/frontend-workflow/src/components/paper2drawio/index.tsx +++ b/frontend-workflow/src/components/paper2drawio/index.tsx @@ -18,6 +18,7 @@ import Banner from './Banner'; import QRCodeTooltip from '../QRCodeTooltip'; import ManagedApiNotice from '../ManagedApiNotice'; import { useRuntimeBilling } from '../../hooks/useRuntimeBilling'; +import { appendManagedApiConfig, appendManagedModel } from '../../utils/runtimeBillingForm'; const DRAWIO_ORIGINS = new Set(['https://embed.diagrams.net', 'https://app.diagrams.net']); const STORAGE_KEY = 'paper2drawio_settings'; @@ -297,17 +298,19 @@ export default function Paper2DrawioPage({ const handleGenerate = useCallback(async () => { if (!textContent && !file) return; - // Step 0: Verify LLM Connection first - try { - setIsValidating(true); - setError(null); - await verifyLlmConnection(apiUrl, apiKey, model); - setIsValidating(false); - } catch (err) { - setIsValidating(false); - const errorMsg = err instanceof Error ? err.message : '验证 LLM 连接失败'; - setError(errorMsg); - return; + if (userApiConfigRequired) { + // Step 0: Verify LLM Connection first + try { + setIsValidating(true); + setError(null); + await verifyLlmConnection(apiUrl, apiKey, model); + setIsValidating(false); + } catch (err) { + setIsValidating(false); + const errorMsg = err instanceof Error ? 
err.message : '验证 LLM 连接失败'; + setError(errorMsg); + return; + } } setIsLoading(true); @@ -315,11 +318,8 @@ export default function Paper2DrawioPage({ try { if (generationMode === 'paper2drawio') { const formData = new FormData(); - formData.append('img_gen_model_name', p2dImageModel); - if (userApiConfigRequired) { - formData.append('chat_api_url', apiUrl); - formData.append('api_key', apiKey); - } + appendManagedModel(formData, userApiConfigRequired, 'img_gen_model_name', p2dImageModel); + appendManagedApiConfig(formData, userApiConfigRequired, apiUrl, apiKey); formData.append('input_type', uploadMode); formData.append('graph_type', 'model_arch'); formData.append('style', p2dStyle); @@ -368,12 +368,9 @@ export default function Paper2DrawioPage({ } const formData = new FormData(); - if (userApiConfigRequired) { - formData.append('chat_api_url', apiUrl); - formData.append('api_key', apiKey); - } + appendManagedApiConfig(formData, userApiConfigRequired, apiUrl, apiKey); const modelToSend = enableModelRace ? withModelOptions(PAPER2DRAWIO_MODELS, model).join(',') : model; - formData.append('model', modelToSend); + appendManagedModel(formData, userApiConfigRequired, 'model', modelToSend); formData.append('input_type', uploadMode === 'file' ? 'PDF' : 'TEXT'); formData.append('diagram_type', diagramType); formData.append('diagram_style', diagramStyle); @@ -927,7 +924,8 @@ export default function Paper2DrawioPage({ - {modelOptions.length > 1 && ( + {!userApiConfigRequired && ( +

Free 模式下由后端统一选择 DrawIO 生成模型。

+ )} + {userApiConfigRequired && modelOptions.length > 1 && (
{graphType === 'model_arch' ? ( diff --git a/frontend-workflow/src/components/paper2graph/index.tsx b/frontend-workflow/src/components/paper2graph/index.tsx index 62021729..bf3f57f2 100644 --- a/frontend-workflow/src/components/paper2graph/index.tsx +++ b/frontend-workflow/src/components/paper2graph/index.tsx @@ -405,9 +405,9 @@ const Paper2FigurePage: React.FC = ({ if (userApiConfigRequired) { formData.append('chat_api_url', llmApiUrl.trim()); formData.append('api_key', apiKey.trim()); + formData.append('gen_fig_model', DEFAULT_IMAGE2DRAWIO_GEN_FIG_MODEL); + formData.append('vlm_model', DEFAULT_IMAGE2DRAWIO_VLM_MODEL); } - formData.append('gen_fig_model', DEFAULT_IMAGE2DRAWIO_GEN_FIG_MODEL); - formData.append('vlm_model', DEFAULT_IMAGE2DRAWIO_VLM_MODEL); formData.append('email', user?.id || user?.email || ''); const res = await backendFetch('/api/v1/image2drawio/generate', { @@ -616,7 +616,9 @@ const Paper2FigurePage: React.FC = ({ } const formData = new FormData(); - formData.append('img_gen_model_name', model); + if (userApiConfigRequired) { + formData.append('img_gen_model_name', model); + } if (userApiConfigRequired) { formData.append('chat_api_url', llmApiUrl.trim()); formData.append('api_key', apiKey.trim()); @@ -648,7 +650,9 @@ const Paper2FigurePage: React.FC = ({ try { setIsValidating(true); setError(null); - await verifyLlmConnection(llmApiUrl, apiKey, import.meta.env.VITE_DEFAULT_LLM_MODEL || "deepseek-v3.2"); + if (userApiConfigRequired) { + await verifyLlmConnection(llmApiUrl, apiKey, import.meta.env.VITE_DEFAULT_LLM_MODEL || "deepseek-v3.2"); + } setIsValidating(false); setIsLoading(true); @@ -774,7 +778,9 @@ const Paper2FigurePage: React.FC = ({ // 当前 UploadMode 仅支持 'file' | 'text',无需图片输入 const formData = new FormData(); - formData.append('img_gen_model_name', model); + if (userApiConfigRequired) { + formData.append('img_gen_model_name', model); + } if (userApiConfigRequired) { formData.append('chat_api_url', llmApiUrl.trim()); 
formData.append('api_key', apiKey.trim()); diff --git a/frontend-workflow/src/components/paper2poster/UploadStep.tsx b/frontend-workflow/src/components/paper2poster/UploadStep.tsx index d4a9b96a..feed3312 100644 --- a/frontend-workflow/src/components/paper2poster/UploadStep.tsx +++ b/frontend-workflow/src/components/paper2poster/UploadStep.tsx @@ -162,7 +162,8 @@ const UploadStep: React.FC = ({ setConfig({ ...config, vision_model: e.target.value })} - className="w-full px-4 py-3 bg-black/40 border border-white/20 rounded-xl text-gray-100 focus:outline-none focus:ring-2 focus:ring-green-500 transition-colors" + disabled={!showApiConfig} + className="w-full px-4 py-3 bg-black/40 border border-white/20 rounded-xl text-gray-100 focus:outline-none focus:ring-2 focus:ring-green-500 transition-colors disabled:opacity-50 disabled:cursor-not-allowed" > + {!showApiConfig && ( +

Free 模式下由后端统一选择 Poster 文本和视觉模型。

+ )} {/* 海报尺寸 */}
diff --git a/frontend-workflow/src/components/paper2poster/index.tsx b/frontend-workflow/src/components/paper2poster/index.tsx index d1e8a135..af00fd29 100644 --- a/frontend-workflow/src/components/paper2poster/index.tsx +++ b/frontend-workflow/src/components/paper2poster/index.tsx @@ -232,17 +232,19 @@ const Paper2PosterPage = () => { return; } - try { - // Verify LLM Connection - setIsValidating(true); - setError(null); - await verifyLlmConnection(llmApiUrl, apiKey, 'gpt-4o'); - setIsValidating(false); - } catch (err) { - setIsValidating(false); - const message = err instanceof Error ? err.message : 'API 验证失败'; - setError(message); - return; + if (userApiConfigRequired) { + try { + // Verify LLM Connection + setIsValidating(true); + setError(null); + await verifyLlmConnection(llmApiUrl, apiKey, 'gpt-4o'); + setIsValidating(false); + } catch (err) { + setIsValidating(false); + const message = err instanceof Error ? err.message : 'API 验证失败'; + setError(message); + return; + } } setIsUploading(true); @@ -279,9 +281,9 @@ const Paper2PosterPage = () => { if (userApiConfigRequired) { formData.append('chat_api_url', llmApiUrl.trim()); formData.append('api_key', apiKey.trim()); + formData.append('model', config.text_model); + formData.append('vision_model', config.vision_model); } - formData.append('model', config.text_model); - formData.append('vision_model', config.vision_model); formData.append('poster_width', config.poster_width.toString()); formData.append('poster_height', config.poster_height.toString()); diff --git a/frontend-workflow/src/components/paper2ppt/FrontendCompleteStep.tsx b/frontend-workflow/src/components/paper2ppt/FrontendCompleteStep.tsx index 3bc6e8c2..15c3c5fe 100644 --- a/frontend-workflow/src/components/paper2ppt/FrontendCompleteStep.tsx +++ b/frontend-workflow/src/components/paper2ppt/FrontendCompleteStep.tsx @@ -7,11 +7,12 @@ import { RotateCcw, Sparkles, } from 'lucide-react'; -import { FrontendSlide } from './types'; +import { 
FrontendDeckTheme, FrontendSlide } from './types'; import FrontendSlidePreview from './FrontendSlidePreview'; interface FrontendCompleteStepProps { slides: FrontendSlide[]; + deckTheme?: FrontendDeckTheme | null; downloadUrl: string | null; pdfPreviewUrl: string | null; isGeneratingFinal: boolean; @@ -25,6 +26,7 @@ interface FrontendCompleteStepProps { const FrontendCompleteStep: React.FC = ({ slides, + deckTheme, downloadUrl, pdfPreviewUrl, isGeneratingFinal, @@ -52,9 +54,9 @@ const FrontendCompleteStep: React.FC = ({
{slides.map((slide) => (
- +

- 第 {slide.pageNum} 页 · {slide.title} + 第 {slide.pageNum} 页 · {slide.title} · {slide.layoutType}

))} @@ -70,16 +72,16 @@ const FrontendCompleteStep: React.FC = ({ > {isGeneratingFinal ? ( <> - 正在截图并导出... + 正在生成真可编辑 PPTX... ) : ( <> - 生成最终文件 + 生成可编辑 PPTX )}

- 导出会将每一页前端渲染结果截图,再打包成整页图片版 PPTX / PDF。 + 导出会把结构化 slide schema 直接生成真实可编辑 PPTX,不再走整页截图。

) : ( diff --git a/frontend-workflow/src/components/paper2ppt/FrontendGenerateStep.tsx b/frontend-workflow/src/components/paper2ppt/FrontendGenerateStep.tsx index 2a2e6bb9..d4de9b3b 100644 --- a/frontend-workflow/src/components/paper2ppt/FrontendGenerateStep.tsx +++ b/frontend-workflow/src/components/paper2ppt/FrontendGenerateStep.tsx @@ -1,17 +1,14 @@ -import React, { useEffect, useState } from 'react'; +import React from 'react'; import { AlertCircle, ArrowLeft, CheckCircle2, - Code2, FileText, Loader2, MonitorSmartphone, Plus, RefreshCw, - RotateCcw, ScanSearch, - ShieldCheck, Trash2, } from 'lucide-react'; import { FrontendDeckTheme, FrontendSlide, SlideOutline, Step } from './types'; @@ -29,8 +26,6 @@ interface FrontendGenerateStepProps { setSlidePrompt: (prompt: string) => void; handleRegenerateSlide: () => void; handleReviewSlide: () => void; - applyCodeEdit: (htmlTemplate: string, cssCode: string) => boolean; - handleDebugCodeEdit: (htmlTemplate: string, cssCode: string) => Promise; handleConfirmSlide: () => void; setCurrentStep: (step: Step) => void; error: string | null; @@ -55,8 +50,6 @@ const FrontendGenerateStep: React.FC = ({ setSlidePrompt, handleRegenerateSlide, handleReviewSlide, - applyCodeEdit, - handleDebugCodeEdit, handleConfirmSlide, setCurrentStep, error, @@ -68,40 +61,29 @@ const FrontendGenerateStep: React.FC = ({ removeListItem, replaceVisualAsset, }) => { - const [panelMode, setPanelMode] = useState<'preview' | 'code'>('preview'); - const [draftHtml, setDraftHtml] = useState(''); - const [draftCss, setDraftCss] = useState(''); - const [codeStatus, setCodeStatus] = useState(null); const currentSlide = frontendSlides[currentSlideIndex]; const outlineSlide = outlineData[currentSlideIndex]; - const isCodeDirty = draftHtml !== (currentSlide?.htmlTemplate || '') || draftCss !== (currentSlide?.cssCode || ''); const busyMessage = taskMessage || (currentSlide?.status === 'processing' ? 
'当前页仍在生成中,请稍候。' : '后台任务仍在处理中,请稍候。'); const reviewStatusMessage = isReviewing - ? taskMessage || '当前页正在进行视觉检查,确认并继续会在检查结束后解锁。' + ? taskMessage || '当前页正在进行结构检查,确认并继续会在检查结束后解锁。' : taskMessage || ''; const reviewDisabledReason = !currentSlide ? '当前页尚未生成' : isGenerating ? busyMessage : isReviewing - ? '当前页正在进行视觉检查' + ? '当前页正在进行结构检查' : ''; const confirmDisabledReason = !currentSlide ? '当前页尚未生成' : isGenerating ? busyMessage : isReviewing - ? '当前页正在进行视觉检查,检查完成后才能确认并继续' + ? '当前页正在进行结构检查,检查完成后才能确认并继续' : currentSlide.status !== 'done' ? '当前页尚未完成生成' : ''; - useEffect(() => { - setDraftHtml(currentSlide?.htmlTemplate || ''); - setDraftCss(currentSlide?.cssCode || ''); - setCodeStatus(null); - }, [currentSlide?.slideId, currentSlide?.htmlTemplate, currentSlide?.cssCode]); - return (
@@ -153,152 +135,60 @@ const FrontendGenerateStep: React.FC = ({ 结构说明

- {outlineSlide?.layout_description || '模型将自动规划文本优先的前端布局'} + {outlineSlide?.layout_description || '模型将自动规划结构化前端布局'}

-
- - +
+ {currentSlide?.layoutType || 'structured'}
- {panelMode === 'preview' ? ( - isReviewing ? ( -
- -

视觉检查正在进行中...

-

- {reviewStatusMessage} -

-
- ) : currentSlide?.review?.status === 'repairing' ? ( -
- -

当前页正在自动修复...

-

- {currentSlide.review.summary || '请稍候,修复完成后会恢复可继续操作。'} -

-
- ) : isGenerating && currentSlide?.status === 'processing' ? ( -
- -

正在生成这一页的前端代码...

-

- {taskMessage || '大模型正在编排 HTML/CSS 模板'} -

-
- ) : currentSlide ? ( - - updateFieldValue(currentSlideIndex, fieldKey, value) - } - onInlineListItemChange={(fieldKey, itemIndex, value) => - updateListItem(currentSlideIndex, fieldKey, itemIndex, value) - } - onInlineListReplace={(fieldKey, items) => - replaceListItems(currentSlideIndex, fieldKey, items) - } - onReplaceImage={(imageKey, file) => - replaceVisualAsset(currentSlideIndex, imageKey, file) - } - /> - ) : ( -
- 等待生成 -
- ) + {isReviewing ? ( +
+ +

结构检查正在进行中...

+

+ {reviewStatusMessage} +

+
+ ) : currentSlide?.review?.status === 'repairing' ? ( +
+ +

当前页正在自动修复...

+

+ {currentSlide.review.summary || '请稍候,修复完成后会恢复可继续操作。'} +

+
+ ) : isGenerating && currentSlide?.status === 'processing' ? ( +
+ +

正在生成这一页的结构化内容...

+

+ {taskMessage || '大模型正在编排结构化 slide schema'} +

+
+ ) : currentSlide ? ( + + updateFieldValue(currentSlideIndex, fieldKey, value) + } + onInlineListItemChange={(fieldKey, itemIndex, value) => + updateListItem(currentSlideIndex, fieldKey, itemIndex, value) + } + onInlineListReplace={(fieldKey, items) => + replaceListItems(currentSlideIndex, fieldKey, items) + } + onReplaceImage={(imageKey, file) => + replaceVisualAsset(currentSlideIndex, imageKey, file) + } + /> ) : ( -
-
-
- {'允许直接编辑当前页 HTML/CSS。请保留 `{{field:key}}` / `{{list:key}}` 占位符。'} -
-
- - - -
-
-
-
HTML Template
-