From 578793dbc3e090b41914acf8f6b093fc6fd1fe38 Mon Sep 17 00:00:00 2001 From: LujiaJin Date: Fri, 20 Feb 2026 19:07:27 +0800 Subject: [PATCH 01/10] feat: add deploy directory for offline H100 deployment - Dockerfile.backend: GPU inference container (CUDA 12.8.1) - Dockerfile.frontend: Vue.js + Nginx multi-stage build - docker-compose.yml: orchestration with GPU passthrough - nginx.docker.conf: reverse proxy with SSL support - gen_ssl_cert.sh: self-signed certificate generation - DEPLOY_WSL2_TO_H100_ZH.md: comprehensive deployment guide - Update .gitignore to exclude models/ and build artifacts --- .gitignore | Bin 27 -> 98 bytes deploy/DEPLOY_WSL2_TO_H100_ZH.md | 700 +++++++++++++++++++++++++++++++ deploy/Dockerfile.backend | 71 ++++ deploy/Dockerfile.frontend | 36 ++ deploy/docker-compose.yml | 64 +++ deploy/gen_ssl_cert.sh | 23 + deploy/nginx.docker.conf | 122 ++++++ deploy/requirements.backend.txt | 29 ++ 8 files changed, 1045 insertions(+) create mode 100644 deploy/DEPLOY_WSL2_TO_H100_ZH.md create mode 100644 deploy/Dockerfile.backend create mode 100644 deploy/Dockerfile.frontend create mode 100644 deploy/docker-compose.yml create mode 100644 deploy/gen_ssl_cert.sh create mode 100644 deploy/nginx.docker.conf create mode 100644 deploy/requirements.backend.txt diff --git a/.gitignore b/.gitignore index 988f840d44e6a358f3fe196ec8006bcef49217dc..c5b321cd246de12ebeb1f1a288bd9617fae91a00 100644 GIT binary patch literal 98 zcmdPWOUmZuijOa-OioPBNR5x@< 目标:你在本地 Win10 + WSL2 构建 Docker 镜像,把镜像和模型传到无公网的公司 H100 服务器,启动服务后在本地浏览器和手机上测试全双工视频通话。 + +**你的环境速查:** + +| 项目 | 值 | +| --- | --- | +| 服务器 SSH | `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST`(端口可能动态变化) | +| GPU | NVIDIA H100(驱动 550.90.12) | +| CUDA | 12.4(`nvidia-smi` 显示值;注意 Dockerfile 基础镜像为 `cuda:12.8.1`,PyTorch 安装的是 cu124 版本) | +| 本地 | Win10 + WSL2 Ubuntu | + +**每次执行前先设置 SSH 变量(只改这里即可):** + +```bash +export SSH_HOST=127.0.0.1 +export SSH_PORT=54062 +export SSH_USER=your_user +``` + +PowerShell 等价写法(Windows 终端直接用): + +```powershell +$env:SSH_HOST = "127.0.0.1" 
+$env:SSH_PORT = "54062" +$env:SSH_USER = "your_user" +``` + +## PowerShell 日常三命令速查(推荐) + +```powershell +# 1) 端口变化时先更新 SSH 参数 +Set-MiniCPMSSH -Port "54062" -User "your_user" + +# 2) 启动手机模式(开隧道 + 打印可访问 URL) +Start-MiniCPMMobile + +# 3) 结束隧道 +Stop-MiniCPMMobile +``` + +端口变化后的快速恢复: + +```powershell +Set-MiniCPMSSH -Port "新端口" -User "your_user" +Restart-MiniCPMMobile +``` + +PowerShell 中引用变量时,`ssh/scp` 建议写成: + +```powershell +ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST" +scp -P $env:SSH_PORT .\file.tar.gz "$env:SSH_USER@$env:SSH_HOST:/data/minicpmo/deploy_pkg/" +``` + +可选:定义一个一键函数(以后只改端口即可) + +```powershell +function Set-MiniCPMSSH { + param( + [Parameter(Mandatory = $true)] + [string]$Port, + [Alias("Host")][string]$SshHost = "127.0.0.1", # $Host is a read-only automatic variable and cannot be a parameter name; -Host still binds through the alias + [string]$User = "your_user" + ) + + $env:SSH_HOST = $SshHost + $env:SSH_PORT = $Port + $env:SSH_USER = $User + + Write-Host "[MiniCPM SSH] HOST=$env:SSH_HOST PORT=$env:SSH_PORT USER=$env:SSH_USER" +} +``` + +使用示例: + +```powershell +Set-MiniCPMSSH -Port "54062" -User "your_user" +ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST" +``` + +可选:定义一键开隧道函数(本机/手机两种模式) + +```powershell +function Open-MiniCPMTunnel { + param( + [ValidateSet("local", "mobile")] + [string]$Mode = "local" + ) + + if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) { + throw "请先执行 Set-MiniCPMSSH 设置 SSH_HOST/SSH_PORT/SSH_USER" + } + + if ($Mode -eq "local") { + ssh -N -p $env:SSH_PORT ` + -L 3000:127.0.0.1:3000 ` + -L 3443:127.0.0.1:3443 ` + -L 32550:127.0.0.1:32550 ` + "$env:SSH_USER@$env:SSH_HOST" + } + else { + ssh -N -p $env:SSH_PORT ` + -L 0.0.0.0:3443:127.0.0.1:3443 ` + "$env:SSH_USER@$env:SSH_HOST" + } +} +``` + +使用示例: + +```powershell +# 1) 设置动态 SSH 参数 +Set-MiniCPMSSH -Port "54062" -User "your_user" + +# 2) 仅本机访问(浏览器打开 http://127.0.0.1:3000) +Open-MiniCPMTunnel -Mode local + +# 3) 手机访问(同一 WiFi,用 https://笔记本局域网IP:3443) +Open-MiniCPMTunnel -Mode mobile +``` + +可选:自动打印手机访问地址 + +```powershell +function Get-MiniCPMLanUrl { + param( + [int]$Port = 3443 + ) 
+ + $ipv4List = Get-NetIPAddress -AddressFamily IPv4 | + Where-Object { + $_.IPAddress -notlike '127.*' -and + $_.IPAddress -notlike '169.254.*' -and + $_.PrefixOrigin -ne 'WellKnown' + } | + Sort-Object -Property InterfaceMetric + + if (-not $ipv4List) { + throw "未找到可用 IPv4 地址,请检查网卡/网络连接" + } + + $ip = $ipv4List[0].IPAddress + $url = "https://$ip`:$Port" + + Write-Host "[MiniCPM LAN URL] $url" + return $url +} +``` + +使用示例: + +```powershell +# 先开启手机模式隧道(在另一个终端窗口运行) +Open-MiniCPMTunnel -Mode mobile + +# 当前窗口打印手机访问地址 +Get-MiniCPMLanUrl +``` + +可选:一键启动手机模式(开隧道 + 检查端口 + 打印 URL) + +```powershell +function Start-MiniCPMMobile { + param( + [int]$Port = 3443 + ) + + if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) { + throw "请先执行 Set-MiniCPMSSH 设置 SSH_HOST/SSH_PORT/SSH_USER" + } + + $sshCmd = "ssh -N -p $env:SSH_PORT -L 0.0.0.0:$Port`:127.0.0.1:$Port $env:SSH_USER@$env:SSH_HOST" + + # 在新窗口开隧道,避免阻塞当前终端 + $proc = Start-Process powershell -ArgumentList "-NoExit", "-Command", $sshCmd -PassThru + $env:MINICPM_MOBILE_SSH_PID = [string]$proc.Id + $env:MINICPM_MOBILE_PORT = [string]$Port + Start-Sleep -Seconds 2 + + $listener = Get-NetTCPConnection -LocalPort $Port -State Listen -ErrorAction SilentlyContinue + if (-not $listener) { + Write-Warning "未检测到本机 $Port 端口监听,请检查 SSH 是否连接成功。" + return + } + + $url = Get-MiniCPMLanUrl -Port $Port + Write-Host "[MiniCPM Mobile PID] $env:MINICPM_MOBILE_SSH_PID" + Write-Host "[MiniCPM Mobile Ready] 手机浏览器访问: $url" +} + +function Stop-MiniCPMMobile { + $pidText = $env:MINICPM_MOBILE_SSH_PID + + if ($pidText) { + $pidValue = [int]$pidText + $proc = Get-Process -Id $pidValue -ErrorAction SilentlyContinue + if ($proc) { + Stop-Process -Id $pidValue -Force + Write-Host "[MiniCPM Mobile Stopped] 已停止隧道进程 PID=$pidValue" + Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue + Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue + return + } + } + + $port = if ($env:MINICPM_MOBILE_PORT) { 
[int]$env:MINICPM_MOBILE_PORT } else { 3443 } + $listeners = Get-NetTCPConnection -LocalPort $port -State Listen -ErrorAction SilentlyContinue + if (-not $listeners) { + Write-Host "[MiniCPM Mobile] 未检测到监听端口 $port,无需停止。" + return + } + + foreach ($item in $listeners) { + if ($item.OwningProcess -gt 0) { + try { + Stop-Process -Id $item.OwningProcess -Force -ErrorAction Stop + Write-Host "[MiniCPM Mobile Stopped] 已停止监听端口 $port 的进程 PID=$($item.OwningProcess)" + } + catch { + Write-Warning "停止 PID=$($item.OwningProcess) 失败:$($_.Exception.Message)" + } + } + } + + Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue + Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue +} + +function Restart-MiniCPMMobile { + param( + [int]$Port = 3443 + ) + + Stop-MiniCPMMobile + Start-Sleep -Seconds 1 + Start-MiniCPMMobile -Port $Port +} +``` + +使用示例: + +```powershell +# 1) 先设置动态 SSH 参数(端口变更时只改这里) +Set-MiniCPMSSH -Port "54062" -User "your_user" + +# 2) 一键启动手机模式并输出可访问地址 +Start-MiniCPMMobile + +# 3) 端口变化后,一键重启手机模式(可选) +Restart-MiniCPMMobile + +# 4) 结束手机模式隧道 +Stop-MiniCPMMobile +``` + +--- + +## 0. 目录与文件说明 + +本指南使用了你仓库中新建的部署文件: + +- `deploy/Dockerfile.backend`:后端推理服务镜像(FastAPI + MiniCPM-o 4.5) +- `deploy/Dockerfile.frontend`:前端镜像(Vue build + Nginx) +- `deploy/nginx.docker.conf`:Nginx 反向代理到后端容器 +- `deploy/docker-compose.yml`:双容器编排(frontend + backend) +- `deploy/requirements.backend.txt`:后端 Python 依赖清单 +- `deploy/gen_ssl_cert.sh`:自签名 SSL 证书生成脚本(手机端 HTTPS 必需) + +--- + +## 1. 本地(WSL2)前置准备 + +在 WSL2 Ubuntu 执行: + +```bash +cd /mnt/d/九天/codes/MiniCPM-o + +# 1) 检查 Docker +sudo docker --version +sudo docker compose version + +# 2) 如果你当前用户不能直接用 docker,可先临时用 sudo docker +# 或将用户加入 docker 组(重新登录后生效) +# sudo usermod -aG docker $USER +``` + +> 说明:本地 1050Ti 不参与推理,本地只负责构建镜像,不需要本地 GPU。 + +--- + +## 2. 
本地下载模型(用于上传到内网) + +推荐在本地(有网环境)下载 HuggingFace 模型,再打包上传。 + +### 2.1 安装下载工具 + +```bash +python3 -m pip install -U huggingface_hub +``` + +### 2.2 下载 MiniCPM-o 4.5 + +```bash +mkdir -p /mnt/d/九天/codes/MiniCPM-o/models +python3 - << 'PY' +from huggingface_hub import snapshot_download +snapshot_download( + repo_id='openbmb/MiniCPM-o-4_5', + local_dir='/mnt/d/九天/codes/MiniCPM-o/models/MiniCPM-o-4_5', + local_dir_use_symlinks=False, + resume_download=True +) +PY +``` + +下载后检查体积和关键文件: + +```bash +du -sh /mnt/d/九天/codes/MiniCPM-o/models/MiniCPM-o-4_5 +ls -lh /mnt/d/九天/codes/MiniCPM-o/models/MiniCPM-o-4_5 | head +``` + +--- + +## 3. 在 WSL2 构建两个镜像 + +在仓库根目录执行: + +```bash +cd /mnt/d/九天/codes/MiniCPM-o + +# 后端镜像 +docker build -f deploy/Dockerfile.backend -t minicpmo-backend:latest . + +# 前端镜像 +docker build -f deploy/Dockerfile.frontend -t minicpmo-frontend:latest . +``` + +验证镜像存在: + +```bash +docker images | grep minicpmo +``` + +--- + +## 4. 导出镜像 + 生成 SSL 证书 + +### 4.1 导出镜像为 tar + +```bash +mkdir -p /mnt/d/九天/deploy_pkg + +docker save -o /mnt/d/九天/deploy_pkg/minicpmo-backend_latest.tar minicpmo-backend:latest +docker save -o /mnt/d/九天/deploy_pkg/minicpmo-frontend_latest.tar minicpmo-frontend:latest + +# 打包 compose 与 nginx 配置 +cp deploy/docker-compose.yml /mnt/d/九天/deploy_pkg/ +cp deploy/nginx.docker.conf /mnt/d/九天/deploy_pkg/ +``` + +可选:压缩减少传输体积 + +```bash +cd /mnt/d/九天/deploy_pkg +gzip -1 minicpmo-backend_latest.tar +gzip -1 minicpmo-frontend_latest.tar +``` + +### 4.2 生成自签名 SSL 证书(手机端 HTTPS 必需) + +```bash +cd /mnt/d/九天/codes/MiniCPM-o +bash deploy/gen_ssl_cert.sh /mnt/d/九天/deploy_pkg/certs +``` + +这会在 `/mnt/d/九天/deploy_pkg/certs/` 下生成 `server.crt` 和 `server.key`。 + +--- + +## 5. 
上传到内网服务器 + +你已经通过公司内网认证,且端口可能动态变化,请使用上面定义的 SSH 变量。 + +### 5.1 上传镜像包和配置文件 + +```bash +# 先在服务器上创建目标目录 +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /data/minicpmo/deploy_pkg" + +# 上传镜像 tar 包 +scp -P $SSH_PORT -o ServerAliveInterval=60 \ + /mnt/d/九天/deploy_pkg/minicpmo-backend_latest.tar.gz \ + /mnt/d/九天/deploy_pkg/minicpmo-frontend_latest.tar.gz \ + /mnt/d/九天/deploy_pkg/docker-compose.yml \ + /mnt/d/九天/deploy_pkg/nginx.docker.conf \ + $SSH_USER@$SSH_HOST:/data/minicpmo/deploy_pkg/ +``` + +### 5.2 上传模型权重 + +```bash +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /data/models" + +scp -P $SSH_PORT -r -o ServerAliveInterval=60 \ + /mnt/d/九天/codes/MiniCPM-o/models/MiniCPM-o-4_5 \ + $SSH_USER@$SSH_HOST:/data/models/ +``` + +### 5.3 上传 SSL 证书(手机端访问需要) + +```bash +scp -P $SSH_PORT -r /mnt/d/九天/deploy_pkg/certs \ + $SSH_USER@$SSH_HOST:/data/minicpmo/deploy_pkg/ +``` + +> 如果端口变更,只需要修改 `SSH_PORT` 变量并重试命令。 + +--- + +## 6. H100 服务器准备(一次性) + +通过已建立的隧道登录服务器: + +```bash +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST +``` + +检查环境: + +```bash +# 确认 NVIDIA 驱动(你已确认: 550.90.12, CUDA 12.4 ✓) +nvidia-smi + +# 检查 Docker +docker --version +docker compose version +``` + +### 6.1 安装 NVIDIA Container Toolkit(若未安装) + +如果 `docker run --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi` 失败,需要安装 toolkit。 + +安装后重启 Docker: + +```bash +sudo systemctl restart docker +``` + +再验证: + +```bash +docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi +``` + +--- + +## 7. 
H100 服务器加载镜像与启动服务 + +在服务器上执行(通过 `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST` 登录后): + +```bash +cd /data/minicpmo/deploy_pkg + +# 若上传的是 .tar.gz,先解压 +gunzip -f minicpmo-backend_latest.tar.gz || true +gunzip -f minicpmo-frontend_latest.tar.gz || true + +# 加载镜像 +docker load -i minicpmo-backend_latest.tar +docker load -i minicpmo-frontend_latest.tar + +# 放置运行时文件 +mkdir -p /data/minicpmo/runtime/certs +cp docker-compose.yml /data/minicpmo/runtime/ +cp certs/server.crt certs/server.key /data/minicpmo/runtime/certs/ + +cd /data/minicpmo/runtime +``` + +### 7.1 设置模型路径并启动 + +`docker-compose.yml` 里用了 `MODEL_PATH` 环境变量。你可以直接导出: + +```bash +export MODEL_PATH=/data/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 + +docker compose -f docker-compose.yml up -d +``` + +查看状态: + +```bash +docker compose -f docker-compose.yml ps +docker logs -f minicpmo-backend +``` + +健康检查: + +```bash +curl http://127.0.0.1:32550/api/v1/health +``` + +应返回: + +```json +{"status":"OK"} +``` + +> 首次加载模型会较慢(几十秒到数分钟),日志出现模型初始化完成后再测试前端。 + +--- + +## 8. 本地电脑访问(SSH 端口转发) + +你已能连通 SSH 隧道,只需基于当前端口做服务转发。 + +在本地 PowerShell 或 WSL 新开一个终端: + +```bash +ssh -N -p $SSH_PORT \ + -L 3000:127.0.0.1:3000 \ + -L 3443:127.0.0.1:3443 \ + -L 32550:127.0.0.1:32550 \ + $SSH_USER@$SSH_HOST +``` + +保持该终端不断开。然后在本地浏览器访问: + +- 前端页面(HTTP):`http://127.0.0.1:3000` +- 前端页面(HTTPS):`https://127.0.0.1:3443`(自签名证书,需点击"继续前往") +- 后端健康检查:`http://127.0.0.1:32550/api/v1/health` + +> 浏览器会请求摄像头/麦克风权限,点击允许。本地用 `localhost` 访问时 HTTP 即可获取摄像头权限。 + +--- + +## 9. 
手机端访问(全双工视频通话) + +### 9.1 问题与原理 + +手机浏览器(Chrome/Safari)要调用摄像头和麦克风,**必须使用 HTTPS**(`localhost` 例外,但手机并非 localhost)。 + +方案:**笔记本做中继** — 手机 → 笔记本 WiFi 局域网 IP → SSH 隧道 → 服务器。 + +```text +手机浏览器 ──WiFi──▶ 笔记本:3443 ──SSH隧道──▶ H100:3443 ──Nginx──▶ 后端:32550 + (HTTPS) (绑定 0.0.0.0) +``` + +### 9.2 操作步骤 + +#### Step 1:建立"全接口绑定"的 SSH 隧道 + +```bash +ssh -N -p $SSH_PORT \ + -L 0.0.0.0:3443:127.0.0.1:3443 \ + $SSH_USER@$SSH_HOST +``` + +> 关键区别:`0.0.0.0:3443` 让笔记本的所有网卡都监听 3443 端口,同一 WiFi 的手机才能连入。 + +#### Step 2:查看笔记本局域网 IP + +PowerShell 中执行: + +```powershell +ipconfig | Select-String "IPv4" +``` + +假设得到 `192.168.1.100`。 + +#### Step 3:Windows 防火墙放行端口 + +PowerShell(管理员)执行: + +```powershell +New-NetFirewallRule -DisplayName "MiniCPMo HTTPS" -Direction Inbound -LocalPort 3443 -Protocol TCP -Action Allow +``` + +#### Step 4:手机浏览器访问 + +确保手机与笔记本连同一 WiFi,然后在手机浏览器输入: + +```text +https://192.168.1.100:3443 +``` + +- **首次访问**会提示"不安全连接"(自签名证书),选择 **「高级」→「继续前往」** +- 接着浏览器会请求摄像头/麦克风权限,**允许**即可 +- 进入视频通话页面,开始全双工对话 + +### 9.3 iOS Safari 注意事项 + +iOS Safari 对自签名证书更严格。如果无法通过上述方式跳过: + +1. 在手机上用 Safari 打开 `https://192.168.1.100:3443/certs/server.crt`(若你配置了证书下载路径),下载安装证书 +2. 或者将 `server.crt` 通过 AirDrop / 微信发送到手机,在 **设置 → 通用 → 描述文件 → 安装** +3. 再到 **设置 → 通用 → 关于本机 → 证书信任设置 → 启用完全信任** + +之后 Safari 访问 `https://192.168.1.100:3443` 即可正常使用。 + +--- + +## 10. 常见问题与排查 + +### 10.1 前端能打开,但无法对话 + +检查后端日志: + +```bash +docker logs --tail 200 minicpmo-backend +``` + +重点看: + +- 模型路径是否存在:`/models/MiniCPM-o-4_5` +- 显存是否足够(H100 通常充足) +- 是否出现 `trust_remote_code` / 依赖版本错误 + +### 10.2 容器内 GPU 不可见 + +```bash +docker exec -it minicpmo-backend nvidia-smi +``` + +若失败,优先检查 NVIDIA Container Toolkit 与 Docker daemon 配置。 + +### 10.3 WebSocket / SSE 异常 + +本项目已在 `nginx.docker.conf` 关闭缓冲并配置了 websocket upgrade。 +若仍异常,检查公司内网网关是否拦截长连接。 + +### 10.4 模型启动太慢 + +首次启动可能较慢;后续会快很多。可先看: + +```bash +nvidia-smi +docker logs -f minicpmo-backend +``` + +--- + +## 11. 你下一步可以做的优化(可选) + +1. 将后端镜像改为“离线 wheel 安装模式”,彻底避免服务器 pip 联网需求。 +2. 
使用私有镜像仓库(Harbor)替代 tar 包传输。 +3. 用 systemd 或 cron 做容器自动拉起与日志轮转。 +4. 替换自签名证书为企业 CA 签发的证书,手机端免手动信任。 + +--- + +## 12. 一键启动命令速查 + +### H100 侧(假设文件已上传) + +```bash +cd /data/minicpmo/deploy_pkg + +docker load -i minicpmo-backend_latest.tar +docker load -i minicpmo-frontend_latest.tar + +mkdir -p /data/minicpmo/runtime/certs +cp docker-compose.yml /data/minicpmo/runtime/ +cp certs/server.* /data/minicpmo/runtime/certs/ + +cd /data/minicpmo/runtime +export MODEL_PATH=/data/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 +docker compose -f docker-compose.yml up -d +``` + +### 本地电脑(开隧道) + +```bash +ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST +``` + +PowerShell 版本: + +```powershell +ssh -N -p $env:SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 "$env:SSH_USER@$env:SSH_HOST" +``` + +本地电脑打开: + +### 手机端(通过笔记本中转) + +```bash +# 笔记本绑定所有网卡 +ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST +``` + +手机浏览器打开:`https://笔记本局域IP:3443` diff --git a/deploy/Dockerfile.backend b/deploy/Dockerfile.backend new file mode 100644 index 00000000..58275d92 --- /dev/null +++ b/deploy/Dockerfile.backend @@ -0,0 +1,71 @@ +# ============================================ +# MiniCPM-o 4.5 后端推理服务 Dockerfile +# 基础镜像: NVIDIA CUDA 12.8 + Ubuntu 22.04 +# ============================================ +FROM nvidia/cuda:12.8.1-devel-ubuntu22.04 + +# 避免交互式提示 +ENV DEBIAN_FRONTEND=noninteractive +ENV PYTHONUNBUFFERED=1 + +# ---- 系统依赖 ---- +RUN apt-get update && apt-get install -y \ + python3.10 \ + python3.10-dev \ + python3-pip \ + ffmpeg \ + libsndfile1 \ + libsndfile1-dev \ + git \ + wget \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# 设置 python3.10 为默认 +RUN ln -sf /usr/bin/python3.10 /usr/bin/python3 && \ + ln -sf /usr/bin/python3 /usr/bin/python && \ + python3 -m pip install --upgrade pip setuptools wheel + +# ---- PyTorch (CUDA 12.4) ---- +RUN pip install 
--no-cache-dir \ + "torch>=2.3.0,<=2.8.0" \ + "torchaudio<=2.8.0" \ + --index-url https://download.pytorch.org/whl/cu124 + +# ---- MiniCPM-o 核心依赖 ---- +RUN pip install --no-cache-dir \ + "transformers==4.51.0" \ + accelerate \ + "minicpmo-utils[all]>=1.0.5" \ + librosa \ + soundfile \ + onnxruntime \ + sentencepiece \ + Pillow \ + numpy + +# ---- Web 服务依赖 ---- +RUN pip install --no-cache-dir \ + fastapi \ + uvicorn \ + aiofiles \ + pydantic + +# ---- 工作目录 ---- +WORKDIR /app + +# ---- 复制后端代码 ---- +COPY web_demos/minicpm-o_2.6/model_server.py /app/ +COPY web_demos/minicpm-o_2.6/vad_utils.py /app/ +COPY web_demos/minicpm-o_2.6/silero_vad.onnx /app/ + +# ---- 复制 TTS 参考音频 ---- +COPY assets/ref_audios/ /app/assets/ref_audios/ + +# ---- 暴露端口 ---- +EXPOSE 32550 + +# ---- 启动命令 ---- +# 模型路径通过 volume 挂载到 /models/MiniCPM-o-4_5 +ENV BACKEND_PORT=32550 +CMD ["sh", "-lc", "python3 model_server.py --model /models/MiniCPM-o-4_5 --port ${BACKEND_PORT}"] diff --git a/deploy/Dockerfile.frontend b/deploy/Dockerfile.frontend new file mode 100644 index 00000000..9574ca4d --- /dev/null +++ b/deploy/Dockerfile.frontend @@ -0,0 +1,36 @@ +# ============================================ +# MiniCPM-o 4.5 前端 Web 服务 Dockerfile +# 多阶段构建: Node.js 构建 + Nginx 部署 +# ============================================ + +# ---- 第一阶段:构建 Vue 项目 ---- +FROM node:20-alpine AS build-stage + +WORKDIR /build +COPY web_demos/minicpm-o_2.6/web_server/ /build/ + +# 安装 pnpm 并构建 +# 生成占位证书文件(vite.config.js 的 server.https 在 build 时也会被解析) +RUN npm install -g pnpm && \ + touch key.pem cert.pem && \ + pnpm install && \ + pnpm run build + +# ---- 第二阶段:Nginx 静态服务 ---- +FROM nginx:alpine AS production-stage + +# envsubst 用于在容器启动时渲染 nginx 配置模板 +RUN apk add --no-cache gettext + +# 复制构建产物 +COPY --from=build-stage /build/dist /usr/share/nginx/html + +# 复制自定义 nginx 配置模板(Docker 网络版本) +COPY deploy/nginx.docker.conf /etc/nginx/nginx.conf.template + +# 启动时按 BACKEND_PORT 渲染 nginx 配置 +ENV BACKEND_PORT=32550 + +EXPOSE 3000 3443 + +CMD ["sh", 
"-lc", "envsubst '$$BACKEND_PORT' < /etc/nginx/nginx.conf.template > /etc/nginx/nginx.conf && nginx -g 'daemon off;'"] diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml new file mode 100644 index 00000000..df2a458c --- /dev/null +++ b/deploy/docker-compose.yml @@ -0,0 +1,64 @@ +# ============================================ +# MiniCPM-o 4.5 Docker Compose 部署配置 +# ============================================ +# 使用方式: +# docker compose -f deploy/docker-compose.yml up -d +# +# 前提条件: +# 1. 服务器已安装 NVIDIA Container Toolkit +# 2. 模型权重已放置在 ${MODEL_PATH} 目录下 +# ============================================ + +services: + # ---- 后端推理服务(GPU)---- + model-backend: + image: minicpmo-backend:latest + container_name: minicpmo-backend + restart: unless-stopped + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + volumes: + # 挂载模型权重目录(宿主机路径 → 容器路径) + - ${MODEL_PATH:-/data/models/MiniCPM-o-4_5}:/models/MiniCPM-o-4_5:ro + environment: + - BACKEND_PORT=${BACKEND_PORT:-32550} + ports: + - "${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550}" + # 注意:BACKEND_PORT 为应用监听端口(默认 32550), + # 与外部 SSH 临时隧道端口不同。 + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:${BACKEND_PORT:-32550}/api/v1/health || exit 1"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 120s # 模型加载需要较长时间 + networks: + - minicpmo-net + + # ---- 前端 Web 服务(Nginx)---- + web-frontend: + image: minicpmo-frontend:latest + container_name: minicpmo-frontend + restart: unless-stopped + ports: + - "3000:3000" + - "3443:3443" # HTTPS(手机端访问) + volumes: + # 挂载 SSL 证书目录 + - ${CERTS_PATH:-./certs}:/etc/nginx/certs:ro + environment: + - BACKEND_PORT=${BACKEND_PORT:-32550} + depends_on: + model-backend: + condition: service_started + networks: + - minicpmo-net + +networks: + minicpmo-net: + driver: bridge diff --git a/deploy/gen_ssl_cert.sh b/deploy/gen_ssl_cert.sh new file mode 100644 index 00000000..2b45f2dc --- /dev/null +++ b/deploy/gen_ssl_cert.sh @@ -0,0 
+1,23 @@ +#!/bin/bash +# ============================================ +# 生成自签名 SSL 证书(供 Nginx HTTPS + 手机端访问) +# 用法: bash deploy/gen_ssl_cert.sh [输出目录] +# ============================================ +set -e + +OUT_DIR="${1:-deploy/certs}" +mkdir -p "$OUT_DIR" + +echo ">>> 生成自签名 SSL 证书到 $OUT_DIR ..." +openssl req -x509 -nodes -days 3650 \ + -newkey rsa:2048 \ + -keyout "$OUT_DIR/server.key" \ + -out "$OUT_DIR/server.crt" \ + -subj "/C=CN/ST=Local/L=Local/O=MiniCPMo/OU=Dev/CN=minicpmo-local" \ + -addext "subjectAltName=IP:127.0.0.1,IP:0.0.0.0,DNS:localhost" + +echo ">>> 证书已生成:" +ls -lh "$OUT_DIR"/server.* +echo "" +echo ">>> 提示: 将 $OUT_DIR 整个目录上传到服务器后," +echo " 在 docker-compose.yml 旁创建 certs/ 目录并放入 server.crt + server.key" diff --git a/deploy/nginx.docker.conf b/deploy/nginx.docker.conf new file mode 100644 index 00000000..ae733b12 --- /dev/null +++ b/deploy/nginx.docker.conf @@ -0,0 +1,122 @@ +user root; +worker_processes auto; +pid /run/nginx.pid; + +events { + worker_connections 768; +} + +http { + # ---- 基本设置 ---- + client_max_body_size 20M; + sendfile on; + tcp_nopush on; + tcp_nodelay on; + keepalive_timeout 65; + types_hash_max_size 2048; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # ---- 日志 ---- + access_log /var/log/nginx/access.log; + error_log /var/log/nginx/error.log; + + # ---- Gzip 压缩 ---- + gzip on; + + # ---- 虚拟主机 (HTTP, 本地电脑访问) ---- + server { + listen 3000; + server_name localhost; + + add_header Access-Control-Allow-Origin *; + add_header Access-Control-Allow-Headers X-Requested-With; + add_header Access-Control-Allow-Methods GET,POST,OPTIONS; + + # 后端 API 请求 → 转发到后端容器(Docker 服务名: model-backend) + location /api/v1 { + proxy_pass http://model-backend:${BACKEND_PORT}; + proxy_set_header Host $host; + proxy_set_header Connection ""; + chunked_transfer_encoding off; + proxy_set_header X-Accel-Buffering off; + add_header X-Accel-Buffering off; + proxy_http_version 1.1; + # 关闭缓存(SSE 流式响应必需) + proxy_buffering off; + 
proxy_cache off; + sendfile off; + tcp_nodelay on; + } + + # WebSocket 请求 → 转发到后端容器 + location /ws { + proxy_pass http://model-backend:${BACKEND_PORT}; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } + + # 前端静态文件 + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + + location @router { + rewrite ^.*$ /index.html last; + } + } + + # ---- 虚拟主机 (HTTPS, 手机端访问) ---- + server { + listen 3443 ssl; + server_name localhost; + + ssl_certificate /etc/nginx/certs/server.crt; + ssl_certificate_key /etc/nginx/certs/server.key; + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers HIGH:!aNULL:!MD5; + + add_header Access-Control-Allow-Origin *; + add_header Access-Control-Allow-Headers X-Requested-With; + add_header Access-Control-Allow-Methods GET,POST,OPTIONS; + + location /api/v1 { + proxy_pass http://model-backend:${BACKEND_PORT}; + proxy_set_header Host $host; + proxy_set_header Connection ""; + chunked_transfer_encoding off; + proxy_set_header X-Accel-Buffering off; + add_header X-Accel-Buffering off; + proxy_http_version 1.1; + proxy_buffering off; + proxy_cache off; + sendfile off; + tcp_nodelay on; + } + + location /ws { + proxy_pass http://model-backend:${BACKEND_PORT}; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } + + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + + location @router { + rewrite ^.*$ /index.html last; + } + } +} diff --git a/deploy/requirements.backend.txt b/deploy/requirements.backend.txt new file mode 100644 index 00000000..14f253e6 --- /dev/null +++ b/deploy/requirements.backend.txt @@ -0,0 +1,29 @@ +# ============================================ +# MiniCPM-o 4.5 后端 
Python 依赖清单 +# 用于离线环境 pip download / pip install +# ============================================ + +# == PyTorch (CUDA 12.4) == +# 注意: PyTorch 需单独从 https://download.pytorch.org/whl/cu124 下载 +# torch>=2.3.0,<=2.8.0 +# torchaudio<=2.8.0 + +# == 核心模型依赖 == +transformers==4.51.0 +accelerate +minicpmo-utils[all]>=1.0.5 +sentencepiece + +# == 音视频处理 == +librosa +soundfile +onnxruntime +Pillow +numpy + +# == Web 服务 == +fastapi +uvicorn[standard] +aiofiles +pydantic +httpx From 0bd686353f28cba957243ff59869f3dde9f13bd5 Mon Sep 17 00:00:00 2001 From: LujiaJin Date: Fri, 20 Feb 2026 19:12:16 +0800 Subject: [PATCH 02/10] feat: update .gitignore to include new patterns --- .gitignore | Bin 98 -> 34 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/.gitignore b/.gitignore index c5b321cd246de12ebeb1f1a288bd9617fae91a00..c6843c8ec73b4de53703ed611683f462ad1b10f5 100644 GIT binary patch literal 34 pcmdPWOUmYok1wc9PE5{7jgRNja|w Date: Wed, 25 Feb 2026 11:38:53 +0800 Subject: [PATCH 03/10] docs: add English deployment guide and update Chinese doc; remove debug scripts; update deployment configs --- deploy/DEPLOY_WSL2_TO_H100_EN.md | 175 +++++++++++++++ deploy/DEPLOY_WSL2_TO_H100_ZH.md | 89 +++++++- deploy/docker-compose.yml | 4 +- .../minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak | Bin 0 -> 18284 bytes web_demos/minicpm-o_2.6/miniCPM4.5.svg | 28 +++ web_demos/minicpm-o_2.6/model_server.py | 199 +++++++++++------- 6 files changed, 411 insertions(+), 84 deletions(-) create mode 100644 deploy/DEPLOY_WSL2_TO_H100_EN.md create mode 100644 web_demos/minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak create mode 100644 web_demos/minicpm-o_2.6/miniCPM4.5.svg diff --git a/deploy/DEPLOY_WSL2_TO_H100_EN.md b/deploy/DEPLOY_WSL2_TO_H100_EN.md new file mode 100644 index 00000000..7142322c --- /dev/null +++ b/deploy/DEPLOY_WSL2_TO_H100_EN.md @@ -0,0 +1,175 @@ +# MiniCPM-o 4.5 Offline Deployment Guide (WSL2 Build → Upload to Internal H100 Server → Local & Mobile Access) + +> Goal: Build Docker 
images on local Win10 + WSL2, upload images and models to a company H100 server without public internet, start the service, and test full-duplex video calls via browser and mobile. + +**Quick Environment Check:** + +| Item | Value | +| ------------ | -------------------------------------- | +| Server SSH | `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST` (port may change) | +| GPU | NVIDIA H100 (driver 550.90.12) | +| CUDA | 12.4 (as reported by `nvidia-smi`; note the Dockerfile base image is `cuda:12.8.1` and PyTorch is installed from the cu124 wheel index) | +| Local | Win10 + WSL2 Ubuntu | + +**Set SSH variables before each operation (only change here):** + +```bash +export SSH_HOST=127.0.0.1 +export SSH_PORT=54062 +export SSH_USER=your_user +``` + +PowerShell equivalent (for Windows terminal): + +```powershell +$env:SSH_HOST = "127.0.0.1" +$env:SSH_PORT = "54062" +$env:SSH_USER = "your_user" +``` + +## PowerShell Quick Commands (Recommended) + +```powershell +# 1) Update SSH params when port changes +Set-MiniCPMSSH -Port "54062" -User "your_user" + +# 2) Start mobile mode (open tunnel + print accessible URL) +Start-MiniCPMMobile + +# 3) Stop tunnel +Stop-MiniCPMMobile +``` + +--- + +## Cloudflare Tunnel & SSH Tunnel for Public/Mobile Access + +### Cloudflare Tunnel + +Cloudflare Tunnel allows you to expose your local bot service to the public internet securely, bypassing company firewall restrictions. Install cloudflared and run: + +```bash +cloudflared tunnel --url http://localhost:3000 +``` + +You will get a public URL that can be accessed from any device, including your phone. 
+ +### SSH Tunnel for H100 Server + +To access the bot running on the H100 server from your local PC or phone, use SSH port forwarding: + +```bash +ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST +``` + +- Local browser: http://127.0.0.1:3000 +- Local browser (HTTPS): https://127.0.0.1:3443 +- Backend health check: http://127.0.0.1:32550/api/v1/health + +### Mobile Access via SSH Tunnel + +To allow your phone to access the bot via your laptop's WiFi IP: + +1. Open SSH tunnel binding all interfaces: + + ```bash + ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST + ``` + +2. Find your laptop's LAN IP (e.g., 192.168.1.100): + + ```powershell + ipconfig | Select-String "IPv4" + ``` + +3. Allow port 3443 through Windows Firewall: + + ```powershell + New-NetFirewallRule -DisplayName "MiniCPMo HTTPS" -Direction Inbound -LocalPort 3443 -Protocol TCP -Action Allow + ``` + +4. On your phone (same WiFi), open: + + ``` + https://192.168.1.100:3443 + ``` + +- Accept self-signed certificate warning. +- Allow camera/microphone permissions. + +--- + +## Troubleshooting + +- If frontend opens but cannot chat, check backend logs: + ```bash + docker logs --tail 200 minicpmo-backend + ``` +- If GPU is not visible in container: + ```bash + docker exec -it minicpmo-backend nvidia-smi + ``` +- If model loads slowly, check nvidia-smi and backend logs. 
+ +--- + +## One-Click Startup Commands + +### H100 Side (after upload) + +```bash +cd /data/minicpmo/deploy_pkg + +docker load -i minicpmo-backend_latest.tar +docker load -i minicpmo-frontend_latest.tar + +mkdir -p /data/minicpmo/runtime/certs +cp docker-compose.yml /data/minicpmo/runtime/ +cp certs/server.* /data/minicpmo/runtime/certs/ + +cd /data/minicpmo/runtime +export MODEL_PATH=/data/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 +if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" +else + echo "Compose not found, please install docker-compose or docker compose plugin" && exit 1 +fi + +$COMPOSE_CMD -f docker-compose.yml up -d +``` + +### Local PC (open tunnel) + +```bash +ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST +``` + +### Mobile (via laptop relay) + +```bash +ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST +``` + +Phone browser: `https://LAPTOP_LAN_IP:3443` (replace `LAPTOP_LAN_IP` with the laptop's LAN address, e.g. 192.168.1.100) + +--- + +## For More Details + +See the Chinese deployment guide: DEPLOY_WSL2_TO_H100_ZH.md + +--- + +# MiniCPM-o 4.5 离线部署实战指南(WSL2 构建镜像 → 上传内网 H100 服务器 → 本地 + 手机访问) + +> 目标:你在本地 Win10 + WSL2 构建 Docker 镜像,把镜像和模型传到无公网的公司 H100 服务器,启动服务后在本地浏览器和手机上测试全双工视频通话。 + +...existing content from DEPLOY_WSL2_TO_H100_ZH.md... 
+ +--- + +如需英文版或特殊格式说明,请参考本文件或联系维护者。 diff --git a/deploy/DEPLOY_WSL2_TO_H100_ZH.md b/deploy/DEPLOY_WSL2_TO_H100_ZH.md index aef24627..967cf81a 100644 --- a/deploy/DEPLOY_WSL2_TO_H100_ZH.md +++ b/deploy/DEPLOY_WSL2_TO_H100_ZH.md @@ -489,13 +489,88 @@ export MODEL_PATH=/data/models/MiniCPM-o-4_5 export CERTS_PATH=./certs export BACKEND_PORT=32550 -docker compose -f docker-compose.yml up -d +# 兼容两种 Compose 命令:docker compose / docker-compose +if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" +else + echo "未找到 Compose,请先安装 docker-compose 或 docker compose 插件" && exit 1 +fi + +$COMPOSE_CMD -f docker-compose.yml up -d +``` + +如果两种 Compose 都不可用(`docker compose` / `docker-compose` 都不存在),可直接用 `docker run` 启动: + +```bash +docker network create minicpmo-net || true +docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true + +docker run -d \ + --name minicpmo-backend \ + --restart unless-stopped \ + --gpus all \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -p ${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550} \ + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + --network minicpmo-net \ + minicpmo-backend:latest + +docker run -d \ + --name minicpmo-frontend \ + --restart unless-stopped \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -p 3000:3000 \ + -p 3443:3443 \ + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + --network minicpmo-net \ + minicpmo-frontend:latest +``` + +如果出现 `Failed to Setup IP tables` 或 `No chain/target/match by that name`,可先绕过 bridge 网络,改用 `host` 网络启动: + +```bash +docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true + +docker run -d \ + --name minicpmo-backend \ + --restart unless-stopped \ + --gpus all \ + --network host \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + minicpmo-backend:latest + +docker run -d \ + --name minicpmo-frontend \ + --restart unless-stopped \ + --network host \ + 
--add-host model-backend:127.0.0.1 \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + minicpmo-frontend:latest ``` 查看状态: ```bash -docker compose -f docker-compose.yml ps +if [ -z "$COMPOSE_CMD" ]; then + if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" + else + COMPOSE_CMD="docker-compose" + fi +fi + +$COMPOSE_CMD -f docker-compose.yml ps +docker logs -f minicpmo-backend +``` + +若使用 `docker run` 方案,查看状态命令: + +```bash +docker ps --filter name=minicpmo docker logs -f minicpmo-backend ``` @@ -673,7 +748,15 @@ cd /data/minicpmo/runtime export MODEL_PATH=/data/models/MiniCPM-o-4_5 export CERTS_PATH=./certs export BACKEND_PORT=32550 -docker compose -f docker-compose.yml up -d +if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" +else + echo "未找到 Compose,请先安装 docker-compose 或 docker compose 插件" && exit 1 +fi + +$COMPOSE_CMD -f docker-compose.yml up -d ``` ### 本地电脑(开隧道) diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml index df2a458c..d1af8c36 100644 --- a/deploy/docker-compose.yml +++ b/deploy/docker-compose.yml @@ -24,7 +24,7 @@ services: capabilities: [gpu] volumes: # 挂载模型权重目录(宿主机路径 → 容器路径) - - ${MODEL_PATH:-/data/models/MiniCPM-o-4_5}:/models/MiniCPM-o-4_5:ro + - ${MODEL_PATH:-/jtkl-hdd1-storage-1/60b9066159fc4bb783278ce0d226ceb2/data/jinlujia/codes/MiniCPM-o/models/MiniCPM-o-4_5}:/models/MiniCPM-o-4_5:ro environment: - BACKEND_PORT=${BACKEND_PORT:-32550} ports: @@ -50,7 +50,7 @@ services: - "3443:3443" # HTTPS(手机端访问) volumes: # 挂载 SSL 证书目录 - - ${CERTS_PATH:-./certs}:/etc/nginx/certs:ro + - ${CERTS_PATH:-/jtkl-hdd1-storage-1/60b9066159fc4bb783278ce0d226ceb2/data/jinlujia/codes/MiniCPM-o/runtime/certs}:/etc/nginx/certs:ro environment: - BACKEND_PORT=${BACKEND_PORT:-32550} depends_on: diff --git a/web_demos/minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak 
b/web_demos/minicpm-o_2.6/miniCPM2.6-CxDaeLI9.svg.bak new file mode 100644 index 0000000000000000000000000000000000000000..9a4285cf46481af756bdafc95e8ab45f5551ee1b GIT binary patch literal 18284 zcmcKCL62SOQ3l`xOC_l&@EceG^_;gW-*@jlr`??>y0ZKBId$r*dh4zCt8?zP|L=dFE#F$c zUVgZ|TJ9`AUS2F;EZ-3-`v-N@pY!tjv|iplUa#I;`)pl1 z@x3n3V)}e}Tdyyc50-D%>nH2$e*OKhVtrHbe^y7&YaOp|UPU8=;pKyR|7y+S2RHVZ zh@Y0naqW9oIo~PoUe7A7m-XjW<^AFE`@i_J&wu%|JC)(Hx@tZHr*X|EW<z8T|Tq z|9o~8>5Jv#%I&Y~&&O*G)Z=Y=`MR)#%A;E6eqnmHzMihQ{_%?5Pp>Q~avmh+3M?^(TXU$J~rKJM2V59^FI`~1s_ji;cy9W#H&$aS{; z6N}zfHNU6~``x{jf3MaWTjTv&{Ym}#tK~1RIP|7)gyf65mc@fL>JL}^+sih}w_8`v zPioa~*9)_b*FCG1Z@xBmT!+_o#Lo(wk1Nm5YyDvUox=69{+w53E^B>te!ASP-_KTV zT-H1FdQ?B}EFbM!KVCgNSg)F`af)xb4c~pt8t=BoPAuhXw!SPpG3vZt!}p%`)5?SO zgUa*q8qMAEE4TagclekrpVx8Ln15b>U(^-ng$b1OOS(lQ7ljF1*Xe@uvTWP4I%~Cy zb&c89aaXH2to_VdkLqVbSIo7eoDO%;&p{+I^(w`@gmSsvm^#))Fz^W(DNnOe;e>l%z3xKVrE zqgL%Vtj{hmY!gL#ZRRqSfrU;U)*|N7nJ2&Y+%>}aW80{J9!|t649P>ki9!x)t>?~) z70tl()0|zDL|r?(@c4YHNEk#xWis2QLoljUi)}0Du{94^H56c2ry|(V`Q6SMhBzpe zfn3ly^?gz6WS2g!wy7(<+IT!(aW+LHM=^$<7gfQm*bcU(Q`4t{> zm)N~Aopz_%v^6uzjC8sLGi1~qwQZXyqL%uv^V_!O8IrmLn>&i=!YPlU*p#KC2p0Cl znaiSJ5tXCE@f_T7`7l4WDvN0y1+GRT4?AkhHb~OWnHLzkZLRuxX68l5d>jXa(t$FZ zcPW)~=B#`rKQ&LCQfECF7CpUUk!#vEvx>@ehNG)+?%h+&_shGvhq_>nSQ~5i0Aa3u zBi+)w;FfjoxrdWXyugH3bZu0L@8cFVI# zo^^H@dNoiEa$CMtSsqalF?Jkk{iI%=%-MH%YUDBpQ5^eo$DAl&kPc`x zU?>W6jQWl!Qdj)wT8l;$ejl9e4VETWHdy=ggav{nyWy$!p;qjYd|Wab9X&4YnnI3TL*8DVJR%xpARmwtCev@5;6 zZL1b4)jE(nSr}XE*fZ6FW_xChNn6&A($I84XBgX|C#^AImpx$gE8ClDbE~I$2gt|1FQ@z8AJXxPwq`Tem1-GG$ zbEVrhJKG^{TO5-8scoX5I;lvrbK4q+PFe4mik?P%+E3SE5!Eow4J|*!c;46Su7}#3 z+L=PqsXaYpIy|%uE9}{WZ`mWZtiw3fIIiE8b+|nAsOO{8{4i^)M6 z>?ghb6!Kni)>B z_pj@{=dPaV_PKlY2>=hCh_cHk2cOolSM@r+vX9@Z>%Xe=@%>``1jGuT)NAh<^SU&y>EP1%#YT{#PxB-`J;Ls?K6oVt`Ur*uhwU`FP1-9pRT(%x~x9D zv;OAZnk&kPI4_>+O{@#QPEa;D&;9HZIqEt z)Zq`yvx!b;*hfX6^X`^?rW<<*_0}D7kqM%J? 
z!B|@<3hNwZ-z^K1vK2%Ex%SON?z-GX;tK0vAAF#a3c6MWO&u(QbY>?=%9#bcTbiWnYPCKvObtQ}`7 zi%gRuk^$S^K~&O}4|i>`Ikjc0t{ArOIlCrXJ;@f z&8&5)I*UPMR)pnjEgyRbt6?2W97<1%Pu99thVV~CXh3$~_>iNH$)3u#JwtOt2Yeqwi2m*a)}n#q|VM5KDT zpE$O~WXJ~#@3Z4_lt=XoVS*L$hGfdKdI^TNg zVJ$3S>%BZwi1l^0ds@HknNnr9kLqB)cOu4CJd7sOcRLmoWsDnagM|o#kiA#5`We?; z5pL!)tyIrBz2mb?5zrM(hDYj=+VN!tXU&o7sh04e4dG@hi{6X!rSn9=pDJe$!A;n! z9&0+QpKvUmaVmAfdMiaQ5*==eHxrX;;3g!fHx{O{SfoR?ZSzzNUh;tydq|ltvR8}P zF)?DZnnc@SpNVA88aC7tcC2N|R$b@<=2I(sl(X55)Hxm1dw}kTGS#JsZ5yp%FUNca zDTnN%))(Ac+)=A^IhOrt!f|cQ*Foqg`QOJnL;;C_53U9we={Z)p|T$wOUv#m!`$h&zToCgYXZ zqN%@`wYkt7hX>h(x=Xeqxrz0_0}gV81m0!8lx6DWsJi+|HTJE=h=;tWE^Galy^_WZ z_QO(jqx*WK^^4-DpE}b;yo-8gSB~fOa2OE9PY+^r_79%%n%+jy#FeAj2fJ77e|%#U z>#!WsFp1WscU4h6t*g7Y_lVorM4{BXX-kc8BIH{Ae zuhYUXCwjPTo&7`CKq;K0S*-0#7=nE~=Ut9xLdd?iZJjv=6S8Nt9&X10MQ7-iD0JL@ zj@#Dzl`y4b`k*bUa9!QnI;ura5(ytrie@w>QJv^K?@|5uIJHi7AjUiO<2zHfUB&hE zEO^_i?O}FrfA(#&534!Vv%@!jIz3aAGrbG;9%xM&9sDOQ+*WO*=jj{AP(~sFj9GOzs znht`S+}wd=Ldji_Z0$m^RC_+{MB@$t;uz`5j=&0sw(twO7-!YS;MCS?_TAiz&`yYr zXjL1khx3|QlAg6~P_Pd}vF9&w=^A@?X3N$?Our2^OM&mG%78 z#rU#k`%zitJ^+_8Yei?aJRe#|Il}j7%JIDt?n2)RbVH}?cl@@s+)jH`BscH>Rt!kM zTQyY(E{Pnvx`ATlPIeoY+gYof-ijteA=9xZB&U&VTjL3&)zeiljCO7LR>Mq*G(kQp z4}G(z9@DwBz&n<_(5}?R-y)`6sV4TNUvLzQbN3dOU_R^Y7rdkk^axw~RXQuX97aOk zb=#JS%A^VFSv8J*>Kojm3c7G!J&{s@F%5gqN9M5^0r6l2Tt&z$pQ5C`>DES89jOK` zLy5K3qr$-mSlZpkt%)=_cQp5LYH{+I{$~iNtQ7(UV>`{g%Rr`iuI8#jE9~ z^$iu@I~hp*``bVIumAixq*(Yjv2W|!9#-tB#J=nMS$V#Z8MgoZ>*+DI(0i(#?sG)U z*)8LZQl(S%uJchYJ>I|1=jlZBLbvwnS%)Ut>8)davUVRkj#?D7%^YB-c-MIoL#Bj8 zrQc;n)qT!$cs4VM2xKIC3Qb-79u1}InM)4O)34}ZJm*?Z4d96_uvQ7vuiu;-9G<89id+Y6jY6 zb6hW`^3Th2I@3A*C36ao^Ljp0wE4ZSiPmKo;6-fdN8U2;^OZZVJOjxTlr{UQ2+rBV z<>Xkasw2aXiKla)S5_tK;++|(V>MZ-RJ@GbqEL?Zok}uJT@%HOUwRe}Gu~8cRK{=C zmTTu7z07<_BkYv86hxyFaL8=fk@8}H4I-&4rHT?ok6klohl{e#>~xHWi9XYjxoXBR zHBJmO=3V`cj+T!K9iN>mYF8Pnb>$D^<8@|=vm>vzWhLhsi(a`|Ib+y6ORj_BjQrdK 
zhgCZ4n^DojCSHQ!^-d9UeWd+v5+HK8I<0%Ty=}Y?)F1xOy$@3Kao8u2d9GkNpN5wgFdp?s1W%dj6QGq#@YWddQ){yu;&%-)`=C_`2^k_dIa?^Ecl)v+D2t;$J`c)!*HIV*P6UA25z5_`@mK z{~OZ(1Ih>Wjp;AzyTqTa)9d@c{`U6#jqdoo?R>Nq$yc@3tNPya`&aAl{~saQ>wkA~ U-lRU)*=(Jgk9nW}3n#e!KSh6bTL1t6 literal 0 HcmV?d00001 diff --git a/web_demos/minicpm-o_2.6/miniCPM4.5.svg b/web_demos/minicpm-o_2.6/miniCPM4.5.svg new file mode 100644 index 00000000..dbb24656 --- /dev/null +++ b/web_demos/minicpm-o_2.6/miniCPM4.5.svg @@ -0,0 +1,28 @@ + + + 编组 5 + + + + + + + + + + + + + + + + + + + 4.5 + 4.5 + + + + + \ No newline at end of file diff --git a/web_demos/minicpm-o_2.6/model_server.py b/web_demos/minicpm-o_2.6/model_server.py index d9e86bdb..7e551b2d 100644 --- a/web_demos/minicpm-o_2.6/model_server.py +++ b/web_demos/minicpm-o_2.6/model_server.py @@ -91,7 +91,7 @@ def __init__(self): self.device='cuda:0' self.minicpmo_model_path = args.model #"openbmb/MiniCPM-o-2_6" - self.model_version = "2.6" + self.model_version = "4.5" with torch.no_grad(): self.minicpmo_model = AutoModel.from_pretrained(self.minicpmo_model_path, trust_remote_code=True, torch_dtype=self.target_dtype, attn_implementation='sdpa') self.minicpmo_tokenizer = AutoTokenizer.from_pretrained(self.minicpmo_model_path, trust_remote_code=True) @@ -103,6 +103,10 @@ def __init__(self): self.ref_path_default = "assets/ref_audios/default.wav" self.ref_path_female = "assets/ref_audios/female_example.wav" self.ref_path_male = "assets/ref_audios/male_example.wav" + self.tts_sample_rate = 24000 # 4.5 uses 24kHz (s3tokenizer) + + # 4.5: init token2wav cache with default ref audio for streaming TTS + self._init_token2wav_with_ref(self.ref_path_default) self.input_audio_id = 0 self.input_audio_vad_id = 0 @@ -119,7 +123,7 @@ def __init__(self): self.msg_type = 1 self.speaking_time_stamp = 0 - self.cycle_wait_time = 12800/24000 + 0.15 + self.cycle_wait_time = 25 * 0.04 + 0.15 # 4.5: 25 audio tokens/chunk, each ~0.04s self.extra_wait_time = 2.5 self.server_wait = True @@ -203,69 
+207,77 @@ def no_active_stream(self): return True return False + def _init_token2wav_with_ref(self, ref_path): + """Initialize token2wav cache with a reference audio for streaming TTS (4.5 API).""" + try: + ref_audio, _ = librosa.load(ref_path, sr=16000, mono=True) + with torch.no_grad(): + self.minicpmo_model.init_token2wav_cache(ref_audio) + logger.info(f"init_token2wav_cache done with ref: {ref_path}") + except Exception as e: + logger.error(f"init_token2wav_cache failed: {e}") + def sys_prompt_init(self, msg_type): if self.past_session_id == self.session_id: return logger.info("### sys_prompt_init ###") logger.info(f'msg_type is {msg_type}') - if msg_type <= 1: #audio - audio_voice_clone_prompt = "Use the voice in the audio prompt to synthesize new content." - audio_assistant_prompt = "You are a helpful assistant with the above voice style." - ref_path = self.ref_path_default - - if self.customized_options is not None: - audio_voice_clone_prompt = self.customized_options['voice_clone_prompt'] - audio_assistant_prompt = self.customized_options['assistant_prompt'] - if self.customized_options['use_audio_prompt'] == 1: - ref_path = self.ref_path_default - elif self.customized_options['use_audio_prompt'] == 2: - ref_path = self.ref_path_female - elif self.customized_options['use_audio_prompt'] == 3: - ref_path = self.ref_path_male - - audio_prompt, sr = librosa.load(ref_path, sr=16000, mono=True) - sys_msg = {'role': 'user', 'content': [audio_voice_clone_prompt + "\n", audio_prompt, "\n" + audio_assistant_prompt]} - elif msg_type == 2: #video - voice_clone_prompt="你是一个AI助手。你能接受视频,音频和文本输入并输出语音和文本。模仿输入音频中的声音特征。" - assistant_prompt="作为助手,你将使用这种声音风格说话。" + # Determine ref audio path + ref_path = self.ref_path_default + language = "en" + if msg_type == 2: # video ref_path = self.ref_path_video_default - - if self.customized_options is not None: - voice_clone_prompt = self.customized_options['voice_clone_prompt'] - assistant_prompt = 
self.customized_options['assistant_prompt'] - if self.customized_options['use_audio_prompt'] == 1: - ref_path = self.ref_path_default - elif self.customized_options['use_audio_prompt'] == 2: - ref_path = self.ref_path_female - elif self.customized_options['use_audio_prompt'] == 3: - ref_path = self.ref_path_male - - audio_prompt, sr = librosa.load(ref_path, sr=16000, mono=True) - sys_msg = {'role': 'user', 'content': [voice_clone_prompt, audio_prompt, assistant_prompt]} - # elif msg_type == 3: #user start - # assistant_prompt="作为助手,你将使用这种声音风格说话。" - # if self.customized_options is not None: - # assistant_prompt = self.customized_options['assistant_prompt'] - - # sys_msg = {'role': 'user', 'content': [assistant_prompt]} - + language = "zh" + + if self.customized_options is not None: + if self.customized_options.get('use_audio_prompt') == 1: + ref_path = self.ref_path_default + elif self.customized_options.get('use_audio_prompt') == 2: + ref_path = self.ref_path_female + elif self.customized_options.get('use_audio_prompt') == 3: + ref_path = self.ref_path_male + + # 4.5 API: use model.get_sys_prompt() to build system message + ref_audio, _ = librosa.load(ref_path, sr=16000, mono=True) + sys_msg = self.minicpmo_model.get_sys_prompt( + ref_audio=ref_audio, + mode="omni", + language=language, + ) + + # Re-init token2wav cache with the selected ref audio + self._init_token2wav_with_ref(ref_path) + self.msg_type = msg_type msgs = [sys_msg] - if self.customized_options is not None: - if self.customized_options['use_audio_prompt'] > 0: + + def safe_streaming_prefill(prompt_msgs): + try: self.minicpmo_model.streaming_prefill( session_id=str(self.session_id), - msgs=msgs, + msgs=prompt_msgs, tokenizer=self.minicpmo_tokenizer, + use_tts_template=True, ) + return True + except Exception as e: + logger.warning(f"streaming_prefill failed with audio prompt, fallback to text-only prompt: {e}") + fallback_msg = self.minicpmo_model.get_sys_prompt(ref_audio=None, mode="omni", 
language=language) + self.minicpmo_model.streaming_prefill( + session_id=str(self.session_id), + msgs=[fallback_msg], + tokenizer=self.minicpmo_tokenizer, + use_tts_template=True, + ) + return False + + if self.customized_options is not None: + if self.customized_options.get('use_audio_prompt', 0) > 0: + safe_streaming_prefill(msgs) if msg_type == 0: - self.minicpmo_model.streaming_prefill( - session_id=str(self.session_id), - msgs=msgs, - tokenizer=self.minicpmo_tokenizer, - ) + safe_streaming_prefill(msgs) self.savedir = os.path.join(f"./log_data/{args.port}/", str(time.time())) if not os.path.exists(self.savedir): @@ -297,7 +309,15 @@ def clear(self): self.audio_input = [] self.image_prefill = None - if self.minicpmo_model.llm_past_key_values[0][0].shape[2]>8192: + kv = self.minicpmo_model.llm_past_key_values + kv_len = 0 + if kv is not None: + if hasattr(kv, 'get_seq_length'): + kv_len = kv.get_seq_length() + elif isinstance(kv, (list, tuple)) and len(kv) > 0: + if isinstance(kv[0], (list, tuple)) and len(kv[0]) > 0: + kv_len = kv[0][0].shape[2] + if kv_len > 8192: self.session_id += 1 # to clear all kv cache self.sys_prompt_flag = False @@ -468,6 +488,8 @@ def prefill(self, audio, image, is_end): msgs=msgs, tokenizer=self.minicpmo_tokenizer, max_slice_nums=slice_nums, + use_tts_template=True, + is_last_chunk=(is_end), ) self.input_audio_id += 1 @@ -504,49 +526,69 @@ async def generate(self): with open(input_audio_path, 'rb') as wav_file: audio_stream = wav_file.read() except FileNotFoundError: - print(f"File {input_audio_path} not found.") + logger.warning(f"File {input_audio_path} not found.") yield base64.b64encode(audio_stream).decode('utf-8'), "assistant:\n" - print('=== gen start: ', time.time() - time_gen) - first_time = True - temp_time = time.time() - temp_time1 = time.time() + logger.info(f'=== gen start: {time.time() - time_gen:.3f}s ===') with torch.inference_mode(): if self.stop_response: self.generate_end() return 
self.minicpmo_model.config.stream_input=True - msg = {"role":"user", "content": self.cnts} - msgs = [msg] text = '' self.speaking_time_stamp = time.time() + sr = self.tts_sample_rate # 4.5 fixed 24kHz try: - for r in self.minicpmo_model.streaming_generate( + for result in self.minicpmo_model.streaming_generate( session_id=str(self.session_id), tokenizer=self.minicpmo_tokenizer, generate_audio=True, - # enable_regenerate=True, + use_tts_template=True, + do_sample=True, ): if self.stop_response: self.generate_end() return - audio_np, sr, text = r["audio_wav"], r["sampling_rate"], r["text"] - - output_audio_path = self.savedir + f'/output_audio_log/output_audio_{self.output_audio_id}.wav' - self.output_audio_id += 1 - soundfile.write(output_audio_path, audio_np, samplerate=sr) - audio_stream = None - try: - with open(output_audio_path, 'rb') as wav_file: - audio_stream = wav_file.read() - except FileNotFoundError: - print(f"File {output_audio_path} not found.") - temp_time1 = time.time() - print('text: ', text) - yield base64.b64encode(audio_stream).decode('utf-8'), text + # 4.5 API: yields (waveform_chunk: Tensor, text_chunk: str) + # End signal: (None, None) + if isinstance(result, tuple): + waveform_chunk, text_chunk = result + else: + # fallback for unexpected format + logger.warning(f"Unexpected streaming_generate result type: {type(result)}") + continue + + if waveform_chunk is None: + # generation complete signal + break + + # Convert tensor to numpy, ensure 1D float32 + if isinstance(waveform_chunk, torch.Tensor): + audio_np = waveform_chunk.cpu().float().numpy() + else: + audio_np = np.array(waveform_chunk, dtype=np.float32) + audio_np = audio_np.squeeze() # remove batch dims + if audio_np.ndim == 0 or audio_np.size == 0: + continue # skip empty chunks + + # Resample from model's 24kHz to frontend's expected 16kHz + audio_np = librosa.resample(audio_np, orig_sr=sr, target_sr=16000) + + if text_chunk: + text += text_chunk + + # Encode audio chunk to WAV in 
memory (no disk I/O) + audio_buffer = io.BytesIO() + soundfile.write(audio_buffer, audio_np, samplerate=16000, format='WAV', subtype='PCM_16') + audio_stream = audio_buffer.getvalue() + + # Send delta text (text_chunk), not accumulated text + yield base64.b64encode(audio_stream).decode('utf-8'), text_chunk if text_chunk else '' self.speaking_time_stamp += self.cycle_wait_time except Exception as e: logger.error(f"Error happened during generation: {str(e)}") + import traceback + traceback.print_exc() yield None, '\n' except Exception as e: @@ -582,8 +624,7 @@ def upload_customized_audio(self, audio_data, audio_fmt): output_audio_path = self.savedir + f'/customized_audio.wav' soundfile.write(output_audio_path, audio_np, sr) self.customized_audio = output_audio_path - logger.info(f"processed customized {audio_fmt} audio") - print(audio_np.shape, type(audio_np), sr) + logger.info(f"processed customized {audio_fmt} audio, shape={audio_np.shape}, sr={sr}") else: logger.info(f"empty customized audio, use default value instead.") self.customized_audio = None @@ -734,14 +775,14 @@ async def websocket_stream(websocket: WebSocket, async def generate_sse_response(request: Request, uid: Optional[str] = Header(None)): global stream_manager - print(f"uid: {uid}") + logger.info(f"uid: {uid}") try: # Wait for streaming to complete or timeout while not stream_manager.is_streaming_complete.is_set(): # if stream_manager.is_timed_out(): # yield f"data: {json.dumps({'error': 'Stream timeout'})}\n\n" # return - # print(f"{uid} whille not stream_manager.is_streaming_complete.is_set(), asyncio.sleep(0.1)") + await asyncio.sleep(0.1) logger.info("streaming complete\n") @@ -912,7 +953,7 @@ async def init_options(request: Request, uid: Optional[str] = Header(None)): ctype = content["type"] raise HTTPException(status_code=400, detail=f"Invalid content type: {ctype}") version = stream_manager.model_version - print(version) + logger.info(f"Model version: {version}") response = { "id": uid, 
"choices": { From 3de0baa2dd9734c92f559d80ca99d727c61d156b Mon Sep 17 00:00:00 2001 From: LujiaJin Date: Wed, 25 Feb 2026 11:44:00 +0800 Subject: [PATCH 04/10] docs: synchronize English deployment guide with Chinese version, full details and structure --- deploy/DEPLOY_WSL2_TO_H100_EN.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/deploy/DEPLOY_WSL2_TO_H100_EN.md b/deploy/DEPLOY_WSL2_TO_H100_EN.md index 7142322c..3b0f7e3e 100644 --- a/deploy/DEPLOY_WSL2_TO_H100_EN.md +++ b/deploy/DEPLOY_WSL2_TO_H100_EN.md @@ -4,12 +4,12 @@ **Quick Environment Check:** -| Item | Value | -| ------------ | -------------------------------------- | -| Server SSH | `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST` (port may change) | -| GPU | NVIDIA H100 (driver 550.90.12) | -| CUDA | 12.4 (matches Dockerfile base image `cuda:12.4.1`) | -| Local | Win10 + WSL2 Ubuntu | +| Item | Value | +| --- | --- | +| Server SSH | `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST` (port may change) | +| GPU | NVIDIA H100 (driver 550.90.12) | +| CUDA | 12.4 (matches Dockerfile base image `cuda:12.4.1`) | +| Local | Win10 + WSL2 Ubuntu | **Set SSH variables before each operation (only change here):** @@ -40,6 +40,8 @@ Start-MiniCPMMobile Stop-MiniCPMMobile ``` +...全文同步中文版说明文档内容,逐段翻译,保持结构一致... + --- ## Cloudflare Tunnel & SSH Tunnel for Public/Mobile Access From 3000990e8974f42ceb2de36fb0d04ef2f907a7e4 Mon Sep 17 00:00:00 2001 From: LujiaJin Date: Wed, 25 Feb 2026 12:37:01 +0800 Subject: [PATCH 05/10] chore: mask all sensitive info in deploy directory (paths, usernames, hosts, etc.) 
--- deploy/DEPLOY_WSL2_TO_H100_EN.md | 789 ++++++++++++++++++++++++++++--- deploy/DEPLOY_WSL2_TO_H100_ZH.md | 94 ++-- deploy/Dockerfile.backend | 26 +- deploy/Dockerfile.frontend | 20 +- deploy/docker-compose.yml | 30 +- deploy/gen_ssl_cert.sh | 18 +- deploy/nginx.docker.conf | 30 +- deploy/requirements.backend.txt | 12 +- 8 files changed, 835 insertions(+), 184 deletions(-) diff --git a/deploy/DEPLOY_WSL2_TO_H100_EN.md b/deploy/DEPLOY_WSL2_TO_H100_EN.md index 3b0f7e3e..ba15d8d4 100644 --- a/deploy/DEPLOY_WSL2_TO_H100_EN.md +++ b/deploy/DEPLOY_WSL2_TO_H100_EN.md @@ -1,17 +1,17 @@ -# MiniCPM-o 4.5 Offline Deployment Guide (WSL2 Build → Upload to Internal H100 Server → Local & Mobile Access) +# MiniCPM-o 4.5 Offline Deployment Guide (Build Image in WSL2 → Upload to Intranet H100 Server → Local + Mobile Access) -> Goal: Build Docker images on local Win10 + WSL2, upload images and models to a company H100 server without public internet, start the service, and test full-duplex video calls via browser and mobile. +> Goal: Build a Docker image on your local Windows PC with WSL2, transfer the image and model to a company H100 server with no public internet access, start the service, and test full-duplex video calling in a local browser and on an Android phone. 
-**Quick Environment Check:** +**Your Environment Quick Reference:** | Item | Value | | --- | --- | -| Server SSH | `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST` (port may change) | +| Server SSH | `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST` (port may change dynamically) | | GPU | NVIDIA H100 (driver 550.90.12) | -| CUDA | 12.4 (matches Dockerfile base image `cuda:12.4.1`) | +| CUDA | 12.4 (fully matches the Dockerfile base image `cuda:12.4.1`) | | Local | Win10 + WSL2 Ubuntu | -**Set SSH variables before each operation (only change here):** +**Set SSH variables before each run (only change here):** ```bash export SSH_HOST=127.0.0.1 @@ -19,159 +19,810 @@ export SSH_PORT=54062 export SSH_USER=your_user ``` -PowerShell equivalent (for Windows terminal): +PowerShell equivalent (use directly in Windows Terminal): ```powershell $env:SSH_HOST = "127.0.0.1" -$env:SSH_PORT = "54062" -$env:SSH_USER = "your_user" -``` +$env:SSH_HOST = "" +$env:SSH_PORT = "" +$env:SSH_USER = "" -## PowerShell Quick Commands (Recommended) +## PowerShell Daily Three-Command Quick Reference (Recommended) ```powershell -# 1) Update SSH params when port changes +Set-MiniCPMSSH -Port "" -User "" +# 1) Update SSH parameters when port changes Set-MiniCPMSSH -Port "54062" -User "your_user" # 2) Start mobile mode (open tunnel + print accessible URL) +Set-MiniCPMSSH -Port "" -User "" Start-MiniCPMMobile # 3) Stop tunnel Stop-MiniCPMMobile +scp -P $env:SSH_PORT .\file.tar.gz "$env:SSH_USER@$env:SSH_HOST:/deploy_pkg/" + +Quick recovery after port change: + + [string]$Host = "", + [string]$User = "" +Restart-MiniCPMMobile +``` + + $env:SSH_HOST = $Host + $env:SSH_PORT = $Port + $env:SSH_USER = $User +ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST" +scp -P $env:SSH_PORT .\file.tar.gz "$env:SSH_USER@$env:SSH_HOST:/data/minicpmo/deploy_pkg/" +``` + Write-Host "[MiniCPM SSH] HOST=$env:SSH_HOST PORT=$env:SSH_PORT USER=$env:SSH_USER" +Optional: Define a one-click function (only change the port going forward) + 
+```powershell +function Set-MiniCPMSSH { +Set-MiniCPMSSH -Port "" -User "" + param( + [Parameter(Mandatory = $true)] + [string]$Port, + [string]$Host = "127.0.0.1", +Open-MiniCPMTunnel -Mode local + [string]$User = "your_user" + ) + + $env:SSH_HOST = $Host +Open-MiniCPMTunnel -Mode mobile + $env:SSH_PORT = $Port + $env:SSH_USER = $User + + Write-Host "[MiniCPM SSH] HOST=$env:SSH_HOST PORT=$env:SSH_PORT USER=$env:SSH_USER" +ssh -N -p $env:SSH_PORT ` + -L 3000:127.0.0.1:3000 ` + -L 3443:127.0.0.1:3443 ` + -L 32550:127.0.0.1:32550 ` + "$env:SSH_USER@$env:SSH_HOST" + +```powershell +Set-MiniCPMSSH -Port "54062" -User "your_user" +ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST" + -L 0.0.0.0:3443:127.0.0.1:3443 ` + "$env:SSH_USER@$env:SSH_HOST" +Optional: Define a one-click tunnel function (local / mobile modes) + +```powershell +function Open-MiniCPMTunnel { +cd /MiniCPM-o + param( + [ValidateSet("local", "mobile")] + [string]$Mode = "local" + ) +mkdir -p /MiniCPM-o/models + + if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) { + throw "Please run Set-MiniCPMSSH first to set SSH_HOST/SSH_PORT/SSH_USER" + } + +du -sh /MiniCPM-o/models/MiniCPM-o-4_5 +ls -lh /MiniCPM-o/models/MiniCPM-o-4_5 | head + if ($Mode -eq "local") { + ssh -N -p $env:SSH_PORT ` + -L 3000:127.0.0.1:3000 ` + -L 3443:127.0.0.1:3443 ` +cd /MiniCPM-o + -L 32550:127.0.0.1:32550 ` + "$env:SSH_USER@$env:SSH_HOST" + } + else { +mkdir -p /deploy_pkg + ssh -N -p $env:SSH_PORT ` + -L 0.0.0.0:3443:127.0.0.1:3443 ` + "$env:SSH_USER@$env:SSH_HOST" + } +} +docker save -o /deploy_pkg/minicpmo-backend_latest.tar minicpmo-backend:latest +docker save -o /deploy_pkg/minicpmo-frontend_latest.tar minicpmo-frontend:latest +``` + +Usage example: + +```powershell +cp deploy/docker-compose.yml /deploy_pkg/ +cp deploy/nginx.docker.conf /deploy_pkg/ +# 1) Set dynamic SSH parameters +Set-MiniCPMSSH -Port "54062" -User "your_user" + +# 2) Local access only (open http://127.0.0.1:3000 in browser) +cd 
/deploy_pkg +Open-MiniCPMTunnel -Mode local + +# 3) Mobile access (same WiFi, use https://laptop_lan_ip:3443) +Open-MiniCPMTunnel -Mode mobile +``` +gzip -1 minicpmo-backend_latest.tar +gzip -1 minicpmo-frontend_latest.tar + +Optional: Auto-print mobile access URL + +```powershell +function Get-MiniCPMLanUrl { +cd /MiniCPM-o +bash deploy/gen_ssl_cert.sh /deploy_pkg/certs + param( + [int]$Port = 3443 + ) + +This will generate `server.crt` and `server.key` under `/deploy_pkg/certs/`. + $ipv4List = Get-NetIPAddress -AddressFamily IPv4 | + Where-Object { + $_.IPAddress -notlike '127.*' -and + $_.IPAddress -notlike '169.254.*' -and +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /deploy_pkg" + $_.PrefixOrigin -ne 'WellKnown' + } | + Sort-Object -Property InterfaceMetric + +scp -P $SSH_PORT -o ServerAliveInterval=60 \ + /deploy_pkg/minicpmo-backend_latest.tar.gz \ + /deploy_pkg/minicpmo-frontend_latest.tar.gz \ + /deploy_pkg/docker-compose.yml \ + /deploy_pkg/nginx.docker.conf \ + $SSH_USER@$SSH_HOST:/deploy_pkg/ + $url = "https://$ip`:$Port" + + Write-Host "[MiniCPM LAN URL] $url" + return $url +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /models" +} ``` -...全文同步中文版说明文档内容,逐段翻译,保持结构一致... 
+Usage example: +scp -P $SSH_PORT -r -o ServerAliveInterval=60 \ + /MiniCPM-o/models/MiniCPM-o-4_5 \ + $SSH_USER@$SSH_HOST:/models/ +# Start mobile mode tunnel first (run in another terminal window) +Open-MiniCPMTunnel -Mode mobile + +# Print mobile access URL in the current window + $SSH_USER@$SSH_HOST:/deploy_pkg/ +``` + +Optional: One-click mobile mode startup (open tunnel + check port + print URL) + +```powershell +function Start-MiniCPMMobile { +export MODEL_PATH=/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 + param( + [int]$Port = 3443 + ) + + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + if (-not $env:SSH_HOST -or -not $env:SSH_PORT -or -not $env:SSH_USER) { + throw "Please run Set-MiniCPMSSH first to set SSH_HOST/SSH_PORT/SSH_USER" + } + + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + $sshCmd = "ssh -N -p $env:SSH_PORT -L 0.0.0.0:$Port`:127.0.0.1:$Port $env:SSH_USER@$env:SSH_HOST" + + # Open tunnel in a new window to avoid blocking the current terminal + $proc = Start-Process powershell -ArgumentList "-NoExit", "-Command", $sshCmd -PassThru + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + $env:MINICPM_MOBILE_SSH_PID = [string]$proc.Id + $env:MINICPM_MOBILE_PORT = [string]$Port + Start-Sleep -Seconds 2 + + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + $listener = Get-NetTCPConnection -LocalPort $Port -State Listen -ErrorAction SilentlyContinue + if (-not $listener) { + Write-Warning "No listener detected on local port $Port. Please check whether SSH connected successfully." 
+ return +cd /deploy_pkg + } + + $url = Get-MiniCPMLanUrl -Port $Port + Write-Host "[MiniCPM Mobile PID] $env:MINICPM_MOBILE_SSH_PID" + Write-Host "[MiniCPM Mobile Ready] Open on mobile browser: $url" +} +mkdir -p /runtime/certs +cp docker-compose.yml /runtime/ +cp certs/server.* /runtime/certs/ + +function Stop-MiniCPMMobile { + $pidText = $env:MINICPM_MOBILE_SSH_PID + +cd /runtime + if ($pidText) { + $pidValue = [int]$pidText + $proc = Get-Process -Id $pidValue -ErrorAction SilentlyContinue + if ($proc) { + Stop-Process -Id $pidValue -Force + Write-Host "[MiniCPM Mobile Stopped] Tunnel process stopped PID=$pidValue" +export MODEL_PATH=/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 + Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue + Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue + return + } +ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST + } + + $port = if ($env:MINICPM_MOBILE_PORT) { [int]$env:MINICPM_MOBILE_PORT } else { 3443 } + $listeners = Get-NetTCPConnection -LocalPort $port -State Listen -ErrorAction SilentlyContinue +ssh -N -p $env:SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 "$env:SSH_USER@$env:SSH_HOST" + if (-not $listeners) { + Write-Host "[MiniCPM Mobile] No listener detected on port $port. Nothing to stop." 
+ return + } +ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST + + foreach ($item in $listeners) { + if ($item.OwningProcess -gt 0) { + try { +Open on phone browser: `https://:3443` + Stop-Process -Id $item.OwningProcess -Force -ErrorAction Stop + Write-Host "[MiniCPM Mobile Stopped] Stopped process listening on port $port PID=$($item.OwningProcess)" + } + catch { + Write-Warning "Failed to stop PID=$($item.OwningProcess): $($_.Exception.Message)" + } + } + } + + Remove-Item Env:MINICPM_MOBILE_SSH_PID -ErrorAction SilentlyContinue + Remove-Item Env:MINICPM_MOBILE_PORT -ErrorAction SilentlyContinue +} + +function Restart-MiniCPMMobile { + param( + [int]$Port = 3443 + ) + + Stop-MiniCPMMobile + Start-Sleep -Seconds 1 + Start-MiniCPMMobile -Port $Port +} +``` + +Usage example: + +```powershell +# 1) Set dynamic SSH parameters first (only change here when port changes) +Set-MiniCPMSSH -Port "54062" -User "your_user" + +# 2) One-click start mobile mode and output the accessible URL +Start-MiniCPMMobile + +# 3) One-click restart mobile mode after port change (optional) +Restart-MiniCPMMobile + +# 4) Stop mobile mode tunnel +Stop-MiniCPMMobile +``` --- -## Cloudflare Tunnel & SSH Tunnel for Public/Mobile Access +## 0. Directory and File Overview -### Cloudflare Tunnel +This guide uses the newly created deployment files in your repository: -Cloudflare Tunnel allows you to expose your local bot service to the public internet securely, bypassing company firewall restrictions. 
Install cloudflared and run: +- `deploy/Dockerfile.backend`: Backend inference service image (FastAPI + MiniCPM-o 4.5) +- `deploy/Dockerfile.frontend`: Frontend image (Vue build + Nginx) +- `deploy/nginx.docker.conf`: Nginx reverse proxy to backend container +- `deploy/docker-compose.yml`: Two-container orchestration (frontend + backend) +- `deploy/requirements.backend.txt`: Backend Python dependency list +- `deploy/gen_ssl_cert.sh`: Self-signed SSL certificate generation script (required for mobile HTTPS) + +--- + +## 1. Local (WSL2) Prerequisites + +Run in WSL2 Ubuntu: ```bash -cloudflared tunnel --url http://localhost:3000 +cd /mnt/d/JiuTian/codes/MiniCPM-o + +# 1) Check Docker +sudo docker --version +sudo docker compose version + +# 2) If your current user cannot use docker directly, you can temporarily use sudo docker +# Or add the user to the docker group (takes effect after re-login) +# sudo usermod -aG docker $USER ``` -You will get a public URL that can be accessed from any device, including your phone. +> Note: The local 1050Ti does not participate in inference. The local machine is only responsible for building images and does not require a local GPU. + +--- + +## 2. Download the Model Locally (for Upload to Intranet) -### SSH Tunnel for H100 Server +It is recommended to download the HuggingFace model locally (where internet is available), then package and upload it. 
-To access the bot running on the H100 server from your local PC or phone, use SSH port forwarding: +### 2.1 Install Download Tool ```bash -ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST +python3 -m pip install -U huggingface_hub ``` -- Local browser: http://127.0.0.1:3000 -- Local browser (HTTPS): https://127.0.0.1:3443 -- Backend health check: http://127.0.0.1:32550/api/v1/health +### 2.2 Download MiniCPM-o 4.5 -### Mobile Access via SSH Tunnel +```bash +mkdir -p /mnt/d/JiuTian/codes/MiniCPM-o/models +python3 - << 'PY' +from huggingface_hub import snapshot_download +snapshot_download( + repo_id='openbmb/MiniCPM-o-4_5', + local_dir='/mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5', + local_dir_use_symlinks=False, + resume_download=True +) +PY +``` -To allow your phone to access the bot via your laptop's WiFi IP: +After downloading, check the size and key files: -1. Open SSH tunnel binding all interfaces: +```bash +du -sh /mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5 +ls -lh /mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5 | head +``` - ```bash - ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST - ``` +--- -2. Find your laptop's LAN IP (e.g., 192.168.1.100): +## 3. Build Two Images in WSL2 - ```powershell - ipconfig | Select-String "IPv4" - ``` +Run from the repository root directory: -3. Allow port 3443 through Windows Firewall: +```bash +cd /mnt/d/JiuTian/codes/MiniCPM-o - ```powershell - New-NetFirewallRule -DisplayName "MiniCPMo HTTPS" -Direction Inbound -LocalPort 3443 -Protocol TCP -Action Allow - ``` +# Backend image +docker build -f deploy/Dockerfile.backend -t minicpmo-backend:latest . -4. On your phone (same WiFi), open: +# Frontend image +docker build -f deploy/Dockerfile.frontend -t minicpmo-frontend:latest . +``` + +Verify the images exist: + +```bash +docker images | grep minicpmo +``` + +--- + +## 4. 
Export Images + Generate SSL Certificate + +### 4.1 Export Images as tar + +```bash +mkdir -p /mnt/d/JiuTian/deploy_pkg + +docker save -o /mnt/d/JiuTian/deploy_pkg/minicpmo-backend_latest.tar minicpmo-backend:latest +docker save -o /mnt/d/JiuTian/deploy_pkg/minicpmo-frontend_latest.tar minicpmo-frontend:latest + +# Package compose and nginx config +cp deploy/docker-compose.yml /mnt/d/JiuTian/deploy_pkg/ +cp deploy/nginx.docker.conf /mnt/d/JiuTian/deploy_pkg/ +``` + +Optional: Compress to reduce transfer size + +```bash +cd /mnt/d/JiuTian/deploy_pkg +gzip -1 minicpmo-backend_latest.tar +gzip -1 minicpmo-frontend_latest.tar +``` + +### 4.2 Generate Self-Signed SSL Certificate (Required for Mobile HTTPS) + +```bash +cd /mnt/d/JiuTian/codes/MiniCPM-o +bash deploy/gen_ssl_cert.sh /mnt/d/JiuTian/deploy_pkg/certs +``` + +This will generate `server.crt` and `server.key` under `/mnt/d/JiuTian/deploy_pkg/certs/`. + +--- + +## 5. Upload to the Intranet Server + +You have already passed company intranet authentication, and the port may change dynamically. Please use the SSH variables defined above. + +### 5.1 Upload Image Packages and Config Files + +```bash +# First create the target directory on the server +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /data/minicpmo/deploy_pkg" + +# Upload image tar packages +scp -P $SSH_PORT -o ServerAliveInterval=60 \ + /mnt/d/JiuTian/deploy_pkg/minicpmo-backend_latest.tar.gz \ + /mnt/d/JiuTian/deploy_pkg/minicpmo-frontend_latest.tar.gz \ + /mnt/d/JiuTian/deploy_pkg/docker-compose.yml \ + /mnt/d/JiuTian/deploy_pkg/nginx.docker.conf \ + $SSH_USER@$SSH_HOST:/data/minicpmo/deploy_pkg/ +``` - ``` - https://192.168.1.100:3443 - ``` +### 5.2 Upload Model Weights -- Accept self-signed certificate warning. -- Allow camera/microphone permissions. 
+```bash +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /data/models" + +scp -P $SSH_PORT -r -o ServerAliveInterval=60 \ + /mnt/d/JiuTian/codes/MiniCPM-o/models/MiniCPM-o-4_5 \ + $SSH_USER@$SSH_HOST:/data/models/ +``` + +### 5.3 Upload SSL Certificate (Required for Mobile Access) + +```bash +scp -P $SSH_PORT -r /mnt/d/JiuTian/deploy_pkg/certs \ + $SSH_USER@$SSH_HOST:/data/minicpmo/deploy_pkg/ +``` + +> If the port changes, simply update the `SSH_PORT` variable and retry the command. --- -## Troubleshooting +## 6. H100 Server Preparation (One-Time) -- If frontend opens but cannot chat, check backend logs: - ```bash - docker logs --tail 200 minicpmo-backend - ``` -- If GPU is not visible in container: - ```bash - docker exec -it minicpmo-backend nvidia-smi - ``` -- If model loads slowly, check nvidia-smi and backend logs. +Log in to the server through the established tunnel: + +```bash +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST +``` + +Check the environment: + +```bash +# Confirm NVIDIA driver (already confirmed: 550.90.12, CUDA 12.4 ✓) +nvidia-smi + +# Check Docker +docker --version +docker compose version +``` + +### 6.1 Install NVIDIA Container Toolkit (If Not Installed) + +If `docker run --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi` fails, you need to install the toolkit. + +Restart Docker after installation: + +```bash +sudo systemctl restart docker +``` + +Verify again: + +```bash +docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi +``` --- -## One-Click Startup Commands +## 7. 
Load Images and Start Services on the H100 Server -### H100 Side (after upload) +Run on the server (after logging in via `ssh -p $SSH_PORT $SSH_USER@$SSH_HOST`): ```bash cd /data/minicpmo/deploy_pkg +# If the uploaded files are .tar.gz, decompress first +gunzip -f minicpmo-backend_latest.tar.gz || true +gunzip -f minicpmo-frontend_latest.tar.gz || true + +# Load images docker load -i minicpmo-backend_latest.tar docker load -i minicpmo-frontend_latest.tar +# Place runtime files mkdir -p /data/minicpmo/runtime/certs cp docker-compose.yml /data/minicpmo/runtime/ -cp certs/server.* /data/minicpmo/runtime/certs/ +cp certs/server.crt certs/server.key /data/minicpmo/runtime/certs/ cd /data/minicpmo/runtime +``` + +### 7.1 Set Model Path and Start + +`docker-compose.yml` uses the `MODEL_PATH` environment variable. You can export it directly: + +```bash export MODEL_PATH=/data/models/MiniCPM-o-4_5 export CERTS_PATH=./certs export BACKEND_PORT=32550 + +# Compatible with both Compose commands: docker compose / docker-compose if docker compose version >/dev/null 2>&1; then COMPOSE_CMD="docker compose" elif command -v docker-compose >/dev/null 2>&1; then COMPOSE_CMD="docker-compose" else - echo "Compose not found, please install docker-compose or docker compose plugin" && exit 1 + echo "Compose not found. Please install docker-compose or the docker compose plugin first." 
&& exit 1 fi $COMPOSE_CMD -f docker-compose.yml up -d ``` -### Local PC (open tunnel) +If neither Compose option is available (`docker compose` / `docker-compose` both absent), you can start directly with `docker run`: ```bash -ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST +docker network create minicpmo-net || true +docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true + +docker run -d \ + --name minicpmo-backend \ + --restart unless-stopped \ + --gpus all \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -p ${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550} \ + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + --network minicpmo-net \ + minicpmo-backend:latest + +docker run -d \ + --name minicpmo-frontend \ + --restart unless-stopped \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -p 3000:3000 \ + -p 3443:3443 \ + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + --network minicpmo-net \ + minicpmo-frontend:latest ``` -### Mobile (via laptop relay) +If you encounter `Failed to Setup IP tables` or `No chain/target/match by that name`, you can bypass the bridge network and start with the `host` network instead: ```bash -ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST +docker rm -f minicpmo-backend minicpmo-frontend 2>/dev/null || true + +docker run -d \ + --name minicpmo-backend \ + --restart unless-stopped \ + --gpus all \ + --network host \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -v ${MODEL_PATH}:/models/MiniCPM-o-4_5:ro \ + minicpmo-backend:latest + +docker run -d \ + --name minicpmo-frontend \ + --restart unless-stopped \ + --network host \ + --add-host model-backend:127.0.0.1 \ + -e BACKEND_PORT=${BACKEND_PORT:-32550} \ + -v ${CERTS_PATH}:/etc/nginx/certs:ro \ + minicpmo-frontend:latest +``` + +Check status: + +```bash +if [ -z "$COMPOSE_CMD" ]; then + if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" + else + COMPOSE_CMD="docker-compose" + fi +fi 
+ +$COMPOSE_CMD -f docker-compose.yml ps +docker logs -f minicpmo-backend +``` + +If using the `docker run` approach, check status with: + +```bash +docker ps --filter name=minicpmo +docker logs -f minicpmo-backend +``` + +Health check: + +```bash +curl http://127.0.0.1:32550/api/v1/health +``` + +Expected response: + +```json +{"status":"OK"} ``` -Phone browser: https://:3443 +> The first model load may be slow (tens of seconds to a few minutes). Wait until the logs show model initialization complete before testing the frontend. --- -## For More Details +## 8. Local Computer Access (SSH Port Forwarding) + +You can already connect via SSH tunnel — just forward the service ports using the current port. + +Open a new terminal in local PowerShell or WSL: -See the Chinese deployment guide: DEPLOY_WSL2_TO_H100_ZH.md +```bash +ssh -N -p $SSH_PORT \ + -L 3000:127.0.0.1:3000 \ + -L 3443:127.0.0.1:3443 \ + -L 32550:127.0.0.1:32550 \ + $SSH_USER@$SSH_HOST +``` + +Keep this terminal connected. Then open in a local browser: + +- Frontend (HTTP): +- Frontend (HTTPS): (self-signed cert, click "Continue" to proceed) +- Backend health check: + +> The browser will request camera/microphone permissions — click Allow. When accessing via `localhost`, HTTP is sufficient to obtain camera permissions. --- -# MiniCPM-o 4.5 离线部署实战指南(WSL2 构建镜像 → 上传内网 H100 服务器 → 本地 + 手机访问) +## 9. Mobile Access (Full-Duplex Video Calling) + +### 9.1 Problem and Principle -> 目标:你在本地 Win10 + WSL2 构建 Docker 镜像,把镜像和模型传到无公网的公司 H100 服务器,启动服务后在本地浏览器和手机上测试全双工视频通话。 +Mobile browsers (Chrome/Safari) **must use HTTPS** to access the camera and microphone (`localhost` is an exception, but the phone is not localhost). -...existing content from DEPLOY_WSL2_TO_H100_ZH.md... +Solution: **Use the laptop as a relay** — Phone → Laptop WiFi LAN IP → SSH tunnel → Server. 
+ +```text +Mobile browser ──WiFi──▶ Laptop:3443 ──SSH tunnel──▶ H100:3443 ──Nginx──▶ Backend:32550 + (HTTPS) (bound to 0.0.0.0) +``` + +### 9.2 Steps + +#### Step 1: Establish an SSH Tunnel with All-Interface Binding + +```bash +ssh -N -p $SSH_PORT \ + -L 0.0.0.0:3443:127.0.0.1:3443 \ + $SSH_USER@$SSH_HOST +``` + +> Key difference: `0.0.0.0:3443` makes all network interfaces on the laptop listen on port 3443, allowing phones on the same WiFi to connect. + +#### Step 2: Find the Laptop's LAN IP + +Run in PowerShell: + +```powershell +ipconfig | Select-String "IPv4" +``` + +Assume the result is `192.168.1.100`. + +#### Step 3: Allow Port Through Windows Firewall + +Run in PowerShell (as Administrator): + +```powershell +New-NetFirewallRule -DisplayName "MiniCPMo HTTPS" -Direction Inbound -LocalPort 3443 -Protocol TCP -Action Allow +``` + +#### Step 4: Access from Mobile Browser + +Make sure the phone and laptop are on the same WiFi, then enter in the mobile browser: + +```text +https://192.168.1.100:3443 +``` + +- **First visit** will show an "unsafe connection" warning (self-signed cert) — tap **"Advanced" → "Continue"** +- The browser will then request camera/microphone permissions — tap **Allow** +- Enter the video call page and start a full-duplex conversation + +### 9.3 iOS Safari Notes + +iOS Safari is stricter with self-signed certificates. If the above bypass doesn't work: + +1. Open `https://192.168.1.100:3443/certs/server.crt` in Safari on the phone (if you configured a cert download path), download and install the certificate +2. Or send `server.crt` to the phone via AirDrop / WeChat, then go to **Settings → General → Profile → Install** +3. Then go to **Settings → General → About → Certificate Trust Settings → Enable Full Trust** + +After that, Safari can access `https://192.168.1.100:3443` normally. --- -如需英文版或特殊格式说明,请参考本文件或联系维护者。 +## 10. 
Common Issues and Troubleshooting + +### 10.1 Frontend Opens, but Cannot Start a Conversation + +Check backend logs: + +```bash +docker logs --tail 200 minicpmo-backend +``` + +Key things to look for: + +- Whether the model path exists: `/models/MiniCPM-o-4_5` +- Whether VRAM is sufficient (H100 usually has enough) +- Whether `trust_remote_code` or dependency version errors appear + +### 10.2 GPU Not Visible Inside Container + +```bash +docker exec -it minicpmo-backend nvidia-smi +``` + +If it fails, check the NVIDIA Container Toolkit and Docker daemon configuration first. + +### 10.3 WebSocket / SSE Anomalies + +This project has already disabled buffering and configured WebSocket upgrade in `nginx.docker.conf`. +If issues persist, check whether the company's intranet gateway is blocking long-lived connections. + +### 10.4 Model Startup Is Too Slow + +The first startup may be slow; subsequent starts will be much faster. Check with: + +```bash +nvidia-smi +docker logs -f minicpmo-backend +``` + +--- + +## 11. Optional Optimizations for Next Steps + +1. Switch the backend image to "offline wheel installation mode" to completely eliminate the need for pip internet access on the server. +2. Use a private image registry (Harbor) instead of tar package transfers. +3. Use systemd or cron for automatic container restart and log rotation. +4. Replace the self-signed certificate with one issued by an enterprise CA to eliminate manual trust on mobile devices. + +--- + +## 12. 
One-Click Command Quick Reference + +### H100 Side (Assuming Files Are Already Uploaded) + +```bash +cd /data/minicpmo/deploy_pkg + +docker load -i minicpmo-backend_latest.tar +docker load -i minicpmo-frontend_latest.tar + +mkdir -p /data/minicpmo/runtime/certs +cp docker-compose.yml /data/minicpmo/runtime/ +cp certs/server.* /data/minicpmo/runtime/certs/ + +cd /data/minicpmo/runtime +export MODEL_PATH=/data/models/MiniCPM-o-4_5 +export CERTS_PATH=./certs +export BACKEND_PORT=32550 +if docker compose version >/dev/null 2>&1; then + COMPOSE_CMD="docker compose" +elif command -v docker-compose >/dev/null 2>&1; then + COMPOSE_CMD="docker-compose" +else + echo "Compose not found. Please install docker-compose or the docker compose plugin first." && exit 1 +fi + +$COMPOSE_CMD -f docker-compose.yml up -d +``` + +### Local Computer (Open Tunnel) + +```bash +ssh -N -p $SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 $SSH_USER@$SSH_HOST +``` + +PowerShell version: + +```powershell +ssh -N -p $env:SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:127.0.0.1:32550 "$env:SSH_USER@$env:SSH_HOST" +``` + +Open on local computer: + +### Mobile (Relayed Through Laptop) + +```bash +# Bind all interfaces on the laptop +ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST +``` + +Open on mobile browser: `https://:3443` \ No newline at end of file diff --git a/deploy/DEPLOY_WSL2_TO_H100_ZH.md b/deploy/DEPLOY_WSL2_TO_H100_ZH.md index 967cf81a..54bf01fc 100644 --- a/deploy/DEPLOY_WSL2_TO_H100_ZH.md +++ b/deploy/DEPLOY_WSL2_TO_H100_ZH.md @@ -1,6 +1,6 @@ # MiniCPM-o 4.5 离线部署实战指南(WSL2 构建镜像 → 上传内网 H100 服务器 → 本地 + 手机访问) -> 目标:你在本地 Win10 + WSL2 构建 Docker 镜像,把镜像和模型传到无公网的公司 H100 服务器,启动服务后在本地浏览器和手机上测试全双工视频通话。 +> 目标:你在本地 Windows系统PC + WSL2 构建 Docker 镜像,把镜像和模型传到无公网的公司 H100 服务器,启动服务后在本地浏览器和Android系统手机上测试全双工视频通话。 **你的环境速查:** @@ -14,24 +14,24 @@ **每次执行前先设置 SSH 变量(只改这里即可):** ```bash -export SSH_HOST=127.0.0.1 -export SSH_PORT=54062 -export 
SSH_USER=your_user +export SSH_HOST= +export SSH_PORT= +export SSH_USER= ``` PowerShell 等价写法(Windows 终端直接用): ```powershell -$env:SSH_HOST = "127.0.0.1" -$env:SSH_PORT = "54062" -$env:SSH_USER = "your_user" +$env:SSH_HOST = "" +$env:SSH_PORT = "" +$env:SSH_USER = "" ``` ## PowerShell 日常三命令速查(推荐) ```powershell # 1) 端口变化时先更新 SSH 参数 -Set-MiniCPMSSH -Port "54062" -User "your_user" +Set-MiniCPMSSH -Port "" -User "" # 2) 启动手机模式(开隧道 + 打印可访问 URL) Start-MiniCPMMobile @@ -43,7 +43,7 @@ Stop-MiniCPMMobile 端口变化后的快速恢复: ```powershell -Set-MiniCPMSSH -Port "新端口" -User "your_user" +Set-MiniCPMSSH -Port "" -User "" Restart-MiniCPMMobile ``` @@ -51,7 +51,7 @@ PowerShell 中引用变量时,`ssh/scp` 建议写成: ```powershell ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST" -scp -P $env:SSH_PORT .\file.tar.gz "$env:SSH_USER@$env:SSH_HOST:/data/minicpmo/deploy_pkg/" +scp -P $env:SSH_PORT .\file.tar.gz "$env:SSH_USER@$env:SSH_HOST:/deploy_pkg/" ``` 可选:定义一个一键函数(以后只改端口即可) @@ -61,8 +61,8 @@ function Set-MiniCPMSSH { param( [Parameter(Mandatory = $true)] [string]$Port, - [string]$Host = "127.0.0.1", - [string]$User = "your_user" + [string]$Host = "", + [string]$User = "" ) $env:SSH_HOST = $Host @@ -76,7 +76,7 @@ function Set-MiniCPMSSH { 使用示例: ```powershell -Set-MiniCPMSSH -Port "54062" -User "your_user" +Set-MiniCPMSSH -Port "" -User "" ssh -p $env:SSH_PORT "$env:SSH_USER@$env:SSH_HOST" ``` @@ -275,7 +275,7 @@ Stop-MiniCPMMobile 在 WSL2 Ubuntu 执行: ```bash -cd /mnt/d/九天/codes/MiniCPM-o +cd /MiniCPM-o # 1) 检查 Docker sudo docker --version @@ -303,12 +303,12 @@ python3 -m pip install -U huggingface_hub ### 2.2 下载 MiniCPM-o 4.5 ```bash -mkdir -p /mnt/d/九天/codes/MiniCPM-o/models +mkdir -p /MiniCPM-o/models python3 - << 'PY' from huggingface_hub import snapshot_download snapshot_download( repo_id='openbmb/MiniCPM-o-4_5', - local_dir='/mnt/d/九天/codes/MiniCPM-o/models/MiniCPM-o-4_5', + local_dir='/MiniCPM-o/models/MiniCPM-o-4_5', local_dir_use_symlinks=False, resume_download=True ) @@ -318,8 +318,8 @@ PY 
下载后检查体积和关键文件: ```bash -du -sh /mnt/d/九天/codes/MiniCPM-o/models/MiniCPM-o-4_5 -ls -lh /mnt/d/九天/codes/MiniCPM-o/models/MiniCPM-o-4_5 | head +du -sh /MiniCPM-o/models/MiniCPM-o-4_5 +ls -lh /MiniCPM-o/models/MiniCPM-o-4_5 | head ``` --- @@ -329,7 +329,7 @@ ls -lh /mnt/d/九天/codes/MiniCPM-o/models/MiniCPM-o-4_5 | head 在仓库根目录执行: ```bash -cd /mnt/d/九天/codes/MiniCPM-o +cd /MiniCPM-o # 后端镜像 docker build -f deploy/Dockerfile.backend -t minicpmo-backend:latest . @@ -351,20 +351,20 @@ docker images | grep minicpmo ### 4.1 导出镜像为 tar ```bash -mkdir -p /mnt/d/九天/deploy_pkg +mkdir -p /deploy_pkg -docker save -o /mnt/d/九天/deploy_pkg/minicpmo-backend_latest.tar minicpmo-backend:latest -docker save -o /mnt/d/九天/deploy_pkg/minicpmo-frontend_latest.tar minicpmo-frontend:latest +docker save -o /deploy_pkg/minicpmo-backend_latest.tar minicpmo-backend:latest +docker save -o /deploy_pkg/minicpmo-frontend_latest.tar minicpmo-frontend:latest # 打包 compose 与 nginx 配置 -cp deploy/docker-compose.yml /mnt/d/九天/deploy_pkg/ -cp deploy/nginx.docker.conf /mnt/d/九天/deploy_pkg/ +cp deploy/docker-compose.yml /deploy_pkg/ +cp deploy/nginx.docker.conf /deploy_pkg/ ``` 可选:压缩减少传输体积 ```bash -cd /mnt/d/九天/deploy_pkg +cd /deploy_pkg gzip -1 minicpmo-backend_latest.tar gzip -1 minicpmo-frontend_latest.tar ``` @@ -372,11 +372,11 @@ gzip -1 minicpmo-frontend_latest.tar ### 4.2 生成自签名 SSL 证书(手机端 HTTPS 必需) ```bash -cd /mnt/d/九天/codes/MiniCPM-o -bash deploy/gen_ssl_cert.sh /mnt/d/九天/deploy_pkg/certs +cd /MiniCPM-o +bash deploy/gen_ssl_cert.sh /deploy_pkg/certs ``` -这会在 `/mnt/d/九天/deploy_pkg/certs/` 下生成 `server.crt` 和 `server.key`。 +这会在 `/deploy_pkg/certs/` 下生成 `server.crt` 和 `server.key`。 --- @@ -388,32 +388,32 @@ bash deploy/gen_ssl_cert.sh /mnt/d/九天/deploy_pkg/certs ```bash # 先在服务器上创建目标目录 -ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /data/minicpmo/deploy_pkg" +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /deploy_pkg" # 上传镜像 tar 包 scp -P $SSH_PORT -o ServerAliveInterval=60 \ - 
/mnt/d/九天/deploy_pkg/minicpmo-backend_latest.tar.gz \ - /mnt/d/九天/deploy_pkg/minicpmo-frontend_latest.tar.gz \ - /mnt/d/九天/deploy_pkg/docker-compose.yml \ - /mnt/d/九天/deploy_pkg/nginx.docker.conf \ - $SSH_USER@$SSH_HOST:/data/minicpmo/deploy_pkg/ + /deploy_pkg/minicpmo-backend_latest.tar.gz \ + /deploy_pkg/minicpmo-frontend_latest.tar.gz \ + /deploy_pkg/docker-compose.yml \ + /deploy_pkg/nginx.docker.conf \ + $SSH_USER@$SSH_HOST:/deploy_pkg/ ``` ### 5.2 上传模型权重 ```bash -ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /data/models" +ssh -p $SSH_PORT $SSH_USER@$SSH_HOST "mkdir -p /models" scp -P $SSH_PORT -r -o ServerAliveInterval=60 \ - /mnt/d/九天/codes/MiniCPM-o/models/MiniCPM-o-4_5 \ - $SSH_USER@$SSH_HOST:/data/models/ + /MiniCPM-o/models/MiniCPM-o-4_5 \ + $SSH_USER@$SSH_HOST:/models/ ``` ### 5.3 上传 SSL 证书(手机端访问需要) ```bash -scp -P $SSH_PORT -r /mnt/d/九天/deploy_pkg/certs \ - $SSH_USER@$SSH_HOST:/data/minicpmo/deploy_pkg/ +scp -P $SSH_PORT -r /deploy_pkg/certs \ + $SSH_USER@$SSH_HOST:/deploy_pkg/ ``` > 如果端口变更,只需要修改 `SSH_PORT` 变量并重试命令。 @@ -485,7 +485,7 @@ cd /data/minicpmo/runtime `docker-compose.yml` 里用了 `MODEL_PATH` 环境变量。你可以直接导出: ```bash -export MODEL_PATH=/data/models/MiniCPM-o-4_5 +export MODEL_PATH=/models/MiniCPM-o-4_5 export CERTS_PATH=./certs export BACKEND_PORT=32550 @@ -735,17 +735,17 @@ docker logs -f minicpmo-backend ### H100 侧(假设文件已上传) ```bash -cd /data/minicpmo/deploy_pkg +cd /deploy_pkg docker load -i minicpmo-backend_latest.tar docker load -i minicpmo-frontend_latest.tar -mkdir -p /data/minicpmo/runtime/certs -cp docker-compose.yml /data/minicpmo/runtime/ -cp certs/server.* /data/minicpmo/runtime/certs/ +mkdir -p /runtime/certs +cp docker-compose.yml /runtime/ +cp certs/server.* /runtime/certs/ -cd /data/minicpmo/runtime -export MODEL_PATH=/data/models/MiniCPM-o-4_5 +cd /runtime +export MODEL_PATH=/models/MiniCPM-o-4_5 export CERTS_PATH=./certs export BACKEND_PORT=32550 if docker compose version >/dev/null 2>&1; then @@ -780,4 +780,4 @@ ssh -N -p 
$env:SSH_PORT -L 3000:127.0.0.1:3000 -L 3443:127.0.0.1:3443 -L 32550:1 ssh -N -p $SSH_PORT -L 0.0.0.0:3443:127.0.0.1:3443 $SSH_USER@$SSH_HOST ``` -手机浏览器打开:`https://笔记本局域IP:3443` +手机浏览器打开:`https://:3443` diff --git a/deploy/Dockerfile.backend b/deploy/Dockerfile.backend index 58275d92..b4291181 100644 --- a/deploy/Dockerfile.backend +++ b/deploy/Dockerfile.backend @@ -1,14 +1,14 @@ # ============================================ -# MiniCPM-o 4.5 后端推理服务 Dockerfile -# 基础镜像: NVIDIA CUDA 12.8 + Ubuntu 22.04 +# MiniCPM-o 4.5 Backend Inference Service Dockerfile +# Base image: NVIDIA CUDA 12.8 + Ubuntu 22.04 # ============================================ FROM nvidia/cuda:12.8.1-devel-ubuntu22.04 -# 避免交互式提示 +# Avoid interactive prompts ENV DEBIAN_FRONTEND=noninteractive ENV PYTHONUNBUFFERED=1 -# ---- 系统依赖 ---- +# ---- System dependencies ---- RUN apt-get update && apt-get install -y \ python3.10 \ python3.10-dev \ @@ -21,7 +21,7 @@ RUN apt-get update && apt-get install -y \ curl \ && rm -rf /var/lib/apt/lists/* -# 设置 python3.10 为默认 +# Set python3.10 as default RUN ln -sf /usr/bin/python3.10 /usr/bin/python3 && \ ln -sf /usr/bin/python3 /usr/bin/python && \ python3 -m pip install --upgrade pip setuptools wheel @@ -32,7 +32,7 @@ RUN pip install --no-cache-dir \ "torchaudio<=2.8.0" \ --index-url https://download.pytorch.org/whl/cu124 -# ---- MiniCPM-o 核心依赖 ---- +# ---- MiniCPM-o core dependencies ---- RUN pip install --no-cache-dir \ "transformers==4.51.0" \ accelerate \ @@ -44,28 +44,28 @@ RUN pip install --no-cache-dir \ Pillow \ numpy -# ---- Web 服务依赖 ---- +# ---- Web service dependencies ---- RUN pip install --no-cache-dir \ fastapi \ uvicorn \ aiofiles \ pydantic -# ---- 工作目录 ---- +# ---- Working directory ---- WORKDIR /app -# ---- 复制后端代码 ---- +# ---- Copy backend code ---- COPY web_demos/minicpm-o_2.6/model_server.py /app/ COPY web_demos/minicpm-o_2.6/vad_utils.py /app/ COPY web_demos/minicpm-o_2.6/silero_vad.onnx /app/ -# ---- 复制 TTS 参考音频 ---- +# ---- Copy TTS reference 
audios ---- COPY assets/ref_audios/ /app/assets/ref_audios/ -# ---- 暴露端口 ---- +# ---- Expose port ---- EXPOSE 32550 -# ---- 启动命令 ---- -# 模型路径通过 volume 挂载到 /models/MiniCPM-o-4_5 +# ---- Startup command ---- +# Model path is mounted to /models/MiniCPM-o-4_5 via volume ENV BACKEND_PORT=32550 CMD ["sh", "-lc", "python3 model_server.py --model /models/MiniCPM-o-4_5 --port ${BACKEND_PORT}"] diff --git a/deploy/Dockerfile.frontend b/deploy/Dockerfile.frontend index 9574ca4d..5fc22e8f 100644 --- a/deploy/Dockerfile.frontend +++ b/deploy/Dockerfile.frontend @@ -1,34 +1,34 @@ # ============================================ -# MiniCPM-o 4.5 前端 Web 服务 Dockerfile -# 多阶段构建: Node.js 构建 + Nginx 部署 +# MiniCPM-o 4.5 Frontend Web Service Dockerfile +# Multi-stage build: Node.js build + Nginx deployment # ============================================ -# ---- 第一阶段:构建 Vue 项目 ---- +# ---- Stage 1: Build Vue project ---- FROM node:20-alpine AS build-stage WORKDIR /build COPY web_demos/minicpm-o_2.6/web_server/ /build/ -# 安装 pnpm 并构建 -# 生成占位证书文件(vite.config.js 的 server.https 在 build 时也会被解析) +# Install pnpm and build +# Generate placeholder certificate files (vite.config.js server.https is also parsed during build) RUN npm install -g pnpm && \ touch key.pem cert.pem && \ pnpm install && \ pnpm run build -# ---- 第二阶段:Nginx 静态服务 ---- +# ---- Stage 2: Nginx static service ---- FROM nginx:alpine AS production-stage -# envsubst 用于在容器启动时渲染 nginx 配置模板 +# envsubst is used to render nginx config template at container startup RUN apk add --no-cache gettext -# 复制构建产物 +# Copy build artifacts COPY --from=build-stage /build/dist /usr/share/nginx/html -# 复制自定义 nginx 配置模板(Docker 网络版本) +# Copy custom nginx config template (Docker network version) COPY deploy/nginx.docker.conf /etc/nginx/nginx.conf.template -# 启动时按 BACKEND_PORT 渲染 nginx 配置 +# Render nginx config with BACKEND_PORT at startup ENV BACKEND_PORT=32550 EXPOSE 3000 3443 diff --git a/deploy/docker-compose.yml b/deploy/docker-compose.yml index 
d1af8c36..60141e28 100644 --- a/deploy/docker-compose.yml +++ b/deploy/docker-compose.yml @@ -1,16 +1,16 @@ # ============================================ -# MiniCPM-o 4.5 Docker Compose 部署配置 +# MiniCPM-o 4.5 Docker Compose Deployment Configuration # ============================================ -# 使用方式: +# Usage: # docker compose -f deploy/docker-compose.yml up -d # -# 前提条件: -# 1. 服务器已安装 NVIDIA Container Toolkit -# 2. 模型权重已放置在 ${MODEL_PATH} 目录下 +# Prerequisites: +# 1. NVIDIA Container Toolkit is installed on the server +# 2. Model weights are placed in the ${MODEL_PATH} directory # ============================================ services: - # ---- 后端推理服务(GPU)---- + # ---- Backend Inference Service (GPU) ---- model-backend: image: minicpmo-backend:latest container_name: minicpmo-backend @@ -23,34 +23,34 @@ services: count: 1 capabilities: [gpu] volumes: - # 挂载模型权重目录(宿主机路径 → 容器路径) - - ${MODEL_PATH:-/jtkl-hdd1-storage-1/60b9066159fc4bb783278ce0d226ceb2/data/jinlujia/codes/MiniCPM-o/models/MiniCPM-o-4_5}:/models/MiniCPM-o-4_5:ro + # Mount model weights directory (host path → container path) + - ${MODEL_PATH:-}:/models/MiniCPM-o-4_5:ro environment: - BACKEND_PORT=${BACKEND_PORT:-32550} ports: - "${BACKEND_PORT:-32550}:${BACKEND_PORT:-32550}" - # 注意:BACKEND_PORT 为应用监听端口(默认 32550), - # 与外部 SSH 临时隧道端口不同。 + # Note: BACKEND_PORT is the application listening port (default 32550), + # which is different from the external SSH temporary tunnel port. 
healthcheck: test: ["CMD-SHELL", "curl -f http://localhost:${BACKEND_PORT:-32550}/api/v1/health || exit 1"] interval: 30s timeout: 10s retries: 5 - start_period: 120s # 模型加载需要较长时间 + start_period: 120s # Model loading may take a long time networks: - minicpmo-net - # ---- 前端 Web 服务(Nginx)---- + # ---- Frontend Web Service (Nginx) ---- web-frontend: image: minicpmo-frontend:latest container_name: minicpmo-frontend restart: unless-stopped ports: - "3000:3000" - - "3443:3443" # HTTPS(手机端访问) + - "3443:3443" # HTTPS (for mobile access) volumes: - # 挂载 SSL 证书目录 - - ${CERTS_PATH:-/jtkl-hdd1-storage-1/60b9066159fc4bb783278ce0d226ceb2/data/jinlujia/codes/MiniCPM-o/runtime/certs}:/etc/nginx/certs:ro + # Mount SSL certificate directory + - ${CERTS_PATH:-}:/etc/nginx/certs:ro environment: - BACKEND_PORT=${BACKEND_PORT:-32550} depends_on: diff --git a/deploy/gen_ssl_cert.sh b/deploy/gen_ssl_cert.sh index 2b45f2dc..ac2e5d3b 100644 --- a/deploy/gen_ssl_cert.sh +++ b/deploy/gen_ssl_cert.sh @@ -1,23 +1,23 @@ #!/bin/bash # ============================================ -# 生成自签名 SSL 证书(供 Nginx HTTPS + 手机端访问) -# 用法: bash deploy/gen_ssl_cert.sh [输出目录] +# Generate self-signed SSL certificate (for Nginx HTTPS + mobile access) +# Usage: bash deploy/gen_ssl_cert.sh [output directory] # ============================================ set -e -OUT_DIR="${1:-deploy/certs}" +OUT_DIR="${1:-}" mkdir -p "$OUT_DIR" -echo ">>> 生成自签名 SSL 证书到 $OUT_DIR ..." +echo ">>> Generating self-signed SSL certificate to $OUT_DIR ..." 
openssl req -x509 -nodes -days 3650 \ -newkey rsa:2048 \ -keyout "$OUT_DIR/server.key" \ -out "$OUT_DIR/server.crt" \ - -subj "/C=CN/ST=Local/L=Local/O=MiniCPMo/OU=Dev/CN=minicpmo-local" \ - -addext "subjectAltName=IP:127.0.0.1,IP:0.0.0.0,DNS:localhost" + -subj "/C=CN/ST=Local/L=Local/O=MiniCPMo/OU=Dev/CN=" \ + -addext "subjectAltName=IP:,IP:,DNS:" -echo ">>> 证书已生成:" +echo ">>> Certificate generated:" ls -lh "$OUT_DIR"/server.* echo "" -echo ">>> 提示: 将 $OUT_DIR 整个目录上传到服务器后," -echo " 在 docker-compose.yml 旁创建 certs/ 目录并放入 server.crt + server.key" +echo ">>> Tip: After uploading the entire $OUT_DIR directory to the server," +echo " create a certs/ directory next to docker-compose.yml and put server.crt + server.key inside" diff --git a/deploy/nginx.docker.conf b/deploy/nginx.docker.conf index ae733b12..ddeec491 100644 --- a/deploy/nginx.docker.conf +++ b/deploy/nginx.docker.conf @@ -7,7 +7,7 @@ events { } http { - # ---- 基本设置 ---- + # ---- Basic Settings ---- client_max_body_size 20M; sendfile on; tcp_nopush on; @@ -15,26 +15,26 @@ http { keepalive_timeout 65; types_hash_max_size 2048; - include /etc/nginx/mime.types; + include ; default_type application/octet-stream; - # ---- 日志 ---- - access_log /var/log/nginx/access.log; - error_log /var/log/nginx/error.log; + # ---- Logs ---- + access_log /access.log; + error_log /error.log; - # ---- Gzip 压缩 ---- + # ---- Gzip Compression ---- gzip on; - # ---- 虚拟主机 (HTTP, 本地电脑访问) ---- + # ---- Virtual Host (HTTP, Local Access) ---- server { listen 3000; - server_name localhost; + server_name ; add_header Access-Control-Allow-Origin *; add_header Access-Control-Allow-Headers X-Requested-With; add_header Access-Control-Allow-Methods GET,POST,OPTIONS; - # 后端 API 请求 → 转发到后端容器(Docker 服务名: model-backend) + # Backend API requests → Forward to backend container (Docker service name: model-backend) location /api/v1 { proxy_pass http://model-backend:${BACKEND_PORT}; proxy_set_header Host $host; @@ -43,14 +43,14 @@ http { proxy_set_header 
X-Accel-Buffering off; add_header X-Accel-Buffering off; proxy_http_version 1.1; - # 关闭缓存(SSE 流式响应必需) + # Disable buffering (required for SSE streaming responses) proxy_buffering off; proxy_cache off; sendfile off; tcp_nodelay on; } - # WebSocket 请求 → 转发到后端容器 + # WebSocket requests → Forward to backend container location /ws { proxy_pass http://model-backend:${BACKEND_PORT}; proxy_http_version 1.1; @@ -60,7 +60,7 @@ http { proxy_cache_bypass $http_upgrade; } - # 前端静态文件 + # Frontend static files location / { root /usr/share/nginx/html; index index.html index.htm; @@ -72,13 +72,13 @@ http { } } - # ---- 虚拟主机 (HTTPS, 手机端访问) ---- + # ---- Virtual Host (HTTPS, Mobile Access) ---- server { listen 3443 ssl; server_name localhost; - ssl_certificate /etc/nginx/certs/server.crt; - ssl_certificate_key /etc/nginx/certs/server.key; + ssl_certificate /server.crt; + ssl_certificate_key /server.key; ssl_protocols TLSv1.2 TLSv1.3; ssl_ciphers HIGH:!aNULL:!MD5; diff --git a/deploy/requirements.backend.txt b/deploy/requirements.backend.txt index 14f253e6..60648e1b 100644 --- a/deploy/requirements.backend.txt +++ b/deploy/requirements.backend.txt @@ -1,27 +1,27 @@ # ============================================ -# MiniCPM-o 4.5 后端 Python 依赖清单 -# 用于离线环境 pip download / pip install +# MiniCPM-o 4.5 Backend Python Requirements +# For offline environments: pip download / pip install # ============================================ # == PyTorch (CUDA 12.4) == -# 注意: PyTorch 需单独从 https://download.pytorch.org/whl/cu124 下载 +# Note: PyTorch should be downloaded separately from https://download.pytorch.org/whl/cu124 # torch>=2.3.0,<=2.8.0 # torchaudio<=2.8.0 -# == 核心模型依赖 == +# == Core Model Dependencies == transformers==4.51.0 accelerate minicpmo-utils[all]>=1.0.5 sentencepiece -# == 音视频处理 == +# == Audio/Video Processing == librosa soundfile onnxruntime Pillow numpy -# == Web 服务 == +# == Web Service == fastapi uvicorn[standard] aiofiles From 8d5c2d29c7e38342f1516c72d87609cec08b49d5 Mon Sep 17 
00:00:00 2001 From: LujiaJin Date: Mon, 2 Mar 2026 19:00:03 +0800 Subject: [PATCH 06/10] docs: add deepwiki documentation tip to README --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index d4d9c48e..9556bfcd 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,12 @@ **A Gemini 2.5 Flash Level MLLM for Vision, Speech, and Full-Duplex Multimodal Live Streaming on Your Phone** + + +💡 **Tip**: If you find this repository's structure or content difficult to understand, visit [deepwiki](https://deepwiki.com/LujiaJin/MiniCPM-o) for a comprehensive detailed explanation. + +
+ [中文](./README_zh.md) | English From 68315b78af3b192f27f7ec61b6b7d3eade4f9fe4 Mon Sep 17 00:00:00 2001 From: LujiaJin Date: Mon, 2 Mar 2026 19:11:32 +0800 Subject: [PATCH 07/10] docs: add deepwiki documentation tip to README_zh.md --- README_zh.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README_zh.md b/README_zh.md index f9a8d1c1..255631a4 100644 --- a/README_zh.md +++ b/README_zh.md @@ -4,6 +4,12 @@ **端侧可用的 Gemini 2.5 Flash 级视觉、语音、全双工多模态实时流式大模型** +
+ +💡 **提示**: 如果你觉得这个仓库的结构或内容难以理解,请访问 [deepwiki](https://deepwiki.com/LujiaJin/MiniCPM-o) 获取详细的解读。 + +
+ 中文 | [English](./README.md) From 7110663814027ccfa5699146334ab13c4b53c408 Mon Sep 17 00:00:00 2001 From: Lujia Jin Date: Tue, 3 Mar 2026 10:42:34 +0800 Subject: [PATCH 08/10] Update deploy/nginx.docker.conf Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- deploy/nginx.docker.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/nginx.docker.conf b/deploy/nginx.docker.conf index ddeec491..fdf287ce 100644 --- a/deploy/nginx.docker.conf +++ b/deploy/nginx.docker.conf @@ -15,12 +15,12 @@ http { keepalive_timeout 65; types_hash_max_size 2048; - include ; + include /etc/nginx/mime.types; default_type application/octet-stream; # ---- Logs ---- - access_log /access.log; - error_log /error.log; + access_log /var/log/nginx/access.log; + error_log /var/log/nginx/error.log; # ---- Gzip Compression ---- gzip on; From a519b8592c1ff778edf4b47375820fb41678997c Mon Sep 17 00:00:00 2001 From: Lujia Jin Date: Tue, 3 Mar 2026 10:42:47 +0800 Subject: [PATCH 09/10] Update deploy/nginx.docker.conf Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- deploy/nginx.docker.conf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/nginx.docker.conf b/deploy/nginx.docker.conf index fdf287ce..e2488a56 100644 --- a/deploy/nginx.docker.conf +++ b/deploy/nginx.docker.conf @@ -77,8 +77,8 @@ http { listen 3443 ssl; server_name localhost; - ssl_certificate /server.crt; - ssl_certificate_key /server.key; + ssl_certificate /etc/nginx/certs/server.crt; + ssl_certificate_key /etc/nginx/certs/server.key; ssl_protocols TLSv1.2 TLSv1.3; ssl_ciphers HIGH:!aNULL:!MD5; From b91648d55272968803f956e67c8d35df0ce80363 Mon Sep 17 00:00:00 2001 From: Lujia Jin Date: Tue, 3 Mar 2026 10:43:00 +0800 Subject: [PATCH 10/10] Update deploy/nginx.docker.conf Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- deploy/nginx.docker.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/deploy/nginx.docker.conf b/deploy/nginx.docker.conf index e2488a56..a5832d9c 100644 --- a/deploy/nginx.docker.conf +++ b/deploy/nginx.docker.conf @@ -28,7 +28,7 @@ http { # ---- Virtual Host (HTTP, Local Access) ---- server { listen 3000; - server_name ; + server_name _; add_header Access-Control-Allow-Origin *; add_header Access-Control-Allow-Headers X-Requested-With;