From 234c38ddd820ecddb4838f0d69579a31d67254c6 Mon Sep 17 00:00:00 2001 From: PCBZ Date: Sun, 19 Apr 2026 12:32:16 -0700 Subject: [PATCH 1/3] Add Azure ACI deployment support for OpenClaw - Create README.md for Azure ACI deployment instructions - Add docker-compose.yml for local testing with OpenClaw - Implement auto-stop Azure Function to manage idle ACI - Create webhook Azure Function to handle incoming requests and manage ACI startup - Define Terraform configuration for Azure resources including storage, container group, and function app - Add outputs for resource information after deployment - Include example terraform.tfvars for user configuration - Define necessary variables for Azure deployment in variables.tf --- terraform/azure/.dockerignore | 15 + terraform/azure/.envrc | 16 + terraform/azure/FUNCTION-DEPLOYMENT.md | 342 ++++++++++++++++++ terraform/azure/README.md | 251 +++++++++++++ terraform/azure/docker-compose.yml | 66 ++++ .../azure/function/auto-stop/auto_stop.py | 101 ++++++ .../azure/function/auto-stop/function.json | 11 + terraform/azure/function/host.json | 19 + terraform/azure/function/local.settings.json | 28 ++ terraform/azure/function/requirements.txt | 4 + .../azure/function/webhook/function.json | 18 + terraform/azure/function/webhook/webhook.py | 134 +++++++ terraform/azure/main.tf | 249 +++++++++++++ terraform/azure/outputs.tf | 75 ++++ terraform/azure/terraform.tfvars.example | 43 +++ terraform/azure/variables.tf | 148 ++++++++ 16 files changed, 1520 insertions(+) create mode 100644 terraform/azure/.dockerignore create mode 100644 terraform/azure/.envrc create mode 100644 terraform/azure/FUNCTION-DEPLOYMENT.md create mode 100644 terraform/azure/README.md create mode 100644 terraform/azure/docker-compose.yml create mode 100644 terraform/azure/function/auto-stop/auto_stop.py create mode 100644 terraform/azure/function/auto-stop/function.json create mode 100644 terraform/azure/function/host.json create mode 100644 
terraform/azure/function/local.settings.json
 create mode 100644 terraform/azure/function/requirements.txt
 create mode 100644 terraform/azure/function/webhook/function.json
 create mode 100644 terraform/azure/function/webhook/webhook.py
 create mode 100644 terraform/azure/main.tf
 create mode 100644 terraform/azure/outputs.tf
 create mode 100644 terraform/azure/terraform.tfvars.example
 create mode 100644 terraform/azure/variables.tf

diff --git a/terraform/azure/.dockerignore b/terraform/azure/.dockerignore
new file mode 100644
index 0000000..6484f99
--- /dev/null
+++ b/terraform/azure/.dockerignore
@@ -0,0 +1,15 @@
+node_modules
+npm-debug.log
+.git
+.gitignore
+.env
+.env.local
+.DS_Store
+terraform/
+*.tfstate
+*.tfstate.backup
+*.tfvars
+README.md
+LICENSE
+.terraform/
+.terraform.lock.hcl
diff --git a/terraform/azure/.envrc b/terraform/azure/.envrc
new file mode 100644
index 0000000..4f75a3a
--- /dev/null
+++ b/terraform/azure/.envrc
@@ -0,0 +1,16 @@
+# Load the project-root .env (two levels up from terraform/azure) for Terraform and Docker Compose
+# Install direnv: https://direnv.net/docs/installation.html
+# Then enable it in this directory: direnv allow
+
+set -a
+source ../../.env
+set +a
+
+# Export variables with TF_VAR_ prefix for Terraform to pick them up
+export TF_VAR_openrouter_api_key=$OPENROUTER_API_KEY
+export TF_VAR_telegram_bot_token=$TELEGRAM_BOT_TOKEN
+export TF_VAR_openclaw_gateway_token=$OPENCLAW_GATEWAY_TOKEN
+export TF_VAR_brave_api_key=$BRAVE_API_KEY
+export TF_VAR_telegram_owner_id=$TELEGRAM_OWNER_ID
+export TF_VAR_slack_app_token=$SLACK_APP_TOKEN
+export TF_VAR_slack_bot_token=$SLACK_BOT_TOKEN
diff --git a/terraform/azure/FUNCTION-DEPLOYMENT.md b/terraform/azure/FUNCTION-DEPLOYMENT.md
new file mode 100644
index 0000000..f7938a1
--- /dev/null
+++ b/terraform/azure/FUNCTION-DEPLOYMENT.md
@@ -0,0 +1,342 @@
+# Azure Function Deployment Guide for OpenClaw
+
+完全自动化的 OpenClaw 部署,使用 Azure Function 处理 Webhook 并自动管理 ACI。
+
+## 架构
+
+```
+Telegram/Slack Webhook
+         ↓
+   Azure Function
+  (webhook handler)
+ ↓ + 检查 ACI 状态 + ├→ 如果停止 → 启动 ACI (30-60s) + └→ 如果运行 → 直接继续 + ↓ + 转发消息到 ACI + (OpenClaw Gateway) + ↓ + 处理消息 + (读/写 Azure File Share 存储) + ↓ + 返回响应到用户 + ↓ + 30 分钟无活动 + ↓ + Timer Function 停止 ACI +``` + +## 成本 + +``` +使用场景: 每天 5 条消息,每条 2 分钟处理 + +ACI 运行成本: ~$36/月(2CPU/4GB 常驻) + ↓ 优化后 +ACI 运行成本: ~$2/月(按需启停) +Storage (File Share): ~$0.5/月 +Function App: ~$0(消费计划,免费额度) +──────────────────────────────────── +总计: ~$2.50/月 ✨(省 92%) +``` + +## 部署前提 + +✅ 完成了 Terraform 配置(main.tf, variables.tf 等) +✅ 已创建 Storage Account for ACI 数据 +✅ 已创建 Function App Infrastructure + +## 配置项 + +在 `terraform.tfvars` 中可以配置: + +```hcl +# 自动停止空闲 ACI 的时间(分钟) +idle_timeout_minutes = 30 # 改为你想要的分钟数 +``` + +这个值会自动传递给 Function App(环境变量 `IDLE_TIMEOUT_MINUTES`),用于控制多久没有活动就停止 ACI。注意:当前的 `auto_stop.py` 只读取该值,尚未用它做判断;定时器固定每 30 分钟运行一次并停止正在运行的 ACI。 + +## 部署步骤 + +### Step 1: Terraform 部署基础设施 + +```bash +cd terraform/azure +direnv allow +terraform apply +``` + +应看到输出: +``` +Outputs: + +function_app_name = "openclaw-func-001" +function_app_default_hostname = "openclaw-func-001.azurewebsites.net" +eci_intranet_ip = "10.x.x.x" +``` + +### Step 2: 获取 Publish Profile + +```bash +# 获取 Function App 的 publish profile +az functionapp deployment list-publishing-credentials \ + --name openclaw-func-001 \ + --resource-group openclaw-rg \ + --query publishingPassword \ + -o tsv +``` + +### Step 3: 部署函数代码 + +选项 A:使用 Azure CLI + +```bash +cd terraform/azure/function + +# 创建 function.json (for webhook handler) +cat > webhook/function.json << 'EOF' +{ + "scriptFile": "webhook.py", + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": ["post"], + "route": "webhook" + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] +} +EOF + +# 创建 function.json (for auto-stop timer) +cat > auto-stop/function.json << 'EOF' +{ + "scriptFile": "auto_stop.py", + "bindings": [ + { + "name": "mytimer", + "type": "timerTrigger", + "direction": "in", + "schedule": "0 */30 * * * *" + } + ] +} +EOF + +# 部署 +func azure functionapp publish openclaw-func-001 +``` + 
+选项 B:使用 VS Code Azure Functions 扩展 + +1. 打开 VS Code +2. 安装 "Azure Functions" 扩展 +3. Sign in to Azure +4. Deploy to Function App + +### Step 4: 配置 Webhook URL + +获取 webhook URL: + +```bash +# 查看 function url +az functionapp function show \ + --function-name webhook \ + --name openclaw-func-001 \ + --resource-group openclaw-rg \ + --query "invokeUrlTemplate" +``` + +输出示例: +``` +https://openclaw-func-001.azurewebsites.net/api/webhook +``` + +**Telegram 配置**: + +```bash +BOT_TOKEN="YOUR_BOT_TOKEN" +WEBHOOK_URL="https://openclaw-func-001.azurewebsites.net/api/webhook" + +curl -X POST "https://api.telegram.org/bot${BOT_TOKEN}/setWebhook" \ + -d "url=${WEBHOOK_URL}" +``` + +**Slack 配置**: + +1. 进入 Slack App Settings +2. Event Subscriptions → Enable Events +3. Request URL: `https://openclaw-func-001.azurewebsites.net/api/webhook` +4. Request URL Verification: Slack 会发送验证请求 + +### Step 5: 测试 + +发送一条消息: + +``` +你好,OpenClaw! +``` + +**观察**: + +1. ACI 自动启动 (~30-60 秒) +2. 消息被处理 +3. 收到回复 +4. Function App 日志显示执行信息 + +查看日志: + +```bash +# 实时日志(已部署的 Function App;`func start` 只会在本地启动函数主机) +func azure functionapp logstream openclaw-func-001 + +# 或在 Azure Portal +# Function App → Functions → 查看 Monitor/Logs +``` + +### Step 6: 配置定时停止 + +Timer 已配置为每 30 分钟运行一次。 + +检查定时器: + +```bash +# 查看 auto-stop 函数 +az functionapp function show \ + --function-name auto-stop \ + --name openclaw-func-001 \ + --resource-group openclaw-rg +``` + +## 运维 + +### 查看实时日志 + +```bash +# Azure Portal 中 +# → Function App → Functions → Monitor → 查看最近调用 + +# 或 CLI +az webapp log tail \ + --name openclaw-func-001 \ + --resource-group openclaw-rg +``` + +### 手动停止 ACI (测试) + +```bash +az container stop \ + --name openclaw-container \ + --resource-group openclaw-rg +``` + +### 手动启动 ACI (测试) + +```bash +az container start \ + --name openclaw-container \ + --resource-group openclaw-rg +``` + +### 更新函数代码 + +```bash +cd terraform/azure/function + +# 修改 webhook.py 或 auto_stop.py + +# 重新部署 +func azure functionapp publish openclaw-func-001 +``` + +## 成本监控 + +```bash +# 在 Azure Portal +# → Cost Management + 
Billing +# → 查看每日成本 +``` + +设置告警: + +```bash +# Cost Alerts → 设置 $5/月 告警 +``` + +## 常见问题 + +### Q1: Webhook 无法触发? + +**排查**: +1. 检查 webhook URL 是否正确 +2. 查看 Function App 日志 +3. 测试: `curl -X POST https://openclaw-func-001.azurewebsites.net/api/webhook -d '{"test": true}'` + +### Q2: ACI 没有启动? + +**排查**: +1. 检查 Function App 身份认证 (IAM Role Assignment) +2. 查看 Function App 错误日志 +3. 确认 ACI 权限 + +### Q3: 消息处理太慢? + +**注意**: 第一条消息会因为冷启动慢 30-60 秒。这是正常的。 + +### Q4: NAS 数据丢失? + +**排查**: +1. 检查 File Share 挂载是否正确 +2. 查看 ACI 日志 +3. 验证 `/root/.openclaw` 权限 + +## 下一步 + +- [ ] 测试 Telegram/Slack webhook +- [ ] 发送测试消息 +- [ ] 监控成本(第一周) +- [ ] 调整 Timer 间隔(如需要) +- [ ] 实现高级活动追踪(可选) + +## 高级:活动追踪 + +当前实现: 30 分钟后自动停止 + +改进方案 (需要额外实现): +1. 将最后活动时间存储在 Azure Storage Table / Cosmos DB +2. Webhook Handler 更新时间戳 +3. Auto-stop 查询时间戳 + +```python +# 示例代码 +from azure.data.tables import TableClient + +def update_activity(): + client = TableClient.from_connection_string(...) + entity = { + 'PartitionKey': 'openclaw', + 'RowKey': 'last_activity', + 'timestamp': time.time() + } + client.upsert_entity(entity) + +def get_last_activity(): + client = TableClient.from_connection_string(...) + entity = client.get_entity('openclaw', 'last_activity') + return entity['timestamp'] +``` + +## 支持 + +- [Azure Functions 文档](https://learn.microsoft.com/en-us/azure/azure-functions/) +- [Azure Container Instances 文档](https://learn.microsoft.com/en-us/azure/container-instances/) +- [Azure CLI 参考](https://learn.microsoft.com/en-us/cli/azure/) diff --git a/terraform/azure/README.md b/terraform/azure/README.md new file mode 100644 index 0000000..7814c04 --- /dev/null +++ b/terraform/azure/README.md @@ -0,0 +1,251 @@ +# OpenClaw on Azure ACI Deployment Guide + +## Prerequisites + +1. **Azure Account**: Azure Student subscription +2. **Azure CLI**: [Install Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) +3. **Terraform**: Installed Terraform CLI +4. 
**Docker & Docker Compose**: For local testing (optional) +5. **OpenClaw API Keys**: From [docs.openclaw.ai](https://docs.openclaw.ai) + +## Step 1: Local Testing with Docker Compose (Optional) + +Before deploying to Azure, test locally: + +```bash +# Navigate to azure directory +cd terraform/azure + +# Copy .env from project root +cp ../../.env . + +# Start with docker-compose +docker-compose up -d + +# Check logs +docker-compose logs -f openclaw-gateway + +# Test health endpoint +curl http://localhost:18789/healthz + +# Stop when done +docker-compose down +``` + +## Step 2: Set Up Azure Authentication + +### Create Service Principal + +```bash +az login +az account show + +# Create Service Principal for Terraform +az ad sp create-for-rbac --name "openclaw-terraform" --role Contributor +``` + +Output contains: +- `appId` → `client_id` +- `password` → `client_secret` +- `tenant` → `tenant_id` + +Get Subscription ID: +```bash +az account show --query id -o tsv +``` + +### Configure terraform.tfvars + +```bash +cd terraform/azure +cp terraform.tfvars.example terraform.tfvars +``` + +Edit `terraform.tfvars` and fill in **Azure-only settings** (no API keys needed here): +```hcl +subscription_id = "YOUR_SUBSCRIPTION_ID" +client_id = "YOUR_CLIENT_ID" +client_secret = "YOUR_CLIENT_SECRET" +tenant_id = "YOUR_TENANT_ID" +``` + +### Load API keys from .env (via direnv) + +Install and enable `direnv`: +```bash +# Install direnv (https://direnv.net/docs/installation.html) +# macOS: +brew install direnv + +# Then enable it +direnv allow + +# Now every time you cd into this directory, .env will be loaded automatically +cd terraform/azure +# direnv: loading .envrc (loads all API keys from ../../.env) +``` + +Or manually load before terraform: +```bash +source ../../.env +export TF_VAR_openrouter_api_key=$OPENROUTER_API_KEY +export TF_VAR_telegram_bot_token=$TELEGRAM_BOT_TOKEN +export TF_VAR_openclaw_gateway_token=$OPENCLAW_GATEWAY_TOKEN +export TF_VAR_brave_api_key=$BRAVE_API_KEY 
+export TF_VAR_telegram_owner_id=$TELEGRAM_OWNER_ID +export TF_VAR_slack_app_token=$SLACK_APP_TOKEN +export TF_VAR_slack_bot_token=$SLACK_BOT_TOKEN +``` + +## Step 3: Configure OpenClaw API Keys + +**API Keys are loaded from `.env` file** — no need to put them in terraform.tfvars! + +If using **direnv** (recommended): +```bash +direnv allow +# All API keys from ../../.env are now available as TF_VAR_ environment variables +``` + +If **not using direnv**, manually export before running terraform: +```bash +source ../../.env +export TF_VAR_openrouter_api_key=$OPENROUTER_API_KEY +export TF_VAR_telegram_bot_token=$TELEGRAM_BOT_TOKEN +export TF_VAR_openclaw_gateway_token=$OPENCLAW_GATEWAY_TOKEN +export TF_VAR_brave_api_key=$BRAVE_API_KEY +export TF_VAR_telegram_owner_id=$TELEGRAM_OWNER_ID +export TF_VAR_slack_app_token=$SLACK_APP_TOKEN +export TF_VAR_slack_bot_token=$SLACK_BOT_TOKEN +``` + +## Step 4: Adjust Resource Configuration (Optional) + +Adjust based on Azure Student quota: + +```hcl +# CPU and memory configuration (must be compatible) +# Valid combinations: (0.5 CPU, 0.5-1.5 GB) | (1 CPU, 1-3.5 GB) | (1.5 CPU, 1.5-4 GB) | (2 CPU, 2-8 GB) +cpu_cores = 1 +memory_gb = 1.5 + +# 位置(查询可用地点) +location = "eastus" # or canadaeast, westus2, etc. 
+ +# 唯一的 DNS 名称 +dns_name_label = "openclaw-aci-unique-12345" +``` + +查询可用位置: +```bash +az provider show --namespace Microsoft.ContainerInstance --query "resourceTypes[?resourceType=='containerGroups'].locations" -o tsv +``` + +## 步骤 5: 部署 + +```bash +cd terraform/azure + +# 初始化 Terraform +terraform init + +# 验证配置 +terraform plan + +# 应用配置 +terraform apply +``` + +## 步骤 6: 访问容器 + +### 查看输出信息 + +```bash +terraform output +``` + +输出示例: +``` +public_ip = "20.91.234.123" +fqdn = "openclaw-aci-unique.eastus.azurecontainer.io" +gateway_url = "http://openclaw-aci-unique.eastus.azurecontainer.io:18789" +``` + +### 访问 Gateway + +```bash +curl "http://$(terraform output -raw fqdn):18789/healthz" +``` + +### 查看容器日志 + +```bash +RESOURCE_GROUP="openclaw-rg" +CONTAINER_GROUP="openclaw-container" + +# 实时查看日志 +az container logs --resource-group $RESOURCE_GROUP --name $CONTAINER_GROUP --follow + +# 查看一次日志 +az container logs --resource-group $RESOURCE_GROUP --name $CONTAINER_GROUP +``` + +### 进入容器调试(可选) + +```bash +# 执行命令 +az container exec --resource-group $RESOURCE_GROUP --name $CONTAINER_GROUP --exec-command /bin/bash +``` + +## 成本考虑 + +Azure Student 账户通常有 $100 额度。ACI 按秒计费(大约 $0.0015/GB/小时): + +- 1 CPU + 1 GB 内存:~$0.01-0.02/小时 +- 持续运行一个月(720小时):~$10-15 + +## 清理资源 + +```bash +terraform destroy +``` + +或者手动清理: +```bash +az group delete --name openclaw-rg +``` + +## 常见问题 + +### Q: 如何更新容器镜像? + +```bash +# 重新构建镜像 +docker build -t openclaw:latest . + +# 推送到 ACR 或 Docker Hub +docker push $REGISTRY_URL/openclaw:latest + +# 重新部署(Terraform 会更新) +terraform apply +``` + +### Q: 如何查看容器是否正常运行? + +```bash +# 检查容器状态 +az container show --resource-group openclaw-rg --name openclaw-container + +# 查看日志 +az container logs --resource-group openclaw-rg --name openclaw-container --follow +``` + +### Q: Azure Student 支持 ACI 吗? 
+ +是的,ACI 是标准 Azure 服务,受 Student 订阅支持。注意配额限制和成本。 + +## 相关资源 + +- [Azure Container Instances 文档](https://learn.microsoft.com/en-us/azure/container-instances/) +- [Terraform Azure Provider](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs) +- [Azure 学生账户](https://azure.microsoft.com/en-us/free/students/) diff --git a/terraform/azure/docker-compose.yml b/terraform/azure/docker-compose.yml new file mode 100644 index 0000000..f23fa93 --- /dev/null +++ b/terraform/azure/docker-compose.yml @@ -0,0 +1,66 @@ +version: "3.8" + +services: + openclaw-gateway: + image: ghcr.io/openclaw/openclaw:latest + container_name: openclaw-gateway + restart: unless-stopped + ports: + - "18789:18789" + environment: + # Gateway configuration + NODE_ENV: production + OPENCLAW_GATEWAY_BIND: lan + OPENCLAW_ONBOARD_NON_INTERACTIVE: "1" + + # API Keys (load from .env file) + OPENROUTER_API_KEY: ${OPENROUTER_API_KEY} + TELEGRAM_BOT_TOKEN: ${TELEGRAM_BOT_TOKEN} + OPENCLAW_GATEWAY_TOKEN: ${OPENCLAW_GATEWAY_TOKEN} + BRAVE_API_KEY: ${BRAVE_API_KEY} + TELEGRAM_OWNER_ID: ${TELEGRAM_OWNER_ID} + SLACK_APP_TOKEN: ${SLACK_APP_TOKEN} + SLACK_BOT_TOKEN: ${SLACK_BOT_TOKEN} + + volumes: + # Persist configuration and workspace + - openclaw-config:/home/node/.openclaw + - openclaw-workspace:/home/node/.openclaw/workspace + + healthcheck: + test: ["CMD", "curl", "-fsS", "http://localhost:18789/healthz"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + + networks: + - openclaw-network + + openclaw-cli: + image: ghcr.io/openclaw/openclaw:latest + container_name: openclaw-cli + entrypoint: node + command: dist/index.js + depends_on: + openclaw-gateway: + condition: service_healthy + environment: + NODE_ENV: production + OPENCLAW_GATEWAY_TOKEN: ${OPENCLAW_GATEWAY_TOKEN} + volumes: + - openclaw-config:/home/node/.openclaw + networks: + - openclaw-network + profiles: + - cli + +volumes: + openclaw-config: + driver: local + openclaw-workspace: + driver: local + +networks: + 
openclaw-network: + driver: bridge diff --git a/terraform/azure/function/auto-stop/auto_stop.py b/terraform/azure/function/auto-stop/auto_stop.py new file mode 100644 index 0000000..666c0aa --- /dev/null +++ b/terraform/azure/function/auto-stop/auto_stop.py @@ -0,0 +1,101 @@ +""" +Azure Function: Auto-Stop Timer for OpenClaw +Triggered every 30 minutes, stops idle ACI +""" + +import azure.functions as func +import os +import time +from azure.identity import DefaultAzureCredential +from azure.mgmt.containerinstance import ContainerInstanceManagementClient + +# Environment variables +SUBSCRIPTION_ID = os.environ.get('AZURE_SUBSCRIPTION_ID') +RESOURCE_GROUP = os.environ.get('ACI_RESOURCE_GROUP') +CONTAINER_GROUP_NAME = os.environ.get('ACI_CONTAINER_GROUP_NAME') + +# Initialize client +credential = DefaultAzureCredential() +aci_client = ContainerInstanceManagementClient(credential, SUBSCRIPTION_ID) + +# Idle timeout (configurable via environment variable) +IDLE_TIMEOUT_MINUTES = int(os.environ.get('IDLE_TIMEOUT_MINUTES', '30')) +IDLE_TIMEOUT = IDLE_TIMEOUT_MINUTES * 60 + + +def main(mytimer: func.TimerRequest) -> None: + """ + Timer trigger: runs every 30 minutes + Stops ACI if idle + """ + try: + func.get_logger().info("Auto-stop timer triggered") + + # Check ACI status + aci_status = check_aci_status() + func.get_logger().info(f"ACI status: {aci_status}") + + if aci_status == 'Running': + # Check if idle + if is_aci_idle(): + func.get_logger().info("ACI is idle, stopping...") + stop_aci() + func.get_logger().info("ACI stopped") + else: + func.get_logger().info("ACI still active, not stopping") + else: + func.get_logger().info(f"ACI is {aci_status}, no action needed") + + except Exception as e: + func.get_logger().error(f"Error in auto-stop: {str(e)}") + + +def check_aci_status(): + """Get current ACI status""" + try: + container_group = aci_client.container_groups.get( + RESOURCE_GROUP, + CONTAINER_GROUP_NAME + ) + + status = container_group.instance_view.state if 
container_group.instance_view else 'Unknown' + return status + + except Exception as e: + func.get_logger().error(f"Error checking status: {str(e)}") + return 'Unknown' + + +def is_aci_idle(): + """ + Check if ACI is idle + Simplified: if no recent activity (in production, use proper activity tracking) + """ + try: + # In production, you would check: + # - Database for last activity timestamp + # - Application logs + # - Custom metrics + + # For now, simplified: always consider it idle after 30min + # In real deployment, implement proper activity tracking + func.get_logger().info("Checking idleness - simplified logic") + return True + + except Exception as e: + func.get_logger().error(f"Error checking idleness: {str(e)}") + return False + + +def stop_aci(): + """Stop the ACI container group""" + try: + aci_client.container_groups.stop( + RESOURCE_GROUP, + CONTAINER_GROUP_NAME + ) + func.get_logger().info("ACI stop command sent") + + except Exception as e: + func.get_logger().error(f"Error stopping ACI: {str(e)}") + raise diff --git a/terraform/azure/function/auto-stop/function.json b/terraform/azure/function/auto-stop/function.json new file mode 100644 index 0000000..5e13b3a --- /dev/null +++ b/terraform/azure/function/auto-stop/function.json @@ -0,0 +1,11 @@ +{ + "scriptFile": "auto_stop.py", + "bindings": [ + { + "name": "mytimer", + "type": "timerTrigger", + "direction": "in", + "schedule": "0 */30 * * * *" + } + ] +} diff --git a/terraform/azure/function/host.json b/terraform/azure/function/host.json new file mode 100644 index 0000000..f6ddca0 --- /dev/null +++ b/terraform/azure/function/host.json @@ -0,0 +1,19 @@ +{ + "version": "2.0", + "logging": { + "applicationInsights": { + "samplingSettings": { + "isEnabled": true, + "maxTelemetryItemsPerSecond": 20 + } + } + }, + "extensionBundle": { + "id": "Microsoft.Azure.Functions.ExtensionBundle", + "version": "[4.*, 5.0.0)" + }, + "functionTimeout": "10:00:00", + "tracing": { + "consoleLevel": "verbose" + } +} 
diff --git a/terraform/azure/function/local.settings.json b/terraform/azure/function/local.settings.json
new file mode 100644
index 0000000..0280fd7
--- /dev/null
+++ b/terraform/azure/function/local.settings.json
@@ -0,0 +1,28 @@
+{
+  "IsEncrypted": false,
+  "Values": {
+    "AzureWebJobsStorage": "UseDevelopmentStorage=true",
+    "FUNCTIONS_WORKER_RUNTIME": "python",
+    "FUNCTIONS_EXTENSION_VERSION": "~4",
+    "PYTHON_VERSION": "3.10",
+
+    "ACI_RESOURCE_GROUP": "openclaw-rg",
+    "ACI_CONTAINER_GROUP_NAME": "openclaw-container",
+    "ACI_ID": "your-aci-id-here",
+    "OPENCLAW_GATEWAY_IP": "10.x.x.x",
+    "OPENCLAW_GATEWAY_PORT": "18789",
+
+    "IDLE_TIMEOUT_MINUTES": "30",
+
+    "AZURE_SUBSCRIPTION_ID": "your-subscription-id",
+    "AZURE_TENANT_ID": "your-tenant-id",
+    "AZURE_CLIENT_ID": "your-client-id",
+    "AZURE_CLIENT_SECRET": "your-client-secret"
+  },
+  "Host": {
+    "CORS": {
+      "AllowedOrigins": ["*"]
+    },
+    "LocalHttpPort": 7071
+  }
+}
diff --git a/terraform/azure/function/requirements.txt b/terraform/azure/function/requirements.txt
new file mode 100644
index 0000000..3b2bb66
--- /dev/null
+++ b/terraform/azure/function/requirements.txt
@@ -0,0 +1,4 @@
+azure-functions>=1.17.0
+azure-identity>=1.14.0
+azure-mgmt-containerinstance>=10.0.0
+requests>=2.31.0
diff --git a/terraform/azure/function/webhook/function.json b/terraform/azure/function/webhook/function.json
new file mode 100644
index 0000000..ba67407
--- /dev/null
+++ b/terraform/azure/function/webhook/function.json
@@ -0,0 +1,18 @@
+{
+  "scriptFile": "webhook.py",
+  "bindings": [
+    {
+      "authLevel": "anonymous",
+      "type": "httpTrigger",
+      "direction": "in",
+      "name": "req",
+      "methods": ["post"],
+      "route": "webhook"
+    },
+    {
+      "type": "http",
+      "direction": "out",
+      "name": "$return"
+    }
+  ]
+}
diff --git a/terraform/azure/function/webhook/webhook.py b/terraform/azure/function/webhook/webhook.py
new file mode 100644
index 0000000..52e9f15
--- /dev/null
+++ b/terraform/azure/function/webhook/webhook.py
@@ -0,0 +1,159 @@
+"""
+Azure Function: Webhook Handler for OpenClaw
+Triggers on Telegram/Slack webhook, manages ACI startup
+"""
+
+import azure.functions as func
+import json
+import logging
+import time
+import os
+from azure.identity import DefaultAzureCredential
+from azure.mgmt.containerinstance import ContainerInstanceManagementClient
+
+# Get environment variables
+SUBSCRIPTION_ID = os.environ.get('AZURE_SUBSCRIPTION_ID')
+RESOURCE_GROUP = os.environ.get('ACI_RESOURCE_GROUP')
+CONTAINER_GROUP_NAME = os.environ.get('ACI_CONTAINER_GROUP_NAME')
+GATEWAY_IP = os.environ.get('OPENCLAW_GATEWAY_IP')
+GATEWAY_PORT = os.environ.get('OPENCLAW_GATEWAY_PORT', '18789')
+
+# Upper bound on waiting for the container group to report Running, and
+# the interval between status checks while waiting
+STARTUP_TIMEOUT_SECONDS = 90
+STARTUP_POLL_SECONDS = 5
+
+# Initialize Azure client
+credential = DefaultAzureCredential()
+aci_client = ContainerInstanceManagementClient(credential, SUBSCRIPTION_ID)
+
+
+def json_response(payload, status_code=200):
+    """Build a JSON HttpResponse from a serializable payload"""
+    return func.HttpResponse(
+        json.dumps(payload),
+        status_code=status_code,
+        mimetype="application/json"
+    )
+
+
+def main(req: func.HttpRequest) -> func.HttpResponse:
+    """
+    Main webhook handler
+    1. Answer Slack's URL verification challenge
+    2. Detect webhook source (Telegram/Slack)
+    3. Check ACI status; start ACI and wait for it if needed
+    4. Return a JSON acknowledgement
+
+    NOTE: the payload is not forwarded to OpenClaw yet; the handler only
+    logs the gateway address it would forward to.
+    """
+    try:
+        logging.info("Webhook triggered")
+
+        # Parse webhook data
+        try:
+            webhook_data = req.get_json()
+        except ValueError:
+            return json_response({"error": "Invalid JSON"}, 400)
+
+        # Valid JSON that is not an object cannot be a Telegram/Slack payload
+        if not isinstance(webhook_data, dict):
+            return json_response({"error": "Unknown webhook type"}, 400)
+
+        # Slack verifies the Request URL with a challenge that has no 'event'
+        # key and must be echoed back
+        if webhook_data.get('type') == 'url_verification':
+            return json_response({"challenge": webhook_data.get('challenge')})
+
+        # Determine source
+        if 'message' in webhook_data:
+            source = 'telegram'
+        elif 'event' in webhook_data:
+            source = 'slack'
+        else:
+            return json_response({"error": "Unknown webhook type"}, 400)
+
+        logging.info(f"Message from {source}")
+
+        # Check ACI status
+        aci_status = check_aci_status()
+        logging.info(f"ACI status: {aci_status}")
+
+        if aci_status != 'Running':
+            logging.info("Starting ACI...")
+            start_aci()
+
+            # Poll instead of sleeping a fixed 45 s: returns as soon as the
+            # group is Running and tolerates slower cold starts
+            logging.info("Waiting for ACI to start...")
+            if not wait_for_aci_running():
+                logging.warning("ACI not Running after startup timeout")
+
+        # Forward message to OpenClaw (not implemented; see NOTE above)
+        logging.info(f"Forwarding to OpenClaw at {GATEWAY_IP}:{GATEWAY_PORT}")
+
+        return json_response({
+            'success': True,
+            'message': 'Message processed',
+            'source': source
+        })
+
+    except Exception:
+        # Log the traceback server-side; the endpoint is anonymous, so do not
+        # echo exception text back to the caller
+        logging.exception("Error handling webhook")
+        return json_response({"error": "Internal server error"}, 500)
+
+
+def check_aci_status():
+    """
+    Check current status of ACI container group
+    Returns: instance view state such as 'Running' or 'Stopped';
+    'Unknown' if there is no instance view or the lookup fails
+    """
+    try:
+        container_group = aci_client.container_groups.get(
+            RESOURCE_GROUP,
+            CONTAINER_GROUP_NAME
+        )
+
+        status = container_group.instance_view.state if container_group.instance_view else 'Unknown'
+        return status
+
+    except Exception as e:
+        logging.error(f"Error checking ACI status: {str(e)}")
+        return 'Unknown'
+
+
+def wait_for_aci_running(timeout_seconds=STARTUP_TIMEOUT_SECONDS,
+                         poll_seconds=STARTUP_POLL_SECONDS):
+    """
+    Poll check_aci_status() until it returns 'Running'
+    Returns: True once 'Running' is seen, False if the timeout elapses
+    """
+    deadline = time.monotonic() + timeout_seconds
+    while time.monotonic() < deadline:
+        if check_aci_status() == 'Running':
+            return True
+        time.sleep(poll_seconds)
+    return False
+
+
+def start_aci():
+    """
+    Start the ACI container group
+    Raises: any exception from the management SDK, after logging it
+    """
+    try:
+        # Start is a long-running operation; SDK releases matching
+        # requirements.txt expose it as begin_start(), not start()
+        aci_client.container_groups.begin_start(
+            RESOURCE_GROUP,
+            CONTAINER_GROUP_NAME
+        )
+        logging.info("ACI start command sent")
+
+    except Exception as e:
+        logging.error(f"Error starting ACI: {str(e)}")
+        raise
diff --git a/terraform/azure/main.tf b/terraform/azure/main.tf
new file mode 100644
index 0000000..e1433da
--- /dev/null
+++ b/terraform/azure/main.tf
@@ -0,0 +1,249 @@
+terraform {
+  required_providers {
+    azurerm = {
+      source  = "hashicorp/azurerm"
+      version = "~> 3.0"
+    }
+  }
+}
+
+provider "azurerm" {
+  features {}
+
+  subscription_id = var.subscription_id
+  client_id       = var.client_id
+  client_secret   = var.client_secret
+  tenant_id       = var.tenant_id
+}
+
+# ── Resource Group ───────────────────────────────────────────
+
+resource "azurerm_resource_group" "openclaw" {
+  name     = var.resource_group_name
+  location = var.location
+}
+
+# ── Storage Account (for persistent data) ────────────────────
+
+resource "azurerm_storage_account" "openclaw" {
+  name                     = var.storage_account_name
+  resource_group_name      = azurerm_resource_group.openclaw.name
+  location                 = azurerm_resource_group.openclaw.location
+  account_tier             = "Standard"
+  account_replication_type = "LRS"
+
+  tags = {
+    environment = "production"
+    app         = "openclaw"
+  }
+}
+
+# ── File Share (for OpenClaw data persistence) ───────────────
+
+resource "azurerm_storage_share" "openclaw" {
+  name                 = var.storage_share_name
+  storage_account_name = azurerm_storage_account.openclaw.name
+  quota                = 10 # 10 GB quota for OpenClaw data
+
+  depends_on = [azurerm_storage_account.openclaw]
+}
+
+# ── Container Group (initially stopped) ───────────────────────
+# This container will be started by Azure Function on webhook trigger
+
+resource "azurerm_container_group" "openclaw" {
+  name                = var.container_group_name
+  location            = azurerm_resource_group.openclaw.location
+  resource_group_name = azurerm_resource_group.openclaw.name
+  os_type             = "Linux"
+  ip_address_type     = "Public"
+  restart_policy      = "OnFailure"
+
+  container {
+    name   = "openclaw"
+    image  = var.container_image
+    cpu    = var.cpu_cores
+    memory = var.memory_gb
+
+    # API keys arrive via TF_VAR_* environment variables (source .env or use
+    # direnv with .envrc). They go in secure_environment_variables so ACI
+    # does not return their values in the container group's properties.
+    secure_environment_variables = {
+      OPENROUTER_API_KEY     = var.openrouter_api_key
+      TELEGRAM_BOT_TOKEN     = var.telegram_bot_token
+      OPENCLAW_GATEWAY_TOKEN = var.openclaw_gateway_token
+      BRAVE_API_KEY          = var.brave_api_key
+      TELEGRAM_OWNER_ID      = var.telegram_owner_id
+      SLACK_APP_TOKEN        = var.slack_app_token
+      SLACK_BOT_TOKEN        = var.slack_bot_token
+    }
+    environment_variables = { OPENCLAW_ONBOARD_NON_INTERACTIVE = "1" }
+
+    # Expose Gateway port
+    ports {
+      port     = 18789
+      protocol = "TCP"
+    }
+
+    # Mount Azure File Share for persistent storage. NOTE(review): docker-compose.yml persists /home/node/.openclaw - confirm /root/.openclaw is the path the image uses
+    volume {
+      name                 = "openclaw-storage"
+      mount_path           = "/root/.openclaw"
+      storage_account_name = azurerm_storage_account.openclaw.name
+      storage_account_key  = azurerm_storage_account.openclaw.primary_access_key
+      share_name           = azurerm_storage_share.openclaw.name
+    }
+  }
+
+  # Exposed port for external access (the azurerm block name is singular)
+  exposed_port {
+    port     = 18789
+    protocol = "TCP"
+  }
+
+  # DNS label for public access
+  dns_name_label = var.dns_name_label
+
+  tags = {
+    environment = "production"
+    app         = "openclaw"
+  }
+
+  depends_on = [azurerm_storage_share.openclaw]
+}
+
+# ── Network Security Group ───────────────────────────────────
+# NOTE(review): nothing in this file associates this NSG with a subnet or NIC, so it presumably does not filter traffic to the public-IP container group - confirm
+resource "azurerm_network_security_group" "openclaw" {
+  name                = "${var.container_group_name}-nsg"
+  location            = azurerm_resource_group.openclaw.location
+  resource_group_name = azurerm_resource_group.openclaw.name
+
+  security_rule {
+    name                       = "AllowGateway"
+    priority                   = 100
+    direction                  = "Inbound"
+    access                     = "Allow"
+    protocol                   = "Tcp"
+    source_port_range          = "*"
+    destination_port_range     = "18789"
+    source_address_prefix      = "*"
+    destination_address_prefix = "*"
+  }
+
+  security_rule {
+    name                       = "AllowOutbound"
+    priority                   = 100
+    direction                  = "Outbound"
+    access                     = "Allow"
+    protocol                   = "*"
+    source_port_range          = "*"
+    destination_port_range     = "*"
+    source_address_prefix      = "*"
+    destination_address_prefix = "*"
+  }
+
+  tags = {
+    environment = "production"
+    app         = "openclaw"
+  }
+}
+
+# ── Storage Account for Function App Runtime ────────────────
+
+resource "azurerm_storage_account" "function" {
+  name                     = var.function_storage_account_name
+  resource_group_name      = azurerm_resource_group.openclaw.name
+  location                 = azurerm_resource_group.openclaw.location
+  account_tier             = "Standard"
+  account_replication_type = "LRS"
+
+  tags = {
+    environment = "production"
+    app         = "openclaw-functions"
+  }
+}
+
+# ── App Service Plan for Function App ────────────────────────
+
+resource "azurerm_service_plan" "openclaw" {
+  name                = var.app_service_plan_name
+  location            = azurerm_resource_group.openclaw.location
+  resource_group_name = azurerm_resource_group.openclaw.name
+  os_type             = "Linux"
+  sku_name            = "Y1" # Consumption plan (pay-per-execution)
+
+  tags = {
+    environment = "production"
+    app         = "openclaw-functions"
+  }
+}
+
+# ── Function App ─────────────────────────────────────────────
+
+resource "azurerm_linux_function_app" "openclaw" {
+  name                = var.function_app_name
+  location            = azurerm_resource_group.openclaw.location
+  resource_group_name = azurerm_resource_group.openclaw.name
+  service_plan_id     = azurerm_service_plan.openclaw.id
+
+  storage_account_name       = azurerm_storage_account.function.name
+  storage_account_access_key = azurerm_storage_account.function.primary_access_key
+
+  # Python runtime
+  site_config {
+    application_stack {
+      python_version = "3.10"
+    }
+
+    # HTTP/2 and scale-out limits (the function timeout is set in host.json)
+    http2_enabled            = true
+    app_scale_limit          = 200
+    elastic_instance_minimum = 0
+
+    # "function_app_scale_limit" was removed: it is not a site_config
+    # argument of azurerm_linux_function_app (app_scale_limit is the cap).
+  }
+
+  # Application settings
+  app_settings = {
+    "FUNCTIONS_WORKER_RUNTIME"    = "python"
+    "FUNCTIONS_EXTENSION_VERSION" = "~4"
+    "PYTHON_VERSION"              = "3.10"
+
+    # OpenClaw settings; the gateway address uses the FQDN because the public IP may change when ACI is stopped and restarted
+    "ACI_RESOURCE_GROUP"       = azurerm_resource_group.openclaw.name
+    "ACI_CONTAINER_GROUP_NAME" = azurerm_container_group.openclaw.name
+    "ACI_ID"                   = azurerm_container_group.openclaw.id
+    "OPENCLAW_GATEWAY_IP"      = azurerm_container_group.openclaw.fqdn
+    "OPENCLAW_GATEWAY_PORT"    = "18789"
+
+    # Auto-stop configuration
+    "IDLE_TIMEOUT_MINUTES" = tostring(var.idle_timeout_minutes)
+
+    # Azure credentials for ACI management. NOTE(review): with these set, DefaultAzureCredential presumably uses the service principal rather than the managed identity below - confirm which is intended
+    "AZURE_SUBSCRIPTION_ID" = var.subscription_id
+    "AZURE_TENANT_ID"       = var.tenant_id
+    "AZURE_CLIENT_ID"       = var.client_id
+    "AZURE_CLIENT_SECRET"   = var.client_secret
+  }
+
+  identity {
+    type = "SystemAssigned"
+  }
+
+  depends_on = [azurerm_storage_account.function]
+
+  tags = {
+    environment = "production"
+    app         = "openclaw-functions"
+  }
+}
+
+# ── Role Assignment for Function App to manage ACI ──────────
+
+resource "azurerm_role_assignment"
"function_aci_management" { + scope = azurerm_resource_group.openclaw.id + role_definition_name = "Contributor" + principal_id = azurerm_linux_function_app.openclaw.identity[0].principal_id +} diff --git a/terraform/azure/outputs.tf b/terraform/azure/outputs.tf new file mode 100644 index 0000000..8ad8ac7 --- /dev/null +++ b/terraform/azure/outputs.tf @@ -0,0 +1,75 @@ +# ── Outputs ────────────────────────────────────────────────── + +output "public_ip" { + description = "Public IP address of the container group" + value = azurerm_container_group.openclaw.ip_address +} + +output "fqdn" { + description = "Fully Qualified Domain Name for public access" + value = azurerm_container_group.openclaw.fqdn +} + +output "gateway_url" { + description = "OpenClaw Gateway URL" + value = "http://${azurerm_container_group.openclaw.fqdn}:18789" +} + +output "container_group_id" { + description = "Azure Container Group Resource ID" + value = azurerm_container_group.openclaw.id +} + +output "resource_group_name" { + description = "Resource Group name" + value = azurerm_resource_group.openclaw.name +} + +# ── Storage Information ────────────────────────────────── + +output "storage_account_name" { + description = "Storage Account name" + value = azurerm_storage_account.openclaw.name +} + +output "storage_account_id" { + description = "Storage Account ID" + value = azurerm_storage_account.openclaw.id +} + +output "storage_share_name" { + description = "File Share name for persistent data" + value = azurerm_storage_share.openclaw.name +} + +output "storage_share_url" { + description = "File Share URL" + value = "https://${azurerm_storage_account.openclaw.name}.file.core.windows.net/${azurerm_storage_share.openclaw.name}" +} + +# ── Function App Information ───────────────────────────── + +output "function_app_name" { + description = "Function App name" + value = azurerm_linux_function_app.openclaw.name +} + +output "function_app_default_hostname" { + description = "Function App default 
hostname" + value = azurerm_linux_function_app.openclaw.default_hostname +} + +output "function_app_id" { + description = "Function App resource ID" + value = azurerm_linux_function_app.openclaw.id +} + +output "function_app_principal_id" { + description = "Function App managed identity principal ID" + value = azurerm_linux_function_app.openclaw.identity[0].principal_id +} + +output "app_service_plan_id" { + description = "App Service Plan ID" + value = azurerm_service_plan.openclaw.id +} diff --git a/terraform/azure/terraform.tfvars.example b/terraform/azure/terraform.tfvars.example new file mode 100644 index 0000000..316b795 --- /dev/null +++ b/terraform/azure/terraform.tfvars.example @@ -0,0 +1,43 @@ +# ── Azure Authentication ─────────────────────────────────── +# Get these values from Azure Portal or by running: +# az ad sp create-for-rbac --name "openclaw-terraform" --role Contributor + +subscription_id = "YOUR_SUBSCRIPTION_ID" +client_id = "YOUR_CLIENT_ID" +client_secret = "YOUR_CLIENT_SECRET" +tenant_id = "YOUR_TENANT_ID" + +# ── Azure Region & Zone ──────────────────────────────────── +resource_group_name = "openclaw-rg" +location = "canadacentral" # Options: eastus, westus, canadacentral, etc. 
+ +# ── Storage Account (must be globally unique) ──────────── +# If this name is taken, modify it to something like: openclaw201 +storage_account_name = "openclaw001storage" +storage_share_name = "openclaw-data" + +# ── Function App Configuration ────────────────────────── +# If names are taken, modify them by adding a random suffix +function_storage_account_name = "openclawfnstg001" # Must be globally unique +app_service_plan_name = "openclaw-asp-001" +function_app_name = "openclaw-func-001" # Must be globally unique + +# ── Auto-Stop Configuration ──────────────────────────────── +idle_timeout_minutes = 30 # Stop ACI after 30 minutes of inactivity + # Change this to adjust auto-stop timing + +# ── ACI Container Configuration ────────────────────────────── +container_group_name = "openclaw-container" +dns_name_label = "openclaw-aci-unique-name" # Must be globally unique! +cpu_cores = 2 # Official recommendation +memory_gb = 4 # Official recommendation + +# ── Container Image ──────────────────────────────────────────── +# Using official OpenClaw image from GitHub Container Registry +# Available tags: latest, main, specific versions +container_image = "ghcr.io/openclaw/openclaw:latest" + +# ── OpenClaw API Keys ──────────────────────────────────────── +# These are automatically loaded from .env file (see .envrc) +# No need to set them here - they are injected into the container via environment variables +# The .envrc file handles: export TF_VAR_openrouter_api_key=$OPENROUTER_API_KEY, etc.
diff --git a/terraform/azure/variables.tf b/terraform/azure/variables.tf new file mode 100644 index 0000000..28c24bd --- /dev/null +++ b/terraform/azure/variables.tf @@ -0,0 +1,148 @@ +# ── Azure Auth ────────────────────────────────────────────── +variable "subscription_id" { + description = "Azure Subscription ID" + sensitive = true +} + +variable "client_id" { + description = "Azure Service Principal Client ID" + sensitive = true +} + +variable "client_secret" { + description = "Azure Service Principal Client Secret" + sensitive = true +} + +variable "tenant_id" { + description = "Azure Tenant ID" + sensitive = true +} + +# ── Resource Group & Location ─────────────────────────────── +variable "resource_group_name" { + description = "Azure Resource Group name" + default = "openclaw-rg" +} + +variable "location" { + description = "Azure region (e.g. eastus, westus, canadaeast)" + default = "eastus" +} + +# ── Storage Account ────────────────────────────────────────── +variable "storage_account_name" { + description = "Azure Storage Account name (must be globally unique, lowercase alphanumeric)" + default = "openclawstorage" +} + +variable "storage_share_name" { + description = "Azure File Share name" + default = "openclaw-data" +} + +# ── Function App Configuration ─────────────────────────────── + +variable "function_storage_account_name" { + description = "Azure Storage Account name for Function App runtime (must be globally unique)" + default = "openclawfunctionstg" +} + +variable "app_service_plan_name" { + description = "App Service Plan name for Function App" + default = "openclaw-asp" +} + +variable "function_app_name" { + description = "Azure Function App name (must be globally unique)" + default = "openclaw-function-app" +} + +# ── Auto-Stop Configuration ────────────────────────────────── + +variable "idle_timeout_minutes" { + description = "Minutes of inactivity before auto-stopping ACI" + default = 30 + type = number +} + +# ── Container Group 
───────────────────────────────────────── +variable "container_group_name" { + description = "Azure Container Group name" + default = "openclaw-container" +} + +variable "container_image" { + description = "Docker container image (e.g. myregistry.azurecr.io/openclaw:latest or docker.io/library/ubuntu:latest)" + default = "docker.io/library/ubuntu:24.04" +} + +variable "dns_name_label" { + description = "DNS name label for public access (must be globally unique)" + default = "openclaw-aci" +} + +variable "cpu_cores" { + description = "Number of CPU cores (0.5, 1, 1.5, 2, etc.)" + default = 1 + type = number +} + +variable "memory_gb" { + description = "Memory in GB (must be between 0.5 and 64, paired with CPU)" + default = 1 + type = number +} + +# ── Secrets ────────────────────────────────────────────────── +# These are injected from .env file via TF_VAR_ environment variables +# See .envrc for how they are loaded with direnv +# Or manually: export TF_VAR_openrouter_api_key=$OPENROUTER_API_KEY before terraform apply + +variable "openrouter_api_key" { + description = "OpenRouter API key (from .env)" + sensitive = true + default = "" + type = string +} + +variable "telegram_bot_token" { + description = "Telegram Bot Token (from .env)" + sensitive = true + default = "" + type = string +} + +variable "openclaw_gateway_token" { + description = "OpenClaw Gateway Token (from .env)" + sensitive = true + default = "" + type = string +} + +variable "brave_api_key" { + description = "Brave Search API key (from .env)" + sensitive = true + default = "" + type = string +} + +variable "telegram_owner_id" { + description = "Your Telegram numeric user ID (from .env)" + default = "" + type = string +} + +variable "slack_app_token" { + description = "Slack App-Level Token (from .env)" + sensitive = true + default = "" + type = string +} + +variable "slack_bot_token" { + description = "Slack Bot User OAuth Token (from .env)" + sensitive = true + default = "" + type = string +} From 
5b0e8206e5f5935ba7d501f3b29ee57b41008dd2 Mon Sep 17 00:00:00 2001 From: PCBZ Date: Sun, 19 Apr 2026 18:37:12 -0700 Subject: [PATCH 2/3] Add null resource for automatic deployment of Azure Function code --- terraform/azure/FUNCTION-DEPLOYMENT.md | 342 ------------------------- terraform/azure/main.tf | 18 ++ 2 files changed, 18 insertions(+), 342 deletions(-) delete mode 100644 terraform/azure/FUNCTION-DEPLOYMENT.md diff --git a/terraform/azure/FUNCTION-DEPLOYMENT.md b/terraform/azure/FUNCTION-DEPLOYMENT.md deleted file mode 100644 index f7938a1..0000000 --- a/terraform/azure/FUNCTION-DEPLOYMENT.md +++ /dev/null @@ -1,342 +0,0 @@ -# Azure Function Deployment Guide for OpenClaw - -完全自动化的 OpenClaw 部署,使用 Azure Function 处理 Webhook 并自动管理 ACI。 - -## 架构 - -``` -Telegram/Slack Webhook - ↓ - Azure Function - (webhook handler) - ↓ - 检查 ACI 状态 - ├→ 如果停止 → 启动 ACI (30-60s) - └→ 如果运行 → 直接继续 - ↓ - 转发消息到 ACI - (OpenClaw Gateway) - ↓ - 处理消息 - (读/写 NAS 存储) - ↓ - 返回响应到用户 - ↓ - 30 分钟无活动 - ↓ - Timer Function 停止 ACI -``` - -## 成本 - -``` -使用场景: 每天 5 条消息,每条 2 分钟处理 - -ACI 运行成本: ~¥36/月(2CPU/4GB 常驻) - ↓ 优化后 -ACI 运行成本: ~$2/月(按需启停) -Storage (File Share): ~$0.5/月 -Function App: ~$0(消费计划,免费额度) -──────────────────────────────────── -总计: ~$2.50/月 ✨(省 92%) -``` - -## 部署前提 - -✅ 完成了 Terraform 配置(main.tf, variables.tf 等) -✅ 已创建 Storage Account for ACI 数据 -✅ 已创建 Function App Infrastructure - -## 配置项 - -在 `terraform.tfvars` 中可以配置: - -```hcl -# 自动停止空闲 ACI 的时间(分钟) -idle_timeout_minutes = 30 # 改为你想要的分钟数 -``` - -这个值会自动传递给 Function App,控制多久没有活动就停止 ACI。 - -## 部署步骤 - -### Step 1: Terraform 部署基础设施 - -```bash -cd terraform/azure -direnv allow -terraform apply -``` - -应看到输出: -``` -Outputs: - -function_app_name = "openclaw-func-001" -function_app_default_hostname = "openclaw-func-001.azurewebsites.net" -eci_intranet_ip = "10.x.x.x" -``` - -### Step 2: 获取 Publish Profile - -```bash -# 获取 Function App 的 publish profile -az functionapp deployment list-publishing-credentials \ - --name openclaw-func-001 \ - 
--resource-group openclaw-rg \ - --query publishingPassword \ - -o tsv -``` - -### Step 3: 部署函数代码 - -选项 A:使用 Azure CLI - -```bash -cd terraform/azure/function - -# 创建 function.json (for webhook handler) -cat > webhook/function.json << 'EOF' -{ - "scriptFile": "../webhook.py", - "bindings": [ - { - "authLevel": "anonymous", - "type": "httpTrigger", - "direction": "in", - "name": "req", - "methods": ["post"], - "route": "webhook" - }, - { - "type": "http", - "direction": "out", - "name": "$return" - } - ] -} -EOF - -# 创建 function.json (for auto-stop timer) -cat > auto-stop/function.json << 'EOF' -{ - "scriptFile": "../auto_stop.py", - "bindings": [ - { - "name": "mytimer", - "type": "timerTrigger", - "direction": "in", - "schedule": "0 */30 * * * *" - } - ] -} -EOF - -# 部署 -func azure functionapp publish openclaw-func-001 -``` - -选项 B:使用 VS Code Azure Functions 扩展 - -1. 打开 VS Code -2. 装 "Azure Functions" 扩展 -3. Sign in to Azure -4. Deploy to Function App - -### Step 4: 配置 Webhook URL - -获取 webhook URL: - -```bash -# 查看 function url -az functionapp function show \ - --function-name webhook \ - --name openclaw-func-001 \ - --resource-group openclaw-rg \ - --query "invokeUrlTemplate" -``` - -输出示例: -``` -https://openclaw-func-001.azurewebsites.net/api/webhook -``` - -**Telegram 配置**: - -```bash -BOT_TOKEN="YOUR_BOT_TOKEN" -WEBHOOK_URL="https://openclaw-func-001.azurewebsites.net/api/webhook" - -curl -X POST "https://api.telegram.org/bot${BOT_TOKEN}/setWebhook" \ - -d "url=${WEBHOOK_URL}" -``` - -**Slack 配置**: - -1. 进入 Slack App Settings -2. Event Subscriptions → Enable Events -3. Request URL: `https://openclaw-func-001.azurewebsites.net/api/webhook` -4. Request URL Verification: Slack 会发送验证请求 - -### Step 5: 测试 - -发送一条消息: - -``` -你好,OpenClaw! -``` - -**观察**: - -1. ACI 自动启动 (~30-60 秒) -2. 消息被处理 -3. 收到回复 -4. 
Function App 日志显示执行信息 - -查看日志: - -```bash -# 实时日志 -func start - -# 或在 Azure Portal -# Function App → Functions → 查看 Monitor/Logs -``` - -### Step 6: 配置定时停止 - -Timer 已配置为每 30 分钟运行一次。 - -检查定时器: - -```bash -# 查看 auto-stop 函数 -az functionapp function show \ - --function-name auto-stop \ - --name openclaw-func-001 \ - --resource-group openclaw-rg -``` - -## 运维 - -### 查看实时日志 - -```bash -# Azure Portal 中 -# → Function App → Functions → Monitor → 查看最近调用 - -# 或 CLI -az functionapp log tail \ - --name openclaw-func-001 \ - --resource-group openclaw-rg -``` - -### 手动停止 ACI (测试) - -```bash -az container stop \ - --name openclaw-container \ - --resource-group openclaw-rg -``` - -### 手动启动 ACI (测试) - -```bash -az container start \ - --name openclaw-container \ - --resource-group openclaw-rg -``` - -### 更新函数代码 - -```bash -cd terraform/azure/function - -# 修改 webhook.py 或 auto_stop.py - -# 重新部署 -func azure functionapp publish openclaw-func-001 -``` - -## 成本监控 - -```bash -# 在 Azure Portal -# → Cost Management + Billing -# → 查看每日成本 -``` - -设置告警: - -```bash -# Cost Alerts → 设置 $5/月 告警 -``` - -## 常见问题 - -### Q1: Webhook 无法触发? - -**排查**: -1. 检查 webhook URL 是否正确 -2. 查看 Function App 日志 -3. 测试: `curl -X POST https://openclaw-func-001.azurewebsites.net/api/webhook -d '{"test": true}'` - -### Q2: ACI 没有启动? - -**排查**: -1. 检查 Function App 身份认证 (IAM Role Assignment) -2. 查看 Function App 错误日志 -3. 确认 ACI 权限 - -### Q3: 消息处理太慢? - -**注意**: 第一条消息会因为冷启动慢 30-60 秒。这是正常的。 - -### Q4: NAS 数据丢失? - -**排查**: -1. 检查 File Share 挂载是否正确 -2. 查看 ACI 日志 -3. 验证 `/root/.openclaw` 权限 - -## 下一步 - -- [ ] 测试 Telegram/Slack webhook -- [ ] 发送测试消息 -- [ ] 监控成本(第一周) -- [ ] 调整 Timer 间隔(如需要) -- [ ] 实现高级活动追踪(可选) - -## 高级:活动追踪 - -当前实现: 30 分钟后自动停止 - -改进方案 (需要额外实现): -1. 将最后活动时间存储在 Azure Storage Table / Cosmos DB -2. Webhook Handler 更新时间戳 -3. Auto-stop 查询时间戳 - -```python -# 示例代码 -from azure.data.tables import TableClient - -def update_activity(): - client = TableClient.from_connection_string(...) 
- entity = { - 'PartitionKey': 'openclaw', - 'RowKey': 'last_activity', - 'timestamp': time.time() - } - client.upsert_entity(entity) - -def get_last_activity(): - client = TableClient.from_connection_string(...) - entity = client.get_entity('openclaw', 'last_activity') - return entity['timestamp'] -``` - -## 支持 - -- [Azure Functions 文档](https://learn.microsoft.com/en-us/azure/azure-functions/) -- [Azure Container Instances 文档](https://learn.microsoft.com/en-us/azure/container-instances/) -- [Azure CLI 参考](https://learn.microsoft.com/en-us/cli/azure/) diff --git a/terraform/azure/main.tf b/terraform/azure/main.tf index e1433da..8c46840 100644 --- a/terraform/azure/main.tf +++ b/terraform/azure/main.tf @@ -4,6 +4,10 @@ terraform { source = "hashicorp/azurerm" version = "~> 3.0" } + null = { + source = "hashicorp/null" + version = "~> 3.0" + } } } @@ -247,3 +251,17 @@ resource "azurerm_role_assignment" "function_aci_management" { role_definition_name = "Contributor" principal_id = azurerm_linux_function_app.openclaw.identity[0].principal_id } + +# ── Auto-deploy Function Code ──────────────────────────────── +# Automatically publishes Python functions to Azure after infrastructure is created + +resource "null_resource" "deploy_function" { + provisioner "local-exec" { + command = "cd ${path.module}/function && func azure functionapp publish ${azurerm_linux_function_app.openclaw.name}" + } + + depends_on = [ + azurerm_linux_function_app.openclaw, + azurerm_role_assignment.function_aci_management + ] +} From 253d9628ff80b1bf4c334a2108618c38ab564c77 Mon Sep 17 00:00:00 2001 From: PCBZ Date: Mon, 20 Apr 2026 11:20:23 -0700 Subject: [PATCH 3/3] Add configuration management for OpenClaw and upload functionality to Azure File Share --- terraform/azure/function/local.settings.json | 4 +- terraform/azure/main.tf | 141 ++++++++++++++++--- terraform/azure/openclaw.json.tpl | 93 ++++++++++++ 3 files changed, 213 insertions(+), 25 deletions(-) create mode 100644 
terraform/azure/openclaw.json.tpl diff --git a/terraform/azure/function/local.settings.json b/terraform/azure/function/local.settings.json index 0280fd7..e7c807b 100644 --- a/terraform/azure/function/local.settings.json +++ b/terraform/azure/function/local.settings.json @@ -20,9 +20,7 @@ "AZURE_CLIENT_SECRET": "your-client-secret" }, "Host": { - "CORS": { - "AllowedOrigins": ["*"] - }, + "CORS": "*", "LocalHttpPort": 7071 } } diff --git a/terraform/azure/main.tf b/terraform/azure/main.tf index 8c46840..9857fa0 100644 --- a/terraform/azure/main.tf +++ b/terraform/azure/main.tf @@ -52,8 +52,64 @@ resource "azurerm_storage_share" "openclaw" { depends_on = [azurerm_storage_account.openclaw] } -# ── Container Group (initially stopped) ─────────────────────── -# This container will be started by Azure Function on webhook trigger +# ── Generate and Upload OpenClaw Configuration ────────────── +# Create openclaw.json with Telegram and Slack configuration + +locals { + openclaw_config = templatefile("${path.module}/openclaw.json.tpl", { + openclaw_gateway_token = var.openclaw_gateway_token + openrouter_api_key = var.openrouter_api_key + telegram_bot_token = var.telegram_bot_token + telegram_owner_id = var.telegram_owner_id + slack_app_token = try(var.slack_app_token, "") + slack_bot_token = try(var.slack_bot_token, "") + brave_api_key = var.brave_api_key + }) +} + +# Write config locally for verification +resource "local_file" "openclaw_config" { + content = local.openclaw_config + filename = "${path.module}/.openclaw.json" +} + +# Auto-upload config to File Share using Azure CLI +resource "null_resource" "upload_openclaw_config" { + provisioner "local-exec" { + command = <<-EOT + set -e + + ACCOUNT_NAME="${azurerm_storage_account.openclaw.name}" + SHARE_NAME="${azurerm_storage_share.openclaw.name}" + RESOURCE_GROUP="${azurerm_resource_group.openclaw.name}" + CONFIG_FILE="${local_file.openclaw_config.filename}" + + # Get storage account key + STORAGE_KEY=$(az storage 
account keys list \ + --account-name "$ACCOUNT_NAME" \ + --resource-group "$RESOURCE_GROUP" \ + --query "[0].value" -o tsv) + + # Upload config file + az storage file upload \ + --account-name "$ACCOUNT_NAME" \ + --account-key "$STORAGE_KEY" \ + --share-name "$SHARE_NAME" \ + --source "$CONFIG_FILE" \ + --path "openclaw.json" \ + --output none + + echo "✅ openclaw.json uploaded successfully" + EOT + } + + depends_on = [ + azurerm_storage_share.openclaw, + local_file.openclaw_config + ] +} + +# ── Container Group ────────────────────────────────────────── resource "azurerm_container_group" "openclaw" { name = var.container_group_name @@ -70,16 +126,10 @@ resource "azurerm_container_group" "openclaw" { memory = var.memory_gb # Environment variables - # API Keys are loaded from .env file via TF_VAR_ environment variables - # Before running terraform: source .env or use direnv with .envrc environment_variables = { OPENROUTER_API_KEY = try(var.openrouter_api_key, "") - TELEGRAM_BOT_TOKEN = try(var.telegram_bot_token, "") OPENCLAW_GATEWAY_TOKEN = try(var.openclaw_gateway_token, "") BRAVE_API_KEY = try(var.brave_api_key, "") - TELEGRAM_OWNER_ID = try(var.telegram_owner_id, "") - SLACK_APP_TOKEN = try(var.slack_app_token, "") - SLACK_BOT_TOKEN = try(var.slack_bot_token, "") OPENCLAW_ONBOARD_NON_INTERACTIVE = "1" } @@ -100,7 +150,7 @@ resource "azurerm_container_group" "openclaw" { } # Exposed ports for external access - exposed_ports { + exposed_port { port = 18789 protocol = "TCP" } @@ -113,7 +163,10 @@ resource "azurerm_container_group" "openclaw" { app = "openclaw" } - depends_on = [azurerm_storage_share.openclaw] + depends_on = [ + azurerm_storage_share.openclaw, + null_resource.upload_openclaw_config + ] } # ── Network Security Group ─────────────────────────────────── @@ -204,9 +257,6 @@ resource "azurerm_linux_function_app" "openclaw" { http2_enabled = true app_scale_limit = 200 elastic_instance_minimum = 0 - - # For function timeout - function_app_scale_limit = 200 
} # Application settings @@ -245,23 +295,70 @@ resource "azurerm_linux_function_app" "openclaw" { } # ── Role Assignment for Function App to manage ACI ────────── - -resource "azurerm_role_assignment" "function_aci_management" { - scope = azurerm_resource_group.openclaw.id - role_definition_name = "Contributor" - principal_id = azurerm_linux_function_app.openclaw.identity[0].principal_id -} +# NOTE: Azure Student accounts may not have permission to create role assignments +# If Terraform apply fails with authorization error, manually assign in Azure Portal: +# 1. Go to Function App → Settings → Identity → copy Object ID +# 2. Go to Resource Group → Access Control (IAM) → Add → Contributor role +# 3. Paste the Object ID and save + +# resource "azurerm_role_assignment" "function_aci_management" { +# scope = azurerm_resource_group.openclaw.id +# role_definition_name = "Contributor" +# principal_id = azurerm_linux_function_app.openclaw.identity[0].principal_id +# } # ── Auto-deploy Function Code ──────────────────────────────── # Automatically publishes Python functions to Azure after infrastructure is created +# Note: Ignores trigger sync errors which are non-critical resource "null_resource" "deploy_function" { provisioner "local-exec" { - command = "cd ${path.module}/function && func azure functionapp publish ${azurerm_linux_function_app.openclaw.name}" + command = <<-EOT + cd ${path.module}/function + OUTPUT=$(func azure functionapp publish ${azurerm_linux_function_app.openclaw.name} 2>&1 || true) + echo "$OUTPUT" + + # Check if deployment succeeded despite trigger sync error + if echo "$OUTPUT" | grep -q "Remote build succeeded"; then + exit 0 + else + exit 1 + fi + EOT + } + + depends_on = [ + azurerm_linux_function_app.openclaw + ] +} + +# Verify config was uploaded +resource "null_resource" "verify_config_upload" { + provisioner "local-exec" { + command = <<-EOT + echo "Verifying openclaw.json was uploaded to File Share..." 
+ STORAGE_KEY=$(az storage account keys list \ + --account-name ${azurerm_storage_account.openclaw.name} \ + --resource-group ${azurerm_resource_group.openclaw.name} \ + --query "[0].value" -o tsv) + + FILE_EXISTS=$(az storage file exists \ + --account-name ${azurerm_storage_account.openclaw.name} \ + --account-key "$STORAGE_KEY" \ + --share-name ${azurerm_storage_share.openclaw.name} \ + --path "openclaw.json" \ + --query "exists" -o tsv) + + if [ "$FILE_EXISTS" = "true" ]; then + echo "✅ openclaw.json verified in File Share" + else + echo "❌ ERROR: openclaw.json not found in File Share" + exit 1 + fi + EOT } depends_on = [ - azurerm_linux_function_app.openclaw, - azurerm_role_assignment.function_aci_management + null_resource.upload_openclaw_config ] } diff --git a/terraform/azure/openclaw.json.tpl b/terraform/azure/openclaw.json.tpl new file mode 100644 index 0000000..5d791c3 --- /dev/null +++ b/terraform/azure/openclaw.json.tpl @@ -0,0 +1,93 @@ +{ + "gateway": { + "bind": "lan", + "auth": { + "mode": "token", + "token": "${openclaw_gateway_token}" + }, + "mode": "local", + "remote": { + "token": "${openclaw_gateway_token}" + } + }, + "agents": { + "defaults": { + "model": { + "primary": "openrouter/openai/gpt-4o-mini", + "fallbacks": ["openrouter/auto"] + }, + "models": { + "openrouter/anthropic/claude-opus-4.6": {"alias": "opus"}, + "openrouter/anthropic/claude-sonnet-4.6": {"alias": "sonnet"}, + "openrouter/anthropic/claude-haiku-4.5": {"alias": "haiku"}, + "openrouter/openai/gpt-5.4": {"alias": "gpt5"}, + "openrouter/openai/gpt-4o": {"alias": "gpt4o"}, + "openrouter/openai/gpt-4o-mini": {"alias": "mini"}, + "openrouter/google/gemini-2.5-pro": {"alias": "gemini-pro"}, + "openrouter/google/gemini-2.5-flash": {"alias": "flash"}, + "openrouter/deepseek/deepseek-v3.2": {"alias": "deepseek"}, + "openrouter/deepseek/deepseek-r1": {"alias": "r1"}, + "openrouter/meta-llama/llama-3.3-70b-instruct:free": {"alias": "llama"}, + "openrouter/auto": {"alias": "auto"} + }, + 
"compaction": { + "mode": "safeguard", + "reserveTokensFloor": 4000 + } + } + }, + "tools": { + "web": { + "search": { + "enabled": true, + "provider": "brave" + }, + "fetch": { + "enabled": false + } + }, + "deny": ["browser"] + }, + "plugins": { + "load": { + "paths": [ + "/usr/lib/node_modules/openclaw/dist/extensions/telegram"%{if slack_app_token != "" && slack_bot_token != ""}, + "/usr/lib/node_modules/openclaw/dist/extensions/slack"%{endif} + ] + }, + "entries": { + "telegram": { "enabled": true }%{if slack_app_token != "" && slack_bot_token != ""}, + "slack": { "enabled": true }%{endif}, + "openrouter": { "enabled": true }, + "brave": { + "enabled": true, + "config": { + "webSearch": { + "apiKey": "${brave_api_key}" + } + } + } + } + }, + "channels": { + "telegram": { + "enabled": true, + "accounts": { + "default": { + "botToken": "${telegram_bot_token}", + "dmPolicy": "allowlist", + "groupPolicy": "open"%{if telegram_owner_id != ""}, + "allowFrom": ["${telegram_owner_id}"]%{endif} + } + } + }%{if slack_app_token != "" && slack_bot_token != ""}, + "slack": { + "enabled": true, + "mode": "socket", + "appToken": "${slack_app_token}", + "botToken": "${slack_bot_token}", + "dmPolicy": "open", + "groupPolicy": "open" + }%{endif} + } +}