diff --git a/.claude/skills/deploy-cuga/SKILL.md b/.claude/skills/deploy-cuga/SKILL.md new file mode 100644 index 0000000..888f3f6 --- /dev/null +++ b/.claude/skills/deploy-cuga/SKILL.md @@ -0,0 +1,75 @@ +--- +name: deploy-cuga +description: Ship cuga-apps changes to the running deployments — refresh the LOCAL all-in-one container (docker compose on :8080), build+push the CODE ENGINE image and roll the `cuga-agent-apps` service, and rebuild+publish the HUGGING FACE Space. Use when the user wants to deploy, ship, release, or "push live" changes to the gallery / CE / HF, or to update the local container to reflect current code. Builds compile from the current working tree (no commit required). +--- + +# Deploy cuga-apps (local + Code Engine + Hugging Face) + +This skill ships the current working tree to three targets via the bundled +orchestrator `deploy.sh`. Each target is independent; CE always runs before HF +because the HF static build bakes in the CE service URL. + +| Target | What it does | Result | +|---|---|---| +| `local` | `build/docker-compose.yml` → rebuild + run the all-in-one container | http://localhost:8080 | +| `ce` | `build/ce/build_and_push.sh` → push image to ICR, then `build/ce/deploy.sh` → roll `cuga-agent-apps` | the CE gallery URL | +| `hf` | `build/hf/build.sh` → static UI (bakes CE URL), then clone the HF Space, copy `dist/`, commit + push | `https://-.hf.space/` | + +The all-in-one image bundles the UI + ship-ready apps + the 5 internal MCP +servers + the stats collector — so CE/local pick up app, MCP, collector, AND UI +changes in one image. (The standalone `cuga-apps-mcp-*` servers in +`build/mcp_servers/` are a SEPARATE deploy — not part of this skill.) + +## Steps to follow + +1. **Scope.** Default to all three (`local ce hf`). If the user named specific + targets (e.g. "just CE", "CE and HF"), use only those. + +2. 
**Show the plan first.** Run a dry-run so the user sees exactly what will + happen, then proceed (invoking this skill is the go-ahead to deploy): + ```bash + .claude/skills/deploy-cuga/deploy.sh --dry-run <targets> + ``` + +3. **Check prerequisites** for the chosen targets and surface anything missing + *before* the real run (don't half-deploy): + - `local` / `ce` build: `docker` running. + - `ce`: `ibmcloud` logged in, a CE project selected + (`ibmcloud ce project current`), and `ibmcloud cr login` done (registry + push). The script checks the project; registry auth fails loudly if absent. + - `hf`: push auth — either `HF_TOKEN` (a write token) exported, or working + git credentials / SSH for `huggingface.co`. Without either, the clone/push + fails with a clear message. The Space defaults to `anupamamurthi/cuga-agent-apps` + (override with `HF_SPACE`). + +4. **Run it** for the chosen targets: + ```bash + .claude/skills/deploy-cuga/deploy.sh <targets> + ``` + The script prints a per-target succeeded/failed summary and exits non-zero + if any target failed. The builds are heavy (the all-in-one image pre-pulls + model weights) — expect several minutes; don't abort early. + +5. **Verify** each target that ran, and report the live URL: + - `local`: `curl -sf -o /dev/null -w "%{http_code}\n" http://localhost:8080/` + (expect 200). `docker compose -f build/docker-compose.yml ps` to confirm up. + - `ce`: `deploy.sh` prints the service URL; hit `/` (expect 200). The + gallery host is the `cuga-agent-apps.…codeengine.appdomain.cloud` URL. + - `hf`: confirm the push landed; the Space rebuilds automatically at + `https://<owner>-<space>.hf.space/` (give it a minute). + +6. **Report** the outcome plainly: which targets succeeded, the live URLs, and + any failure with the script's error output. If a target failed, do NOT + claim the deploy is live. 
+ +## Notes & overrides + +- Useful env overrides (pass inline): `IMAGE_TAG`, `NAMESPACE`, `APP_NAME` (CE); + `HF_SPACE`, `HF_USER`, `HF_TOKEN`, `ALLINONE_BASE` (HF). See `deploy.sh -h`. +- These deploys are **outward-facing**. If the user invoked the skill with no + clear scope or seems unsure, confirm scope before the real run; otherwise the + invocation is sufficient authorization. +- Builds use the working tree directly — no `git commit`/`pull` is required + first (matches `build/DEPLOYMENT.md`). +- Secrets live in `build/.env` (gitignored) and the CE `app-env` secret — never + commit them and never echo their values. diff --git a/.claude/skills/deploy-cuga/deploy.sh b/.claude/skills/deploy-cuga/deploy.sh new file mode 100755 index 0000000..e26e872 --- /dev/null +++ b/.claude/skills/deploy-cuga/deploy.sh @@ -0,0 +1,149 @@ +#!/usr/bin/env bash +# ===================================================================== +# deploy.sh — ship cuga-apps to one or more targets: +# +# local refresh the local all-in-one (docker compose) on :8080 +# ce build+push the all-in-one image and roll the Code Engine +# service `cuga-agent-apps` +# hf rebuild the static umbrella UI and publish it to the +# Hugging Face Space (it links into the CE service) +# +# Usage: +# ./deploy.sh # all three (local, ce, hf) +# ./deploy.sh ce # just Code Engine +# ./deploy.sh ce hf # CE then HF +# ./deploy.sh local # just the local container +# ./deploy.sh --dry-run all # print the plan, run nothing +# +# Order note: when both `ce` and `hf` run, CE goes first — the HF build +# bakes in the CE service URL, so CE should be live first. 
+# +# Env overrides: +# # Code Engine / image +# NAMESPACE ICR namespace (default: routing_namespace) +# IMAGE_TAG image tag (default: latest) +# APP_NAME CE app name (default: cuga-agent-apps) +# # Hugging Face +# HF_SPACE owner/space (default: anupamamurthi/cuga-agent-apps) +# HF_USER git username for push (default: owner part of HF_SPACE) +# HF_TOKEN HF write token; if set it's used for the push, else your +# cached git credentials / SSH are used +# ALLINONE_BASE CE base URL baked into the HF build (default: the CE +# host for APP_NAME; see build/hf/build.sh) +# ===================================================================== +set -uo pipefail + +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # .claude/skills/deploy-cuga +REPO_ROOT="$(cd "$HERE/../../.." && pwd)" # repo root + +DRY=0 +TARGETS=() +for a in "$@"; do + case "$a" in + --dry-run) DRY=1 ;; + all) TARGETS+=(local ce hf) ;; + local|ce|hf) TARGETS+=("$a") ;; + -h|--help) sed -n '2,/^# ===/p' "$0" | sed 's/^# \?//'; exit 0 ;; + *) echo "ERROR: unknown target '$a' (want: local | ce | hf | all)" >&2; exit 2 ;; + esac +done +[[ ${#TARGETS[@]} -eq 0 ]] && TARGETS=(local ce hf) + +# De-dupe while keeping a stable order, and force ce-before-hf. 
+have() { printf '%s\n' "${TARGETS[@]}" | grep -qx "$1"; } +ORDERED=() +for t in local ce hf; do have "$t" && ORDERED+=("$t"); done +TARGETS=("${ORDERED[@]}") + +say() { echo "── $* ──"; } +run() { echo "+ $*"; [[ $DRY == 1 ]] && return 0; "$@"; } +die() { echo "ERROR: $*" >&2; exit 1; } +need() { command -v "$1" >/dev/null 2>&1 || die "$1 not on PATH ($2)"; } + +echo "════════════════════════════════════════════════════════════════" +echo " cuga-apps deploy" +echo " repo : $REPO_ROOT" +echo " targets : ${TARGETS[*]}" +echo " dry-run : $([[ $DRY == 1 ]] && echo yes || echo no)" +echo "════════════════════════════════════════════════════════════════" + +declare -a OK=() FAIL=() +mark() { if [[ $1 == 0 ]]; then OK+=("$2"); else FAIL+=("$2"); fi; } + +# ── local: rebuild + run the all-in-one container on :8080 ──────────── +do_local() { + say "LOCAL — rebuild + run all-in-one (docker compose) on :8080" + need docker "local container build" + ( cd "$REPO_ROOT/build" || exit 1 + [[ -f .env ]] || { [[ -f .env.example ]] && cp .env.example .env && echo " (created build/.env from .env.example — add keys)"; } + run docker compose up --build -d + ) +} + +# ── ce: build+push the image, then roll the CE service ─────────────── +do_ce() { + say "CODE ENGINE — build+push image, then deploy cuga-agent-apps" + need docker "image build" + need ibmcloud "Code Engine deploy" + if [[ $DRY == 0 ]]; then + ibmcloud ce project current >/dev/null 2>&1 \ + || die "no Code Engine project selected — run: ibmcloud ce project select --name " + fi + run "$REPO_ROOT/build/ce/build_and_push.sh" || return 1 + run "$REPO_ROOT/build/ce/deploy.sh" || return 1 +} + +# ── hf: rebuild the static UI, then push it to the HF Space ─────────── +do_hf() { + say "HUGGING FACE — rebuild static umbrella UI, publish to Space" + need git "HF Space push" + local space="${HF_SPACE:-anupamamurthi/cuga-agent-apps}" + local user="${HF_USER:-${space%%/*}}" + + # Build the static SPA (ALLINONE_BASE, if set, is baked 
in). + if [[ -n "${ALLINONE_BASE:-}" ]]; then + run env ALLINONE_BASE="$ALLINONE_BASE" "$REPO_ROOT/build/hf/build.sh" || return 1 + else + run "$REPO_ROOT/build/hf/build.sh" || return 1 + fi + [[ $DRY == 1 ]] && { echo "+ (dry-run) would publish build/hf/dist → $space"; return 0; } + + local url="https://huggingface.co/spaces/$space" + [[ -n "${HF_TOKEN:-}" ]] && url="https://${user}:${HF_TOKEN}@huggingface.co/spaces/$space" + + local tmp; tmp="$(mktemp -d)" + trap 'rm -rf "$tmp"' RETURN + echo "+ git clone $space" + git clone --depth 1 "$url" "$tmp" \ + || die "could not clone HF Space $space — set HF_TOKEN or configure git/HF auth" + # Replace the Space contents with the fresh build (keep .git). + find "$tmp" -mindepth 1 -maxdepth 1 ! -name .git -exec rm -rf {} + + cp -r "$REPO_ROOT/build/hf/dist/." "$tmp/" + ( cd "$tmp" || exit 1 + git add -A + if git diff --cached --quiet; then + echo " (no changes to publish)" + else + git commit -m "deploy: update umbrella UI ($(date -u +%Y-%m-%dT%H:%MZ))" >/dev/null + git push || die "git push to HF Space failed — check HF_TOKEN / credentials" + echo " ✓ pushed to $space" + fi + ) +} + +for t in "${TARGETS[@]}"; do + echo + case "$t" in + local) do_local; mark $? local ;; + ce) do_ce; mark $? ce ;; + hf) do_hf; mark $? 
hf ;; + esac +done + +echo +echo "════════════════════════════════════════════════════════════════" +echo " Summary" +echo " succeeded: ${OK[*]:-}" +echo " failed : ${FAIL[*]:-}" +echo "════════════════════════════════════════════════════════════════" +[[ ${#FAIL[@]} -eq 0 ]] || exit 1 diff --git a/.gitignore b/.gitignore index 6c8b5cb..1817db2 100644 --- a/.gitignore +++ b/.gitignore @@ -40,4 +40,8 @@ cuga-apps/apps/arch_diagram/diagrams.db cuga-apps/apps/ibm_whats_new/.store.json cuga-apps/apps/web_researcher/.store.json CUGA_OpenClaw_NemoClaw_DeepAgents_Smolagents_Hermes_Comparison_Revised.docx -.terminal \ No newline at end of file +.terminal +cuga-apps/docs/architecture_app_anatomy_cloud_advisor.png +cuga-apps/apps/__pycache__/_llm.cpython-311.pyc +cuga-apps/apps/__pycache__/__init__.cpython-311.pyc +*.pyc \ No newline at end of file diff --git a/HF_Blog.md b/HF_Blog.md index 6552446..a341899 100644 --- a/HF_Blog.md +++ b/HF_Blog.md @@ -1,10 +1,11 @@ -# Build real agentic apps on CUGA: `CUGA-apps` two dozen working examples on a lightweight agent harness +# Build real agentic apps on CUGA: two dozen working examples on a lightweight harness -> **TL;DR** — Building an agent is mostly plumbing: tools, state, guardrails, scaling from one agent to many. CUGA (pip install cuga), the open-source harness from IBM handles that, so you write just a tool list and a prompt. We built two-dozen single-file apps to prove it. Read one end to end here, then see how the same agent runs governed in production without a rewrite. +> **TL;DR** — Building an agent is mostly plumbing: tools, state, guardrails, scaling from one agent to many. CUGA (pip install cuga), the Agent Harness for the Enterprise from IBM, handles that, so you write just a tool list and a prompt. We built two-dozen single-file apps to prove it. 
Read one end to end here, then see how the same agent runs governed in production without a rewrite. Most agentic apps start with a week of plumbing before the agent does anything useful. You pick a framework, wire up a model client, write tool adapters, build some way to stream state to a UI, and somewhere in there you also decide what the agent is actually for. The interesting part arrives last. -[CUGA](https://github.com/cuga-project/cuga-agent) inverts that. Short for Configurable Generalist Agent, it's the open-source agent harness from IBM Research that handles the planning, the execution loop, the tool calls, and the state plumbing for you, so the part you write shrinks to a list of tools and a system prompt. To show what that feels like in practice, we built [cuga-apps](https://github.com/cuga-project/cuga-apps): 24 small, working apps, each a single FastAPI file wrapping one `CugaAgent`, from a movie recommender to an IBM Cloud architecture advisor. They exist to be read and copied. +[CUGA](https://github.com/cuga-project/cuga-agent) inverts that. Short for Configurable Generalist Agent, it's the open-source agent harness from IBM that handles the planning, the execution loop, the tool calls, and the state plumbing for you. What's left is the part that's actually yours: which tools the agent can reach, and what you tell it to do. To show what that feels like in practice, we built [cuga-apps](https://github.com/cuga-project/cuga-apps): two dozen small, working apps, each a single FastAPI file wrapping one `CugaAgent`, from a movie recommender to an IBM Cloud architecture advisor. They exist to be read and copied. + This article walks through one of them, names what the harness takes off your plate, and shows where the same code goes when you need it governed for production. No new framework to learn first. If you've written a FastAPI route, you can read every line. 
@@ -82,7 +83,9 @@ The split above only pays off because the generic half is already running somewh The reason there are two dozen matters more than any single one: once you've read the cloud advisor, you've read all of them. They share a skeleton — the movie recommender swaps the IBM catalog tool for the `knowledge` MCP server, the web researcher leans almost entirely on `web` — so cuga-apps is really a catalog of starting points. You clone the repo, find the app closest to your idea, and edit its tool list and prompt (`HOW_TO_BUILD_AN_APP_FAST.md` and `ADDING_AN_APP.md` walk through exactly that). A few apps were even generated by handing a coding assistant one spec file and a one-line brief — regular enough for a model to reproduce means regular enough for you to learn. You can [click through every one in the live gallery](https://cuga-agent-apps.1gxwxi8kos9y.us-east.codeengine.appdomain.cloud) before cloning anything. -And they fan out across families, so whatever you're building, one app already exercises the piece you need: a **research-and-knowledge** cluster (Paper Scout ranks arXiv papers by citation count; Wiki Dive and Web Researcher do cited synthesis; YouTube Research works from transcripts), an **everyday-productivity** cluster (city briefings, travel planning, recipe composing, trail discovery), a **document-and-media** cluster that ingests PDFs, audio, and video and answers over them with RAG, an **ops** corner watching live metrics and market prices, an **enterprise** example over real IBM product docs, **Ouroboros**, a seven-agent lead-gen system — the one to open for the multi-agent shape — and **Meetup Finder**, which drives a real headless Chromium through Playwright to pull structured events off Meetup, Luma, and Eventbrite (all of which killed their public search APIs), the one to open for browser automation — CUGA's original specialty, and the same web-agent muscle behind its #1 WebArena result. 
Two caveats before you clone: the real catalog lives in the inner `cuga-apps/cuga-apps/apps/` directory, not the outer one; and not every app is equally polished — the UI tags them "ship-ready," "for-later," or "exploratory" and defaults to ship-ready, so start from the cloud advisor or movie recommender for a working baseline. +They also fan out across families, so whatever you're building, one app already exercises the piece you need. There's a research cluster (Paper Scout ranks arXiv papers by citation count; Wiki Dive and Web Researcher do cited synthesis), an everyday-productivity set (city briefings, travel, recipes, trails), a document-and-media group that does RAG over PDFs, audio, and video, an ops corner watching live metrics, and an enterprise example over real IBM product docs. Ouroboros is a seven-agent lead-gen system; open it for the multi-agent shape. And Meetup Finder drives headless Chromium through Playwright to pull structured events off Meetup, Luma, and Eventbrite (all of which killed their public search APIs); open it for browser automation, which is where CUGA started and the muscle behind its strong WebArena results. + +Two caveats before you clone. The real catalog lives in the inner `cuga-apps/cuga-apps/apps/` directory, not the outer one. And not every app is equally polished, so the UI tags them ship-ready, for-later, or exploratory and defaults to ship-ready; start from the cloud advisor or movie recommender for a working baseline. ## Keeping your agent within the boundaries @@ -118,11 +121,13 @@ When one agent would drown in its own context (too many tools, too much evidence The other extension packages know-how rather than tools: Agent Skills, a folder with a `SKILL.md` playbook the agent pulls into context only when a task calls for it, so one prompt isn't carrying everything the agent might ever need to know. Both keep the same building blocks (tools, prompts, state, policies), just composed a level up. 
-## The moat: governed by construction +Ouroboros, the lead-gen app from earlier, makes this pattern concrete. It has a supervisor over seven specialists (scout, site auditor, voice-of-customer, person finder, stack scanner, revenue estimator, and a pitch-email writer that synthesizes). Each specialist is one skill loaded into a `CugaAgent`, and the supervisor calls it through an auto-generated `delegate_to_<name>` tool. Adding an eighth is a one-line factory, not a coordinator rewrite. Read its `main.py` and `ARCHITECTURE.md` if you want the multi-agent shape end to end. + +## Governed by construction -Where does CUGA sit relative to everything else you could build on? Most of the field splits two ways. Minimal developer libraries give you good primitives but leave you to assemble the governance — identity, audit, policy, approvals — yourself. Broad-access personal-agent runtimes demo fast precisely because they start with reach into your filesystem, shell, and browser, so the work becomes *constraining* access that already exists. +Where governance lives in the stack shapes how the production story goes. A minimal agent library hands you good primitives and leaves the governance (policy, approvals, audit, identity) for you to assemble. CUGA takes the other path: policy, human-in-the-loop approval, the `.cuga` state folder, and self-hosting are part of the harness from the first line, not a layer you add later. -CUGA is a third category: an enterprise harness where policy-as-code, human-in-the-loop approval, durable state, self-hosting, and data residency are first-class from the first line. That flips the direction of the hard work. From a personal-agent runtime you *govern upward*, retrofitting controls onto something built for access — a brittle overlay or a long-lived fork, expensive forever. 
From CUGA you *harden downward*: the control plane is already there, so the remaining work is tightening the sandbox around the few side-effecting tools, not inventing the governance around them. That's the moat — the governed path is the default, and the ungoverned shortcuts are the ones you opt into. It's also why the same agent definition carries from a laptop to a locked-down deployment without a rewrite. +That changes the direction of the work when you take an agent to production. You're not retrofitting controls onto something built for open access; the control plane is already there. The governed path is the default, and the ungoverned shortcuts are the ones you opt into. So the remaining job is narrow: tighten the sandbox around the few tools that actually touch the outside world, rather than invent the governance around them. ## Where the same agent ends up diff --git a/build/.env.example b/build/.env.example index 2ee1cc0..29be668 100644 --- a/build/.env.example +++ b/build/.env.example @@ -2,15 +2,25 @@ # Only the LLM provider block is required; everything else is optional. # ── LLM provider (pick ONE block) ─────────────────────────────────────────── -# Option A — watsonx +# Option A — watsonx + gpt-oss-120b (default for this image). +# AGENT_SETTING_CONFIG points CUGA's internal nodes at the bundled gpt-oss +# config; LLM_MODEL sets the outer model to match. To use a different watsonx +# model, change BOTH LLM_MODEL and the model_name values in +# apps/settings.watsonx.toml. LLM_PROVIDER=watsonx -LLM_MODEL=meta-llama/llama-3-3-70b-instruct +LLM_MODEL=openai/gpt-oss-120b +AGENT_SETTING_CONFIG=/app/apps/settings.watsonx.toml WATSONX_APIKEY= WATSONX_PROJECT_ID= # or WATSONX_SPACE_ID -# Option B — RITS (IBM Research) — comment out Option A and use these +# Option B — RITS (IBM Research) — comment out Option A and use these. 
+# NOTE: rits's internal config (settings.rits.toml) routes through a litellm +# proxy at localhost:4000 that this image does NOT run, and the RITS endpoint +# is IBM-Research-internal — so rits will not work in a plain Code Engine +# deployment without extra setup. # LLM_PROVIDER=rits # LLM_MODEL=gpt-oss-120b +# AGENT_SETTING_CONFIG=settings.rits.toml # RITS_API_KEY= # Option C — OpenAI diff --git a/build/CHEATSHEET.md b/build/CHEATSHEET.md new file mode 100644 index 0000000..577c07f --- /dev/null +++ b/build/CHEATSHEET.md @@ -0,0 +1,81 @@ +# Deployment Cheat Sheet + +Quick reference for **what gets deployed, where it lives, and what builds it**. +For the full walkthrough (prereqs, COS setup, troubleshooting) see +[DEPLOYMENT.md](DEPLOYMENT.md). + +## Shared coordinates (all CE deployments) + +| | | +|---|---| +| CE project | `ce-project-routing` (hash `1gxwxi8kos9y`) · region **us-east** · resource group `routing` | +| Registry | `icr.io` (global ICR) · namespace `routing_namespace` · pull secret `icr-secret-1` | +| Secrets | `build/.env` → CE secret `cuga-agent-apps-env` | + +## The deployables + +### 1. All-in-one gallery — apps + UI + MCP + stats (the real backend) +- **Lives at:** CE app **`cuga-agent-apps`** → `https://cuga-agent-apps.1gxwxi8kos9y.us-east.codeengine.appdomain.cloud` (nginx :8080) +- **Image:** `icr.io/routing_namespace/cuga-agent-apps:latest` +- **Contains:** 21 ship-ready apps + `usage-collector` (stats) + the umbrella UI + **5 bundled MCP servers** (web, knowledge, geo, finance, local) on loopback. `CUGA_TARGET=local` is baked in — it never calls the standalone MCP servers. +- **Builds from:** `build/Dockerfile` via `build/ce/build_and_push.sh` → `build/ce/deploy.sh` +- **Local:** `build/docker-compose.yml`, or `docker run --network host` on **:8080** + +### 2. 
Umbrella UI — Hugging Face Space (static launcher, no backend) +- **Lives at:** HF Space **`anupamamurthi/cuga-agent-apps`** → `https://anupamamurthi-cuga-agent-apps.hf.space/` +- **What it is:** static React bundle that just *links into* the CE all-in-one (`/a/<app>/`); the CE URL is baked in at build time. +- **Source:** `cuga-apps/ui/` · **Builds from:** `build/hf/build.sh` → `build/hf/dist/` → git push to the Space + +### 3. Standalone MCP servers — 7 individual CE apps (separate deployment) +- **Lives at:** `cuga-apps-mcp-{web,knowledge,geo,finance,code,local,text}` → `https://cuga-apps-mcp-<name>.1gxwxi8kos9y.us-east.codeengine.appdomain.cloud/mcp` +- **Image:** `icr.io/routing_namespace/mcp:latest` (one shared image, run per-service) +- **Used by:** apps run *from source* with `CUGA_TARGET=ce`, and the "MCP Servers" showcase. **Not** used by the all-in-one gallery. +- **Builds from:** `build/mcp_servers/build_mcp_image.sh mcp` → `build/mcp_servers/deploy_mcp.sh <name>` · Dockerfile `cuga-apps/Dockerfile.mcp` (context = `cuga-apps/`) + +### 4. MCP Tool Explorer — its own CE app +- **Lives at:** CE app **`cuga-apps-mcp-tool-explorer`** → `https://cuga-apps-mcp-tool-explorer.1gxwxi8kos9y.us-east.codeengine.appdomain.cloud` +- **Image:** `icr.io/routing_namespace/mcp-tool-explorer:latest` +- **Builds from:** `build/mcp_servers/build_mcp_image.sh tool-explorer` → `build/mcp_servers/deploy_mcp.sh tool-explorer` (same scripts as #3) +- **Pointed to by** the per-server "Tool Explorer ↗" links and the injected per-app banners. + +### 5. Stats storage — IBM Cloud Object Storage (durable, not a service) +- **Lives at:** COS bucket **`s3://cuga-usage`** (region **us-south**), instance `cuga-cos` +- **Used by:** the `usage-collector` app *inside* the all-in-one (dashboard at `/a/usage-collector/`). CE filesystem is ephemeral, so counters/utterances persist here. +- **Wired via:** `build/.env` → CE secret. Setup in [DEPLOYMENT.md §2b](DEPLOYMENT.md). 
+ +## How they relate + +``` +HF Space (static UI) ──links──▶ CE: cuga-agent-apps (all-in-one) + ├─ 21 apps + usage-collector + UI + ├─ 5 MCP servers (loopback, CUGA_TARGET=local) + └─ usage-collector ──S3──▶ COS: cuga-usage + +CE: cuga-apps-mcp-{web,knowledge,geo,finance,code,local,text} ◀── from-source apps (CUGA_TARGET=ce) +CE: cuga-apps-mcp-tool-explorer ◀── "Tool Explorer" links / app banners + (both built from build/mcp_servers/, independent of the all-in-one) +``` + +## Two build families — what to run for what + +| You changed… | Rebuild | Scripts | +|---|---|---| +| Any app / umbrella UI / bundled MCP (e.g. geo) → **gallery** | All-in-one (+ HF if UI changed) | `build/ce/build_and_push.sh && build/ce/deploy.sh`; then `build/hf/build.sh` | +| A standalone MCP server or the **Tool Explorer** | Shared MCP image | `build/mcp_servers/build_mcp_image.sh …` → `build/mcp_servers/deploy_mcp.sh …` | + +The two families are independent: the all-in-one bundles its own MCP copy, so the +standalone `cuga-apps-mcp-*` set only matters for from-source `CUGA_TARGET=ce` +runs, the Tool Explorer, and the MCP-servers showcase. + +## Local dev ports + +| Thing | Port | +|---|---| +| All-in-one (nginx) | 8080 | +| MCP servers (web…text) | 29100–29106 | +| usage-collector | 28827 | +| Individual apps | 288xx (see each app's `main.py`) | + +> Names, images, project hash (`1gxwxi8kos9y`), and HF space here are taken from +> the build scripts + DEPLOYMENT.md. To reconcile against what's *actually* live: +> `ibmcloud ce app list`. diff --git a/build/DEPLOYMENT.md b/build/DEPLOYMENT.md index b8f59ac..a8f9e3e 100644 --- a/build/DEPLOYMENT.md +++ b/build/DEPLOYMENT.md @@ -6,6 +6,9 @@ lightweight launcher UI. Everything here is consolidated from [build/hf/README.md](hf/README.md) — read those for deeper detail on any single target. +> **Just want the map of what lives where + what builds it?** See +> [CHEATSHEET.md](CHEATSHEET.md). 
+ --- ## What gets deployed diff --git a/cuga-apps/README.md b/cuga-apps/README.md index 0911a13..ffb719d 100644 --- a/cuga-apps/README.md +++ b/cuga-apps/README.md @@ -30,10 +30,21 @@ you browse and invoke every MCP tool. ## Quick start ```bash -cp apps/.env.example apps/.env # fill in keys (see docs/GETTING_STARTED.md) +cp build/.env.example build/.env # single source of truth — fill in keys docker compose up -d --build # ~5-10 min on first build ``` +> **Config lives in `build/.env`.** That one file is the source of truth for +> the LLM provider/model and all secrets, and it is shared by **every** way of +> running the apps: the Code Engine deployment, the local Docker image, and +> `python launch.py` (which now defaults to reading `build/.env`). Change the +> model once there and it applies everywhere. The default is **watsonx + +> `openai/gpt-oss-120b`** — `LLM_MODEL` sets the outer model and +> `AGENT_SETTING_CONFIG=/app/apps/settings.watsonx.toml` points CUGA's internal +> nodes at the bundled gpt-oss config (`launch.py` remaps that in-image path to +> the local `apps/` dir automatically). A local `apps/.env`, if you create one, +> overrides `build/.env`. + Then open: - **Umbrella UI** — http://localhost:3001 - **MCP Tool Explorer** — http://localhost:28900 @@ -272,7 +283,12 @@ Subsequent builds reuse the cached pip-install layer unless `requirements.apps.txt`, `requirements.apps.heavy.txt`, or `requirements.mcp.txt` change. -Environment / secrets are read at runtime from `apps/.env` — see the Quick -start at the top. They're mounted read-only as `/run/secrets/app.env` and -sourced by `entrypoint.sh`, so they never appear in `docker inspect` or in -the image itself. +Environment / secrets are read at runtime from **`build/.env`** — the single +source of truth shared by the Code Engine deployment, the local Docker image, +and `python launch.py`. 
In the container they're mounted read-only as +`/run/secrets/app.env` (or the CE `app-env` secret) and sourced by +`entrypoint.sh`, so they never appear in `docker inspect` or in the image +itself. `launch.py` reads the same `build/.env` directly (remapping the +in-image `/app/apps/...` paths to this checkout). To change the model or keys +for *all* run modes at once, edit `build/.env`; for Code Engine, also refresh +the secret: `ibmcloud ce secret update --name app-env --from-env-file build/.env`. diff --git a/cuga-apps/apps/__pycache__/__init__.cpython-311.pyc b/cuga-apps/apps/__pycache__/__init__.cpython-311.pyc index af3cdee..615ca2a 100644 Binary files a/cuga-apps/apps/__pycache__/__init__.cpython-311.pyc and b/cuga-apps/apps/__pycache__/__init__.cpython-311.pyc differ diff --git a/cuga-apps/apps/__pycache__/_llm.cpython-311.pyc b/cuga-apps/apps/__pycache__/_llm.cpython-311.pyc index 196929c..651794d 100644 Binary files a/cuga-apps/apps/__pycache__/_llm.cpython-311.pyc and b/cuga-apps/apps/__pycache__/_llm.cpython-311.pyc differ diff --git a/cuga-apps/apps/_chrome.py b/cuga-apps/apps/_chrome.py new file mode 100644 index 0000000..5b97b64 --- /dev/null +++ b/cuga-apps/apps/_chrome.py @@ -0,0 +1,160 @@ +"""_chrome.py — shared UI chrome injected into every cuga-app's HTML page. + +Every app calls ``install_usage(app)`` (see _usage.py), which calls +``install_chrome(app)`` here. A single response middleware rewrites +``text/html`` GET responses to add, consistently across all ~33 apps: + + • a privacy "heads up" banner pinned to the top (public demo; requests logged) + • the LLM model the app runs on + an MCP Tool Explorer link, placed INSIDE the + app's own tools row (the ``.app-intro__tools`` chip strip that 21 apps share) + so they sit with the other tool pills — not in the top banner. Apps without + that row get a small fallback badge instead. + +Doing it in ONE place beats hand-editing 33 heterogeneous app UIs and keeps +them identical. 
Only ``text/html`` is rewritten — JSON and ``text/event-stream`` +(SSE) pass through untouched, so streaming endpoints are never buffered. +""" +from __future__ import annotations + +import html as _html +import os +import re + +_MARKER = "cuga-chrome" # banner id — also the idempotency guard +# The shared tools strip in app-intro headers; its children are pills, so +# the first after the opening tag is the row's closer (safe to target). +_TOOLS_ROW_RE = re.compile(r'(
.*?)(
)', + re.DOTALL | re.IGNORECASE) +_BODY_RE = re.compile(r"]*>", re.IGNORECASE) + +# Public MCP Tool Explorer (standalone Code Engine app). Override per deployment +# with MCP_TOOL_EXPLORER_URL (e.g. a localhost port for local dev). +_DEFAULT_EXPLORER = ("https://cuga-apps-mcp-tool-explorer." + "1gxwxi8kos9y.us-east.codeengine.appdomain.cloud") + +_PRIVACY = ("Heads up: these are public demo apps. Your requests are logged for " + "usage analytics — please don't enter confidential information, " + "credentials, or personal data.") + + +def _model_label() -> str: + model = (os.getenv("LLM_MODEL") or "").strip() + provider = (os.getenv("LLM_PROVIDER") or "").strip() + short = model.split("/")[-1] if model else "" # drop an "openai/" routing prefix + if short and provider: + return f"{short} · {provider}" + return short or provider or "—" + + +def _explorer_url() -> str: + return (os.getenv("MCP_TOOL_EXPLORER_URL") or _DEFAULT_EXPLORER).rstrip("/") + + +# ── Top banner — privacy notice only (compliance; stays pinned at the top) ── +def _banner_fragment() -> str: + privacy = _html.escape(_PRIVACY) + return ( + "" + f'
{privacy}
' + ) + + +# ── Model + MCP Tools — as native pills inside the app's tools row ────────── +def _tool_pills() -> str: + model = _html.escape(_model_label()) + explorer = _html.escape(_explorer_url()) + return ( + f'🧠 {model}' + f'🛠 MCP Tools ↗' + ) + + +def _badge_fragment() -> str: + model = _html.escape(_model_label()) + explorer = _html.escape(_explorer_url()) + return ( + f'
' + f'🧠 {model}' + f'🛠 MCP Tools ↗' + "
" + ) + + +def _inject(markup: str) -> str: + if _MARKER in markup: + return markup # idempotent — already injected + + # 1) privacy banner right after + frag = _banner_fragment() + m = _BODY_RE.search(markup) + markup = (markup[:m.end()] + frag + markup[m.end():]) if m else (frag + markup) + + # 2) model + MCP Tools into the app's tools row, else a fallback badge + pills = _tool_pills() + markup, n = _TOOLS_ROW_RE.subn(lambda mm: mm.group(1) + pills + mm.group(2), + markup, count=1) + if n == 0: + badge = _badge_fragment() + bm = re.search(r"", markup, re.IGNORECASE) + markup = (markup[:bm.start()] + badge + markup[bm.start():]) if bm else markup + badge + return markup + + +def install_chrome(app) -> None: + """Add the HTML-injection middleware to a FastAPI/Starlette app. + + Idempotent per app, and best-effort — it must never raise into the app. + """ + if getattr(getattr(app, "state", None), "_cuga_chrome_installed", False): + return + try: + from starlette.responses import Response + except Exception: + return + + @app.middleware("http") + async def _chrome_mw(request, call_next): + response = await call_next(request) + # Only rewrite full HTML GET pages. SSE (text/event-stream) and JSON + # pass straight through, so streaming endpoints are never buffered. 
+ if request.method != "GET": + return response + ctype = response.headers.get("content-type", "") + if "text/html" not in ctype.lower(): + return response + body = b"" + async for chunk in response.body_iterator: + body += chunk if isinstance(chunk, (bytes, bytearray)) else str(chunk).encode() + headers = dict(response.headers) + headers.pop("content-length", None) + headers.pop("content-type", None) + try: + out = _inject(body.decode("utf-8", "replace")).encode("utf-8") + except Exception: + out = body + return Response(content=out, status_code=response.status_code, + headers=headers, media_type=ctype or "text/html; charset=utf-8") + + try: + app.state._cuga_chrome_installed = True + except Exception: + pass diff --git a/cuga-apps/apps/_llm.py b/cuga-apps/apps/_llm.py index ebfa046..ca9dde9 100644 --- a/cuga-apps/apps/_llm.py +++ b/cuga-apps/apps/_llm.py @@ -211,21 +211,31 @@ def detect_provider() -> str: from langchain_core.callbacks import BaseCallbackHandler class _LLMCallCounter(BaseCallbackHandler): + """Counts each LLM call by OUTCOME: a success on completion, a failure + (with a code) on error — so the dashboard's error/limit counts for the + model provider (watsonx, …) are accurate, not assumed-OK on start.""" + def __init__(self, provider: str) -> None: self.provider = provider - def _count(self) -> None: + def _record(self, ok: bool, code: str | None = None) -> None: try: from _usage import track_call - track_call(self.provider) + track_call(self.provider, ok=ok, code=code) except Exception: # noqa: BLE001 — tracking must never break a call pass - def on_chat_model_start(self, *args: Any, **kwargs: Any) -> None: - self._count() + # Fires for both chat models and completion LLMs on success. 
+ def on_llm_end(self, *args: Any, **kwargs: Any) -> None: + self._record(True) - def on_llm_start(self, *args: Any, **kwargs: Any) -> None: - self._count() + def on_llm_error(self, error: BaseException, *args: Any, **kwargs: Any) -> None: + try: + from _usage import classify_error + code = classify_error(error) + except Exception: # noqa: BLE001 + code = "error" + self._record(False, code) except Exception: # noqa: BLE001 — if callbacks import fails, skip LLM counting _LLMCallCounter = None # type: ignore diff --git a/cuga-apps/apps/_ratelimit.py b/cuga-apps/apps/_ratelimit.py index 270d342..8039508 100644 --- a/cuga-apps/apps/_ratelimit.py +++ b/cuga-apps/apps/_ratelimit.py @@ -27,11 +27,11 @@ and restarting — no code change, no rebuild: RL_ENABLED "1" master switch ("0" disables all limiting) - RL_PER_MIN "30" sustained POSTs per minute per IP - RL_BURST "12" token-bucket capacity (short burst) per IP - RL_PER_DAY "300" hard POSTs per day per IP (0 = off) - RL_GLOBAL_PER_MIN "150" POSTs per minute across all IPs (0 = off) - RL_CONCURRENCY "6" max concurrent POSTs (0 = off) + RL_PER_MIN "60" sustained POSTs per minute per IP + RL_BURST "24" token-bucket capacity (short burst) per IP + RL_PER_DAY "600" hard POSTs per day per IP (0 = off) + RL_GLOBAL_PER_MIN "300" POSTs per minute across all IPs (0 = off) + RL_CONCURRENCY "12" max concurrent POSTs (0 = off) RL_MAX_BODY_BYTES "32768" reject POST bodies larger than this (0 = off) RL_TRUST_FORWARDED "1" derive client IP from X-Forwarded-For (CE/proxy) RL_MAX_TRACKED_IPS "20000" LRU cap on the per-IP table (bounds memory) @@ -87,11 +87,13 @@ def take(self, now: float) -> tuple[bool, float]: class _Config: def __init__(self, **kw) -> None: self.enabled = os.getenv("RL_ENABLED", "1") != "0" - self.per_min = _env_int("RL_PER_MIN", 30) - self.burst = _env_int("RL_BURST", 12) - self.per_day = _env_int("RL_PER_DAY", 300) - self.global_per_min = _env_int("RL_GLOBAL_PER_MIN", 150) - self.concurrency = 
_env_int("RL_CONCURRENCY", 6) + # Defaults doubled for launch (lenient early-days posture). Dial back by + # setting the RL_* env vars in the CE app-env secret — no rebuild needed. + self.per_min = _env_int("RL_PER_MIN", 60) + self.burst = _env_int("RL_BURST", 24) + self.per_day = _env_int("RL_PER_DAY", 600) + self.global_per_min = _env_int("RL_GLOBAL_PER_MIN", 300) + self.concurrency = _env_int("RL_CONCURRENCY", 12) self.max_body = _env_int("RL_MAX_BODY_BYTES", 32768) self.trust_forwarded = os.getenv("RL_TRUST_FORWARDED", "1") != "0" self.max_tracked = _env_int("RL_MAX_TRACKED_IPS", 20000) diff --git a/cuga-apps/apps/_usage.py b/cuga-apps/apps/_usage.py index d5783f8..2fca1fe 100644 --- a/cuga-apps/apps/_usage.py +++ b/cuga-apps/apps/_usage.py @@ -176,9 +176,32 @@ def _scrub(text: str) -> str: return _SECRET_RE.sub("«redacted»", text) -def track_call(provider: str, *, app: str | None = None, ok: bool = True, n: int = 1) -> None: +def classify_error(exc: object) -> str: + """Best-effort short failure code for a provider call: the HTTP status + ("429", "404", "503", …) when the exception carries a response, else a + coarse label ("timeout", "connection", or the exception class name).""" + try: + status = getattr(getattr(exc, "response", None), "status_code", None) + if status: + return str(status) + name = type(exc).__name__.lower() + if "timeout" in name: + return "timeout" + if "connect" in name: + return "connection" + return name[:24] or "error" + except Exception: # noqa: BLE001 + return "error" + + +def track_call(provider: str, *, app: str | None = None, ok: bool = True, + n: int = 1, code: str | None = None) -> None: """Count an external/provider API call (tavily, alpha_vantage, watsonx, …). + On failure pass `code` — an HTTP status ("429", "404") or short label — so + the dashboard can show *why* calls failed (e.g. how often a rate limit was + hit). Use classify_error(exc) to derive it from an exception. 
+ Fire-and-forget and safe from any context (async app handlers, sync MCP tools, LangChain callbacks). Never raises. """ @@ -186,6 +209,8 @@ def track_call(provider: str, *, app: str | None = None, ok: bool = True, n: int event = {"kind": "call", "provider": str(provider)[:40], "app": app or _detect_app_name(), "ok": bool(ok), "n": int(n), "ts": time.time()} + if not ok and code: + event["code"] = str(code)[:24] utt = _CUR_UTT.get() if utt: # set only for in-process LLM calls event["utt"] = utt @@ -245,3 +270,12 @@ async def _usage_mw(request, call_next): return response app.state._usage_installed = True + + # Shared UI chrome (privacy banner + model + MCP Tool Explorer link) on every + # app's HTML page — injected here so we touch one file, not ~33 app UIs. + # Best-effort; a chrome failure must never break usage tracking. + try: + from _chrome import install_chrome + install_chrome(app) + except Exception as exc: # noqa: BLE001 + log.debug("chrome install skipped: %s", exc) diff --git a/cuga-apps/apps/ai_labs_news/main.py b/cuga-apps/apps/ai_labs_news/main.py index 4855e18..7c25baa 100644 --- a/cuga-apps/apps/ai_labs_news/main.py +++ b/cuga-apps/apps/ai_labs_news/main.py @@ -43,6 +43,17 @@ if _p not in sys.path: sys.path.insert(0, _p) +# Robustness: AGENT_SETTING_CONFIG may arrive as an in-IMAGE absolute path +# (e.g. /app/apps/settings.watsonx.toml from build/.env) while running from a +# local checkout where it doesn't exist. CUGA aborts on a missing config file, +# so remap a non-existent absolute config to a local file of the same name. 
+_asc = os.environ.get("AGENT_SETTING_CONFIG", "") +if os.path.isabs(_asc) and not os.path.isfile(_asc): + for _cand in (_DIR / os.path.basename(_asc), _DEMOS_DIR / os.path.basename(_asc)): + if _cand.is_file(): + os.environ["AGENT_SETTING_CONFIG"] = str(_cand) + break + logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)-7s %(message)s", @@ -62,6 +73,19 @@ # slug → (display name, [candidate feed URLs tried in order]). Feeds move and # break; listing fallbacks per lab makes the tool resilient. The first feed # that parses to >0 entries wins. + +def _gnews(query: str) -> str: + """A Google News RSS search feed for `query`. Used as a reliable fallback + for labs that don't publish a working native blog feed (Anthropic, Meta AI, + IBM Research as of 2026 all 404 on their old RSS paths). Returns recent + news items about the lab — not their own blog posts — but it keeps the lab + reachable instead of silently dropping out of the digest.""" + from urllib.parse import quote_plus + return ("https://news.google.com/rss/search?q=" + + quote_plus(query) + + "&hl=en-US&gl=US&ceid=US:en") + + _LABS: dict[str, dict] = { "openai": { "name": "OpenAI", @@ -69,7 +93,9 @@ }, "anthropic": { "name": "Anthropic", - "feeds": ["https://www.anthropic.com/rss.xml", "https://www.anthropic.com/news/rss.xml"], + # Anthropic publishes no working RSS feed (all known paths 404), so we + # fall back to a Google News search scoped to Anthropic/Claude. + "feeds": [_gnews("Anthropic Claude AI")], }, "google-deepmind": { "name": "Google DeepMind", @@ -87,11 +113,15 @@ }, "ibm-research": { "name": "IBM Research", - "feeds": ["https://research.ibm.com/blog/rss.xml", "https://research.ibm.com/feed"], + # Native blog RSS paths 404 as of 2026 — fall back to Google News. 
+ "feeds": ["https://research.ibm.com/blog/rss.xml", + _gnews('"IBM Research" AI')], }, "meta-ai": { "name": "Meta AI", - "feeds": ["https://ai.meta.com/blog/rss/", "https://ai.facebook.com/blog/rss/"], + # Native blog RSS paths 404 as of 2026 — fall back to Google News. + "feeds": ["https://ai.meta.com/blog/rss/", + _gnews('"Meta AI" OR "Meta FAIR" research')], }, "huggingface": { "name": "Hugging Face", @@ -426,6 +456,12 @@ def make_agent(): tools=_make_tools(), special_instructions=_SYSTEM, cuga_folder=str(_DIR / ".cuga"), + # Each question is independent. Disable the persistent knowledge store + # and on-disk policy auto-load so nothing learned/saved in one question + # leaks into the next via the shared .cuga folder. The output formatter + # we need is attached explicitly in _attach_policies(). + enable_knowledge=False, + auto_load_policies=False, ) @@ -477,7 +513,11 @@ async def api_ask(req: AskReq): try: agent = await _get_agent() result = await agent.invoke(augmented, thread_id=uuid.uuid4().hex) - return {"answer": str(result), "thread_id": thread_id} + # Use the agent's synthesised answer, NOT str(result): the result + # object's repr dumps the CUGA plan + generated code, which is what + # was leaking into the chat as an unformatted blob. + answer = result.answer if hasattr(result, "answer") else str(result) + return {"answer": answer, "thread_id": thread_id} except Exception as exc: log.exception("Agent invocation failed") return JSONResponse( diff --git a/cuga-apps/apps/ai_labs_news/ui.py b/cuga-apps/apps/ai_labs_news/ui.py index 6633b08..89eec08 100644 --- a/cuga-apps/apps/ai_labs_news/ui.py +++ b/cuga-apps/apps/ai_labs_news/ui.py @@ -398,12 +398,22 @@ } setInterval(fetchSession, 10000); + function resetPanel() { + // Each question is answered fresh — clear the previous digest so stale + // results from the prior question don't linger on the right. 
+ _lastHash = ''; + dataScroll.innerHTML = ''; + dataScroll.appendChild(emptyState); + emptyState.style.display = ''; + } + async function sendMessage() { const question = inputEl.value.trim(); if (!question) return; inputEl.value = ''; sendBtn.disabled = true; setStatus(true, 'Thinking…'); + resetPanel(); addMessage(question, 'user'); const thinking = addMessage('Pulling the latest feeds…', 'thinking'); diff --git a/cuga-apps/apps/city_beat/main.py b/cuga-apps/apps/city_beat/main.py index 7a03a83..59f1401 100644 --- a/cuga-apps/apps/city_beat/main.py +++ b/cuga-apps/apps/city_beat/main.py @@ -336,6 +336,13 @@ def make_agent(): tools=_make_tools(), special_instructions=_SYSTEM, cuga_folder=str(_DIR / ".cuga"), + # Each question is independent — disable the persistent knowledge store + # and on-disk policy auto-load. CUGA's policy DB is a shared global + # sqlite store, so without this an output-formatter persisted by another + # app (e.g. meetup_finder's save_events event board) leaks in and the + # model emits that board instead of this app's answer. + enable_knowledge=False, + auto_load_policies=False, ) diff --git a/cuga-apps/apps/hiking_research/main.py b/cuga-apps/apps/hiking_research/main.py index e50cb46..363fb24 100644 --- a/cuga-apps/apps/hiking_research/main.py +++ b/cuga-apps/apps/hiking_research/main.py @@ -41,6 +41,17 @@ if _p not in sys.path: sys.path.insert(0, _p) +# Robustness: AGENT_SETTING_CONFIG may arrive as an in-IMAGE absolute path +# (e.g. /app/apps/settings.watsonx.toml from build/.env) while running from a +# local checkout where it doesn't exist. CUGA aborts on a missing config file, +# so remap a non-existent absolute config to a local file of the same name. 
+_asc = os.environ.get("AGENT_SETTING_CONFIG", "") +if os.path.isabs(_asc) and not os.path.isfile(_asc): + for _cand in (_DIR / os.path.basename(_asc), _DEMOS_DIR / os.path.basename(_asc)): + if _cand.is_file(): + os.environ["AGENT_SETTING_CONFIG"] = str(_cand) + break + logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)-7s %(message)s", @@ -129,10 +140,49 @@ def _overpass_post(query: str) -> dict: # Tools # --------------------------------------------------------------------------- +def _capture_hikes(result) -> None: + """Store the find_hikes tool output into the module-level right-panel + store so GET /hikes can serve it. Tolerant of dict or JSON-string shapes.""" + global _last_hikes + try: + data = result + if isinstance(data, str): + data = json.loads(data) + if isinstance(data, dict) and isinstance(data.get("hikes"), list): + _last_hikes = data["hikes"] + except Exception: # noqa: BLE001 + pass + + +def _wrap_find_hikes(tool) -> None: + """Wrap the geo MCP `find_hikes` tool so every call also populates the + right-panel store. Without this hook the agent's hike results were never + surfaced to the UI and the "Trails Found" panel stayed permanently empty.""" + import functools + orig = getattr(tool, "coroutine", None) + if orig is None: + return + + @functools.wraps(orig) + async def _wrapped(*args, **kwargs): + result = await orig(*args, **kwargs) + _capture_hikes(result) + return result + + try: + tool.coroutine = _wrapped + except Exception: # noqa: BLE001 — StructuredTool may forbid setattr + pass + + def _make_tools(): # Delegated to MCP server(s): geo, web. 
from _mcp_bridge import load_tools - return load_tools(["geo", "web"]) + tools = load_tools(["geo", "web"]) + for t in tools: + if getattr(t, "name", "") == "find_hikes": + _wrap_find_hikes(t) + return tools # --------------------------------------------------------------------------- @@ -202,6 +252,11 @@ def make_agent(): tools=_make_tools(), special_instructions=_SYSTEM, cuga_folder=str(_DIR / ".cuga"), + # Each question is independent — disable the persistent knowledge store + # and on-disk policy auto-load so nothing carries across questions via + # the shared .cuga folder. + enable_knowledge=False, + auto_load_policies=False, ) diff --git a/cuga-apps/apps/ibm_cloud_advisor/main.py b/cuga-apps/apps/ibm_cloud_advisor/main.py index 87f2608..cba55cc 100644 --- a/cuga-apps/apps/ibm_cloud_advisor/main.py +++ b/cuga-apps/apps/ibm_cloud_advisor/main.py @@ -43,6 +43,17 @@ if _p not in sys.path: sys.path.insert(0, _p) +# Robustness: AGENT_SETTING_CONFIG may arrive as an in-IMAGE absolute path +# (e.g. /app/apps/settings.watsonx.toml from build/.env) while running from a +# local checkout where it doesn't exist. CUGA aborts on a missing config file, +# so remap a non-existent absolute config to a local file of the same name. +_asc = os.environ.get("AGENT_SETTING_CONFIG", "") +if os.path.isabs(_asc) and not os.path.isfile(_asc): + for _cand in (_DIR / os.path.basename(_asc), _DEMOS_DIR / os.path.basename(_asc)): + if _cand.is_file(): + os.environ["AGENT_SETTING_CONFIG"] = str(_cand) + break + logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)-7s %(message)s", @@ -149,7 +160,8 @@ def search_ibm_catalog(query: str) -> str: **Architecture: [descriptive name]** **IBM Cloud Services:** -- **[Display Name]** (`[service-name]`): Role in the architecture +- **[Display Name]** (`[service-name]`): Role in the architecture. 
+ Docs: https://cloud.ibm.com/docs/[service-name] - … **How they connect:** @@ -164,6 +176,21 @@ def search_ibm_catalog(query: str) -> str: **Cost indication:** [note Lite plan availability; link https://cloud.ibm.com/estimator] +**References & further reading:** +Always close with this section. Include, as a markdown bullet list: +- One IBM Cloud docs link per recommended service: + `https://cloud.ibm.com/docs/[service-name]` (use the exact catalog `name`). +- 2–4 relevant general references from this list, picked for the use case: + - Docs home — https://cloud.ibm.com/docs + - Architecture patterns & reference architectures — https://www.ibm.com/architectures + - Pricing & free/Lite tier — https://cloud.ibm.com/pricing + - Cost estimator — https://cloud.ibm.com/estimator + - Security & compliance — https://cloud.ibm.com/docs/overview?topic=overview-security + - High availability & DR — https://cloud.ibm.com/docs/overview?topic=overview-zero-downtime + - Terraform (IBM Cloud provider) — https://registry.terraform.io/providers/IBM-Cloud/ibm/latest/docs +- If `web_search` surfaced an especially relevant tutorial, solution guide, or + blog post, cite it here too with its title and URL. + ## Rules - Only recommend services confirmed by `search_ibm_catalog` results - Never invent IBM service names — use exact `name` values from the catalog @@ -171,6 +198,9 @@ def search_ibm_catalog(query: str) -> str: - If the user mentions AWS/Azure equivalents, map them explicitly - If `search_ibm_catalog` returns no results for a capability, say so and suggest an alternative approach +- ALWAYS include a per-service docs link and the References section — a + recommendation without documentation pointers is incomplete. +- Write all links as real markdown so they render as clickable references. 
""" diff --git a/cuga-apps/apps/launch.py b/cuga-apps/apps/launch.py index 4d6e34d..65e169f 100644 --- a/cuga-apps/apps/launch.py +++ b/cuga-apps/apps/launch.py @@ -39,6 +39,13 @@ PYTHON = sys.executable +# Single source of truth for runtime config is build/.env (the same file the +# Code Engine / Docker image uses), so local `launch.py` and the deployment +# stay in lock-step. A local apps/.env, if present, still wins as an override. +BUILD_ENV = REPO_ROOT.parent / "build" / ".env" +APPS_ENV = HERE / ".env" +DEFAULT_ENV = APPS_ENV if APPS_ENV.exists() else BUILD_ENV + def _has_cuga(py: str) -> bool: """True if interpreter `py` can find the `cuga` package. Uses find_spec @@ -300,6 +307,16 @@ def cmd_start(filter_names: Optional[list[str]], env_file: Path) -> None: print(f" [PYTHON] {PYTHON}") dotenv = _load_env(env_file) + if dotenv: + print(f" [ENV] loaded {len(dotenv)} vars from {env_file}") + # build/.env carries in-IMAGE absolute paths (e.g. AGENT_SETTING_CONFIG= + # /app/apps/settings.watsonx.toml). When launching from this checkout that + # path doesn't exist, so remap the /app/apps prefix to the local apps dir. 
+ asc = dotenv.get("AGENT_SETTING_CONFIG", "") + if asc.startswith("/app/apps/"): + local = HERE / asc[len("/app/apps/"):] + dotenv["AGENT_SETTING_CONFIG"] = str(local) + print(f" [ENV] AGENT_SETTING_CONFIG → {local}") merged_env = {**os.environ, **dotenv} # Usage tracking: point every app at the local collector unless the user @@ -468,7 +485,7 @@ def main() -> None: parser.add_argument("--ship-ready", action="store_true", help="Target the ship-ready stack: the 21 ship-ready " "apps + the 7 MCP servers they depend on") - parser.add_argument("--env", type=Path, default=HERE / ".env") + parser.add_argument("--env", type=Path, default=DEFAULT_ENV) parser.add_argument("--tail", type=int, default=30) args = parser.parse_args() diff --git a/cuga-apps/apps/meetup_finder/main.py b/cuga-apps/apps/meetup_finder/main.py index ab9bbc0..0406332 100644 --- a/cuga-apps/apps/meetup_finder/main.py +++ b/cuga-apps/apps/meetup_finder/main.py @@ -35,6 +35,7 @@ import argparse import asyncio +import contextvars import html as _html import json import logging @@ -43,6 +44,7 @@ import sys import urllib.parse import uuid +from datetime import datetime, timezone from pathlib import Path # ── Path bootstrap — must come before local imports ───────────────────── @@ -52,6 +54,17 @@ if _p not in sys.path: sys.path.insert(0, _p) +# Robustness: AGENT_SETTING_CONFIG may arrive as an in-IMAGE absolute path +# (e.g. /app/apps/settings.watsonx.toml from build/.env) while running from a +# local checkout where it doesn't exist. CUGA aborts on a missing config file, +# so remap a non-existent absolute config to a local file of the same name. 
+_asc = os.environ.get("AGENT_SETTING_CONFIG", "") +if os.path.isabs(_asc) and not os.path.isfile(_asc): + for _cand in (_DIR / os.path.basename(_asc), _DEMOS_DIR / os.path.basename(_asc)): + if _cand.is_file(): + os.environ["AGENT_SETTING_CONFIG"] = str(_cand) + break + logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)-7s %(message)s", @@ -76,6 +89,13 @@ # ── Per-thread session store ──────────────────────────────────────────── _sessions: dict[str, dict] = {} +# The thread_id of the in-flight /ask, so fetch_events can accumulate what it +# extracts into that session WITHOUT depending on the (weak) model to thread a +# thread_id through every tool call. A ContextVar is task-local, so concurrent +# /ask requests don't clobber each other (and child tasks inherit the value). +_active_thread: contextvars.ContextVar[str] = contextvars.ContextVar( + "active_thread", default="") + def _get_session(thread_id: str) -> dict: if thread_id not in _sessions: @@ -84,10 +104,49 @@ def _get_session(thread_id: str) -> dict: "location": "", "when": "", "events": [], # ranked board the right panel renders + "_fetched": [], # raw events fetch_events extracted (safety-net source) } return _sessions[thread_id] +def _event_is_past(start: str) -> bool: + """True if an event's start date is clearly in the past. Lenient: if the + date can't be parsed, keep the event (return False) rather than drop it.""" + s = (start or "").strip() + if not s: + return False + try: + dt = datetime.fromisoformat(s.replace("Z", "+00:00")) + except ValueError: + m = re.match(r"(\d{4})-(\d{2})-(\d{2})", s) + if not m: + return False + dt = datetime(int(m[1]), int(m[2]), int(m[3])) + if dt.tzinfo is None: + dt = dt.replace(tzinfo=timezone.utc) + # Compare on date only, so an event earlier *today* still counts. 
+ return dt.date() < datetime.now(timezone.utc).date() + + +# Bare-metal hosts (no `playwright install --with-deps`, no root to dnf-install +# Chromium's system libs) can stage them in a local prefix and point the loader +# at it. Default matches the rootless RPM-extract recipe in the README; override +# with MEETUP_BROWSER_LIBS. No-op in the container (the image bakes the libs in) +# and on any host where the prefix doesn't exist. +def _ensure_local_browser_libs() -> None: + prefix = os.getenv("MEETUP_BROWSER_LIBS") or os.path.expanduser( + "~/.local/chromium-deps") + dirs = [os.path.join(prefix, "usr", "lib64"), os.path.join(prefix, "usr", "lib")] + dirs = [d for d in dirs if os.path.isdir(d)] + if not dirs: + return + existing = os.environ.get("LD_LIBRARY_PATH", "") + parts = [d for d in dirs if d not in existing.split(os.pathsep)] + if parts: + os.environ["LD_LIBRARY_PATH"] = os.pathsep.join( + parts + ([existing] if existing else [])) + + # ── Playwright browser pool (lazy, single browser, serialized) ─────────── # Mirrors chief_of_staff/browser_runner/executor.py: start async_playwright # once, launch one headless Chromium, and serve a fresh context per fetch @@ -103,16 +162,30 @@ async def fetch(self, url: str, timeout_ms: int = 30_000) -> tuple[str, str]: from playwright.async_api import async_playwright headless = os.getenv("MEETUP_HEADLESS", "1") != "0" async with self._lock: - if self._pw is None: - self._pw = await async_playwright().start() + # Gate on the browser, not the playwright handle: a launch that fails + # after async_playwright().start() leaves _pw set but _browser None, + # so gating on _pw would skip relaunch and every later call would die + # with a misleading "'NoneType' has no attribute 'new_context'". 
+ if self._browser is None: + _ensure_local_browser_libs() + if self._pw is None: + self._pw = await async_playwright().start() # In a container (Docker/Code Engine) Chromium runs as root and # /dev/shm is tiny, so the sandbox + default shm break the # launch. Detect the container and pass the standard flags. in_container = bool(os.getenv("CUGA_IN_DOCKER") or os.getenv("CE_APP") or os.getenv("MEETUP_NO_SANDBOX")) launch_args = ["--no-sandbox", "--disable-dev-shm-usage"] if in_container else [] - self._browser = await self._pw.chromium.launch( - headless=headless, args=launch_args) + try: + self._browser = await self._pw.chromium.launch( + headless=headless, args=launch_args) + except Exception: + # Tear the handle down too so the next call retries cleanly + # and surfaces the real launch error, not a None deref. + await self.aclose() + self._pw = None + self._browser = None + raise ctx = await self._browser.new_context( user_agent=_UA, viewport={"width": 1280, "height": 2400}) page = await ctx.new_page() @@ -262,6 +335,59 @@ def walk(o): return out +def _board_from_fetched(raw: list[dict]) -> list[dict]: + """Build a render-ready board from the events fetch_events extracted: + normalise, drop past events, dedupe by title+date, cap. Used as the + safety net when the model never calls save_events.""" + out, seen = [], set() + for e in raw: + ev = _coerce_board_event(e) + if not ev or _event_is_past(ev["start"]): + continue + key = (ev["title"].lower(), ev["start"]) + if key in seen: + continue + seen.add(key) + out.append(ev) + return out[:30] + + +def _coerce_board_event(e: dict) -> dict | None: + """Normalise one event the agent passes to save_events into the exact + shape the right panel renders. The model often uses schema.org-ish keys + (name/date/link/organizer) instead of our title/start/url/host — before + this, those rendered as blank 'Event' cards and, once we started filtering + empties, vanished entirely. 
A real entry needs at least a title/name.""" + if not isinstance(e, dict): + return None + + def pick(*keys): + for k in keys: + v = e.get(k) + if isinstance(v, str) and v.strip(): + return v.strip() + return "" + + title = pick("title", "name", "headline", "event", "event_name", "summary") + if not title: + return None + att = e.get("attendees") + if not isinstance(att, (int, str)) or att == "": + att = pick("going", "going_count", "rsvps", "guest_count") or None + return { + "title": title, + "url": pick("url", "link", "permalink", "event_url", "rsvp_url"), + "start": pick("start", "start_at", "startDate", "start_time", + "starts_at", "datetime", "date", "when"), + "venue": pick("venue", "location", "place", "address"), + "city": pick("city"), + "host": pick("host", "organizer", "organiser", "group"), + "source": pick("source"), + "attendees": att, + "why": pick("why", "reason", "fit", "note"), + } + + def _extract_events(html: str, url: str) -> list[dict]: domain = urllib.parse.urlparse(url).netloc.lower().replace("www.", "") source = domain.split(".")[0] if domain else "web" @@ -378,6 +504,12 @@ async def fetch_events(url: str, limit: int = 15) -> str: "error": f"{type(exc).__name__}: {exc}"}) events = _extract_events(html, url)[: max(1, min(int(limit or 15), 40))] log.info("fetch_events %s → %d events", url, len(events)) + # Safety net: stash what we actually extracted on the in-flight session. + # If the model then forgets/comments-out save_events (the weak ones do), + # /ask falls back to this so the panel still renders real events. 
+ tid = _active_thread.get() + if tid and events: + _get_session(tid)["_fetched"].extend(events) return json.dumps({"ok": True, "data": { "url": url, "page_title": _clean(title, 120), "count": len(events), "events": events, @@ -425,9 +557,18 @@ def save_events(thread_id: str, events_json: str) -> str: if not isinstance(events, list): return json.dumps({"ok": False, "code": "bad_input", "error": "events_json must be a JSON array"}) - session["events"] = events - log.info("[%s] saved %d events", thread_id[:8], len(events)) - return json.dumps({"ok": True, "data": {"saved": len(events)}}) + # Normalise each entry into the panel's shape (handles alternate + # key names) and drop anything without a title. This kills the + # empty-card flood AND ensures real events still render even when + # the model used name/date/link instead of title/start/url. + clean = [n for n in (_coerce_board_event(e) for e in events) if n] + clean = clean[:30] + # Don't wipe a good board with an empty/garbage submission. + if clean or not session.get("events"): + session["events"] = clean + log.info("[%s] saved %d events (%d submitted)", + thread_id[:8], len(clean), len(events)) + return json.dumps({"ok": True, "data": {"saved": len(clean)}}) except json.JSONDecodeError as exc: return json.dumps({"ok": False, "code": "bad_input", "error": f"invalid JSON: {exc}"}) @@ -538,6 +679,12 @@ def make_agent(): tools=_make_tools(), special_instructions=_SYSTEM, cuga_folder=str(_DIR / ".cuga"), + # Each question is independent. Disable the persistent knowledge store + # and on-disk policy auto-load so nothing learned/saved in one question + # leaks into the next via the shared .cuga folder. The output formatter + # we need is attached explicitly in _attach_policies(). 
+ enable_knowledge=False, + auto_load_policies=False, ) @@ -580,16 +727,31 @@ async def index(): @app.post("/ask") async def api_ask(req: AskReq): from _usage import track_utterance; track_utterance(req.question) - # Stateless: the panel id keys the per-turn data the UI polls, but we - # reset it each turn and run the agent on a fresh memory thread, so - # nothing carries over from the previous question. + # Stateless per question: reset the panel session and run on a fresh + # memory thread. The singleton agent has its persistent knowledge store + # and on-disk policy auto-load disabled (see make_agent), so nothing + # carries over from the previous question. thread_id = req.thread_id or uuid.uuid4().hex _sessions.pop(thread_id, None) + _active_thread.set(thread_id) augmented = f"[thread:{thread_id}] {req.question}" try: agent = await _get_agent() result = await agent.invoke(augmented, thread_id=uuid.uuid4().hex) - return {"answer": str(result), "thread_id": thread_id} + # Use the agent's synthesised answer, NOT str(result): the result + # object's repr dumps the CUGA plan + generated code into the chat. + answer = result.answer if hasattr(result, "answer") else str(result) + # Safety net: if the model fetched events but never (correctly) called + # save_events, the panel would be empty. Populate it from what + # fetch_events actually extracted so the user still sees real results. 
+ session = _sessions.get(thread_id) + if session is not None and not session.get("events") and session.get("_fetched"): + board = _board_from_fetched(session["_fetched"]) + if board: + session["events"] = board + log.info("[%s] safety-net populated %d events " + "(model skipped save_events)", thread_id[:8], len(board)) + return {"answer": answer, "thread_id": thread_id} except Exception as exc: log.exception("Agent invocation failed") return JSONResponse( diff --git a/cuga-apps/apps/meetup_finder/ui.py b/cuga-apps/apps/meetup_finder/ui.py index e2149ea..30cccc4 100644 --- a/cuga-apps/apps/meetup_finder/ui.py +++ b/cuga-apps/apps/meetup_finder/ui.py @@ -269,7 +269,6 @@
Chat with the agent
-
AI meetups in San Francisco this week
LLM and agent events near New York
Data engineering meetups in Austin this month
Startup / founder events in London
@@ -296,7 +295,7 @@
📅

Tell the agent your interests and a city. It drives a real browser over Meetup, Luma, and Eventbrite and ranks the upcoming events here.

-
Try: "AI agent meetups in San Francisco this week"
+
Try: "AI agent meetups near New York this week"
@@ -396,11 +395,15 @@ } function refreshPanel(data) { - const hash = JSON.stringify({ e: data.events, i: data.interests, l: data.location, w: data.when }); + // Only render events that actually have a title — drop the empty / + // placeholder rows that were filling the panel with blank cards. + const events = (data.events || []).filter( + ev => ev && String(ev.title || '').trim()); + const hash = JSON.stringify({ e: events, i: data.interests, l: data.location, w: data.when }); if (hash === _lastHash) return; _lastHash = hash; - const hasEvents = data.events && data.events.length > 0; + const hasEvents = events.length > 0; const ctx = renderContext(data); if (!hasEvents && !ctx) return; @@ -409,7 +412,7 @@ if (ctx) { html += '
Search
' + ctx; } if (hasEvents) { html += '
Ranked events
'; - html += renderEvents(data.events) || ''; + html += renderEvents(events) || ''; } dataScroll.innerHTML = ''; @@ -428,12 +431,22 @@ } setInterval(fetchSession, 10000); + function resetPanel() { + // Each question is answered fresh — clear the previous question's events + // so stale results don't linger on the right while the new search runs. + _lastHash = ''; + dataScroll.innerHTML = ''; + dataScroll.appendChild(emptyState); + emptyState.style.display = ''; + } + async function sendMessage() { const question = inputEl.value.trim(); if (!question) return; inputEl.value = ''; sendBtn.disabled = true; setStatus(true, 'Browsing…'); + resetPanel(); addMessage(question, 'user'); const thinking = addMessage('Opening Meetup, Luma & Eventbrite…', 'thinking'); diff --git a/cuga-apps/apps/newsletter/main.py b/cuga-apps/apps/newsletter/main.py index 44772c2..0a053fb 100644 --- a/cuga-apps/apps/newsletter/main.py +++ b/cuga-apps/apps/newsletter/main.py @@ -46,6 +46,17 @@ if _p not in sys.path: sys.path.insert(0, _p) +# Robustness: AGENT_SETTING_CONFIG may arrive as an in-IMAGE absolute path +# (e.g. /app/apps/settings.watsonx.toml from build/.env) while running from a +# local checkout where it doesn't exist. CUGA aborts on a missing config file, +# so remap a non-existent absolute config to a local file of the same name. 
+_asc = os.environ.get("AGENT_SETTING_CONFIG", "") +if os.path.isabs(_asc) and not os.path.isfile(_asc): + for _cand in (_DIR / os.path.basename(_asc), _DEMOS_DIR / os.path.basename(_asc)): + if _cand.is_file(): + os.environ["AGENT_SETTING_CONFIG"] = str(_cand) + break + logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)-7s %(message)s", @@ -166,6 +177,11 @@ def make_agent(): tools=make_feed_tools(), special_instructions=_SYSTEM, cuga_folder=str(_DIR / ".cuga"), + # Each question is independent — disable the persistent knowledge store + # and on-disk policy auto-load so nothing carries across questions via + # the shared .cuga folder. + enable_knowledge=False, + auto_load_policies=False, ) @@ -358,12 +374,37 @@ def feeds_list(): @app.post("/ask") async def ask(req: AskReq): from _usage import track_utterance; track_utterance(req.question) + from datetime import datetime, timezone, timedelta feeds = _load_store().get("feeds", []) + # When the user asks for a recent window ("last 24 hours", "today", + # "this week"), hand the agent a concrete cutoff timestamp + the + # current time so it can actually filter items by publish date instead + # of guessing — the root cause of wrong "last 24 hours" summaries. + ql = req.question.lower() + window_note = "" + for phrase, hours in (("last 24 hour", 24), ("past 24 hour", 24), + ("last 24h", 24), ("today", 24), + ("last 48 hour", 48), ("yesterday", 48), + ("this week", 168), ("past week", 168), + ("last week", 168), ("last 7 day", 168)): + if phrase in ql: + now = datetime.now(timezone.utc) + cutoff = now - timedelta(hours=hours) + window_note = ( + f"\n\nTIME WINDOW (strict): only include items published on or " + f"after {cutoff.isoformat()} — current time is {now.isoformat()}. " + f"Call fetch_feed with max_items=50 for each feed so nothing " + f"recent is missed, parse each item's published/updated date, and " + f"DROP anything older than the cutoff. State how many items fell " + f"in the window. 
If a feed has none, say so explicitly rather than " + f"padding the summary with older posts." + ) + break if feeds: feed_list = "\n".join(f"- {url}" for url in feeds) - prompt = f"Configured feeds:\n{feed_list}\n\nQuestion: {req.question}" + prompt = f"Configured feeds:\n{feed_list}\n\nQuestion: {req.question}{window_note}" else: - prompt = req.question + prompt = req.question + window_note try: result = await _agent.invoke(prompt, thread_id=uuid.uuid4().hex) return {"answer": result.answer} @@ -741,9 +782,17 @@ def ui(): } function renderAnswer(text) { - return text + return String(text == null ? '' : text) .replace(/&/g,'&').replace(//g,'>') + // [label](url) links + .replace(/\\[([^\\]]+)\\]\\((https?:[^)\\s]+)\\)/g, + '$1') + // ## / ### headings → bold section labels + .replace(/^\\s*#{1,6}\\s+(.+)$/gm,'$1') .replace(/\\*\\*(.*?)\\*\\*/g,'$1') + .replace(/`([^`]+?)`/g,'$1') + // - / * bullets → indented dots + .replace(/^\\s*[-*]\\s+(.+)$/gm,'  • $1') .replace(/\\n/g,'
') } diff --git a/cuga-apps/apps/ouroboros/main.py b/cuga-apps/apps/ouroboros/main.py index 265e1bf..800fb22 100644 --- a/cuga-apps/apps/ouroboros/main.py +++ b/cuga-apps/apps/ouroboros/main.py @@ -58,9 +58,15 @@ # make_supervisor() is too late — by then specialists.py has already # imported cuga.sdk indirectly. So we resolve it here, before the first # cuga import in this process. +# For watsonx we ship our OWN config (settings.watsonx.toml in this dir) that +# pins every internal node to gpt-oss-120b instead of cuga's packaged +# llama-4-maverick default. AGENT_SETTING_CONFIG accepts an absolute path +# (cuga does os.path.join(MODELS_DIR, value); an absolute value wins), so we +# point at the local file rather than editing site-packages. +_WATSONX_TOML = str(_DIR / "settings.watsonx.toml") _AGENT_SETTING_CONFIG = { "rits": "settings.rits.toml", - "watsonx": "settings.watsonx.toml", + "watsonx": _WATSONX_TOML, "openai": "settings.openai.toml", "groq": "settings.groq.toml", "litellm": "settings.litellm.toml", @@ -75,8 +81,25 @@ _provider = (os.getenv("LLM_PROVIDER") or "watsonx").lower() os.environ.setdefault( "AGENT_SETTING_CONFIG", - _AGENT_SETTING_CONFIG.get(_provider, "settings.watsonx.toml"), + _AGENT_SETTING_CONFIG.get(_provider, _WATSONX_TOML), ) +# Outer LangChain model (supervisor + specialists' conversational layer): on +# watsonx default to gpt-oss-120b too, so both layers match. An explicit +# LLM_MODEL still wins. +if _provider == "watsonx": + os.environ.setdefault("LLM_MODEL", "openai/gpt-oss-120b") + +# Robustness: AGENT_SETTING_CONFIG may arrive as an in-IMAGE absolute path +# (e.g. /app/apps/settings.watsonx.toml from build/.env) while running from a +# local checkout where that path doesn't exist. CUGA aborts on a missing file, +# so remap a non-existent absolute config to a local file of the same name +# (this app's dir, then the apps dir). Runs BEFORE the first cuga import below. 
+_asc = os.environ.get("AGENT_SETTING_CONFIG", "") +if os.path.isabs(_asc) and not os.path.isfile(_asc): + for _cand in (_DIR / os.path.basename(_asc), _DEMOS_DIR / os.path.basename(_asc)): + if _cand.is_file(): + os.environ["AGENT_SETTING_CONFIG"] = str(_cand) + break def _patch_executor_timeout(seconds: int = 180) -> None: @@ -554,6 +577,24 @@ def _writer_output_from_state(supervisor) -> str | None: return None +def _looks_like_code(text: str) -> bool: + """True if the supervisor surfaced a raw code step (or other non-answer + artifact) instead of a finished, human-readable reply. When the cascade + bails early, `result.answer` can be the generated Python from a phase — + we never want to show that to the user.""" + if not text: + return False + t = text.strip() + if "```python" in t or t.startswith("```"): + return True + # Strong code signals (no leading prose) — require a couple to avoid + # nuking a legitimate answer that merely mentions code in passing. + markers = ("import json", "json.loads(", "def ", "for i in range(", + "print(f\"", "except (", "candidates = ", "scout_result", + "enrichments[", "= []") + return sum(1 for m in markers if m in t) >= 2 + + def _format_elapsed(ms: int) -> str: s = ms / 1000.0 if s < 1: @@ -1139,21 +1180,28 @@ def make_supervisor(): ) agents = make_all(model=model) - supervisor = CugaSupervisor( - agents=agents, - model=model, - # description= is dead in this SDK branch (never rendered into - # the supervisor's prompt). We inject the cascade rules via - # _TASK_PRELUDE on the user message in /ask instead. - # Step accounting (each block = 2 steps: model + execute): - # phase 1 (scout+parse+init): 2 - # phase 2 (5 specialists × up to 3 candidates, often <15 - # due to website conditionals): 20–30 - # phase 3 (writer): 2 - # misc planner indecision/retries: 5–15 - # 100 caps comfortably over the median 35–50. 
- cuga_lite_max_steps=100, - ) + # description= is dead in this SDK branch (never rendered into the + # supervisor's prompt). We inject the cascade rules via _TASK_PRELUDE on + # the user message in /ask instead. + # Step accounting (each block = 2 steps: model + execute): + # phase 1 (scout+parse+init): 2 + # phase 2 (5 specialists × up to 3 candidates, often <15 + # due to website conditionals): 20–30 + # phase 3 (writer): 2 + # misc planner indecision/retries: 5–15 + # 100 caps comfortably over the median 35–50. + kwargs = dict(agents=agents, model=model, cuga_lite_max_steps=100) + # Match the specialists: keep the supervisor stateless across turns by + # disabling its persistent knowledge store + on-disk policy auto-load, so + # nothing learned in one lead-hunt leaks into the next. These kwargs may + # not exist on every SDK build — fall back cleanly if they're rejected. + try: + supervisor = CugaSupervisor( + **kwargs, enable_knowledge=False, auto_load_policies=False) + except TypeError: + log.info("CugaSupervisor doesn't accept enable_knowledge/" + "auto_load_policies on this SDK build; using defaults") + supervisor = CugaSupervisor(**kwargs) return supervisor @@ -1326,18 +1374,35 @@ async def _handle_full_turn(question: str, thread_id: str, log.warning("[%s] no leads extracted (answer length: %d chars)", thread_id[:8], len(answer or "")) + # Never surface a raw code step / unfinished artifact to the user. + # When the cascade bailed early (no leads AND the answer looks like + # generated Python), show a clear retry message instead — but keep + # the raw answer in the saved run for debugging. 
+ display_answer = answer + if not leads and _looks_like_code(answer): + log.warning("[%s] supervisor returned a raw code step as its " + "answer; showing a friendly message instead", + thread_id[:8]) + display_answer = ( + "I couldn't finish the lead board for this request — the " + "research cascade stopped early before writing results " + f"(after {_format_elapsed(elapsed_ms)}). Please try again, " + "or rephrase with an explicit location, e.g. " + "\"clinics in Austin, TX\" or \"salons in Brooklyn, NY\"." + ) + _save_run(thread_id, question, answer, leads, supervisor, started_at=started_at, elapsed_ms=elapsed_ms, source=source, loop_id=loop_id) # Fire-and-forget email notification. Never blocks the response. asyncio.create_task(_maybe_send_email_for_run( - thread_id, question, answer, leads, + thread_id, question, display_answer, leads, source, loop_id, _format_elapsed(elapsed_ms), )) return { - "answer": answer, + "answer": display_answer, "thread_id": thread_id, "elapsed_ms": elapsed_ms, "elapsed_human": _format_elapsed(elapsed_ms), diff --git a/cuga-apps/apps/ouroboros/settings.watsonx.toml b/cuga-apps/apps/ouroboros/settings.watsonx.toml new file mode 100644 index 0000000..d48449a --- /dev/null +++ b/cuga-apps/apps/ouroboros/settings.watsonx.toml @@ -0,0 +1,94 @@ +# Ouroboros watsonx model config — gpt-oss-120b for every CUGA internal node. +# +# This OVERRIDES the llama-4-maverick default that ships in cuga's packaged +# settings.watsonx.toml. main.py points AGENT_SETTING_CONFIG at this file (by +# absolute path) when LLM_PROVIDER=watsonx, so the supervisor + all specialists +# run on gpt-oss-120b — the model the lead-hunt cascade was tuned/verified on. +# +# If your watsonx instance exposes gpt-oss under a different id, change the +# `model_name` values below (one per node). 
+ +[agent.task_decomposition.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.shortlister.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.planner.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.chat.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.plan_controller.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.final_answer.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +api_version = "2024-08-06" +temperature = 0.1 +max_tokens = 32000 + +[agent.code.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.code_planner.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.qa.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.action.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 2000 + +[memory.mem0.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 1000 + +[memory.milvus.step_processing.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 1000 + +[memory.milvus.fact_extraction.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 1000 + +[memory.tips_extractor.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 5000 diff --git a/cuga-apps/apps/ouroboros/ui.py b/cuga-apps/apps/ouroboros/ui.py index 74dc1e4..66f9d64 100644 --- a/cuga-apps/apps/ouroboros/ui.py +++ b/cuga-apps/apps/ouroboros/ui.py @@ -1281,22 +1281,22 @@
-
Find leads in Westchester, NY
-
Restaurants in HSR Layout, Bangalore — pitch order bots
-
Salons in Brooklyn that need appointment booking
-
Independent hotels in Lisbon — concierge agent angle
-
Clinics in Austin — patient FAQ + intake
-
Real estate offices in San Mateo — lead capture pitch
-
Boutiques in Williamsburg — product Q&A
-
Veterinary clinics near Berkeley — appointment + reminders
-
Tutoring centers in Mumbai Andheri — enrollment funnel
+
Boutiques in Williamsburg, Brooklyn — product Q&A & size/fit chat
+
Restaurants in Park Slope, Brooklyn — reservations + order bot
+
Cafés in the Mission District, San Francisco — order-ahead + loyalty
+
Restaurants in the North End, Boston — bookings + waitlist
+
Boutiques in Georgetown, Washington DC — product Q&A + styling
+
Boutiques on Abbot Kinney, Venice CA — product Q&A
+
Cafés in the Pearl District, Portland — mobile order-ahead
+
Restaurants in Fitzrovia, London — table bookings + menu concierge
+
Independent hotels in Lisbon — 24/7 concierge agent
@@ -1676,25 +1676,39 @@ function refreshPanel(state) { const hash = JSON.stringify(state); if (hash === _lastHash) return; - _lastHash = hash; const b = state.leads; if (!b) return; - emptyState.style.display = 'none'; - dataScroll.innerHTML = ''; - + // Build the HTML BEFORE touching the DOM, and guard every step. A single + // malformed lead field (e.g. evidence/review_friction arriving as a + // non-array) used to throw mid-render — after the panel had already been + // cleared — leaving the right panel completely blank even though leads + // were present. Now one bad lead is skipped and the panel still renders. let html = ''; - html += renderHero(b, state); + try { + html += renderHero(b, state); + } catch (e) { console.error('renderHero failed', e); } - const leads = (b.leads || []).slice().sort((a, c) => (c.fit_score || 0) - (a.fit_score || 0)); + const leads = (b.leads || []).slice() + .sort((a, c) => (c.fit_score || 0) - (a.fit_score || 0)); if (leads.length) { html += '
Leads · ranked by fit
'; - leads.forEach((lead, i) => { html += renderLead(lead, i); }); + leads.forEach((lead, i) => { + try { html += renderLead(lead, i); } + catch (e) { console.error('renderLead failed for lead', i, e, lead); } + }); } - html += renderNextSteps(b.next_steps); + try { + html += renderNextSteps(b.next_steps); + } catch (e) { console.error('renderNextSteps failed', e); } + + if (!html) return; // nothing to show — keep whatever is on screen + _lastHash = hash; + emptyState.style.display = 'none'; + dataScroll.innerHTML = ''; const wrap = document.createElement('div'); wrap.style.display = 'contents'; wrap.innerHTML = html; @@ -1713,6 +1727,32 @@ // Auto-refresh removed — fetchSession() runs only after /ask returns, // or when the user clicks the Refresh badge in the data panel header. + // A supervisor turn takes 1–3 minutes. If a proxy/gateway drops the long + // /ask request before it finishes, the server still completes the turn and + // persists the leads to the session — so on a network error we poll + // /session for a few minutes and recover the result instead of failing. + async function recoverFromSession(thinking) { + const deadline = Date.now() + 240000; // up to 4 minutes + while (Date.now() < deadline) { + await new Promise(r => setTimeout(r, 6000)); + try { + const res = await fetch('/session/' + SESSION_ID); + if (!res.ok) continue; + const data = await res.json(); + const n = data && data.leads && (data.leads.leads || []).length; + if (n) { + thinking.remove(); + addMessage('Done — this run took a while, so it was recovered after ' + + 'the request timed out. 
See the board on the right.', 'agent'); + refreshPanel(data); + await refreshRunsList(); + return true; + } + } catch (_) { /* keep waiting */ } + } + return false; + } + async function sendMessage() { const question = inputEl.value.trim(); if (!question) return; @@ -1742,8 +1782,13 @@ await refreshRunsList(); } } catch (err) { - thinking.remove(); - addMessage('Network error: ' + err.message, 'error'); + // Long run may have been dropped by a proxy/gateway — try to recover + // the result from the session before declaring failure. + const recovered = await recoverFromSession(thinking); + if (!recovered) { + thinking.remove(); + addMessage('Network error: ' + err.message, 'error'); + } } finally { sendBtn.disabled = false; setStatus(false, 'Ready'); diff --git a/cuga-apps/apps/paper_scout/main.py b/cuga-apps/apps/paper_scout/main.py index dcac6b6..e32c375 100644 --- a/cuga-apps/apps/paper_scout/main.py +++ b/cuga-apps/apps/paper_scout/main.py @@ -162,6 +162,13 @@ def make_agent(): tools=_make_tools(), special_instructions=_SYSTEM, cuga_folder=str(_DIR / ".cuga"), + # Each question is independent — disable the persistent knowledge store + # and on-disk policy auto-load. CUGA's policy DB is a shared global + # sqlite store, so without this an output-formatter persisted by another + # app (e.g. meetup_finder's save_events event board) leaks in and the + # model emits that board instead of this app's answer. + enable_knowledge=False, + auto_load_policies=False, ) diff --git a/cuga-apps/apps/settings.watsonx.toml b/cuga-apps/apps/settings.watsonx.toml new file mode 100644 index 0000000..5e83dac --- /dev/null +++ b/cuga-apps/apps/settings.watsonx.toml @@ -0,0 +1,95 @@ +# Shared watsonx model config for the all-in-one deployment — gpt-oss-120b for +# every CUGA internal node, instead of cuga's packaged llama-4-maverick default. +# +# build/.env points AGENT_SETTING_CONFIG at this file's in-image absolute path +# (/app/apps/settings.watsonx.toml). 
AGENT_SETTING_CONFIG is read globally, so +# every ship-ready app's internal nodes use this config. The outer LangChain +# model is set to match via LLM_MODEL=openai/gpt-oss-120b in build/.env. +# +# If your watsonx instance exposes gpt-oss under a different id, change the +# `model_name` values below (one per node). + +[agent.task_decomposition.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.shortlister.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.planner.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.chat.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.plan_controller.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.final_answer.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +api_version = "2024-08-06" +temperature = 0.1 +max_tokens = 32000 + +[agent.code.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.code_planner.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.qa.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 16000 + +[agent.action.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 2000 + +[memory.mem0.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 1000 + +[memory.milvus.step_processing.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 1000 + +[memory.milvus.fact_extraction.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 1000 + 
+[memory.tips_extractor.model] +platform = "watsonx" +model_name = "openai/gpt-oss-120b" +temperature = 0.1 +max_tokens = 5000 diff --git a/cuga-apps/apps/travel_planner/main.py b/cuga-apps/apps/travel_planner/main.py index a435213..02a266a 100644 --- a/cuga-apps/apps/travel_planner/main.py +++ b/cuga-apps/apps/travel_planner/main.py @@ -98,12 +98,24 @@ 4. Call search_attractions(lat, lon, category) at least twice with different categories relevant to the traveller's interests (e.g. historic + cultural, or natural + amusements). -5. Call web_search(query) for at least two practical queries: +5. Call web_search(query) for practical, current facts. Each result is a + REFERENCE PAGE to read — its title/URL is a source, NOT an event or a fact in + itself. Run at least: - visa / entry requirements for international travellers - local transport options and approximate costs - - any notable events or festivals during the travel month + - notable events or festivals during the travel month 6. Only after gathering all the above, write the itinerary. +EVENTS — read this carefully. Only mention an event if you can name the SPECIFIC +event WITH its actual dates and venue, taken from the content of a page you +searched. A search frequently returns only calendar/aggregator/listing pages — +e.g. "Events in in ", a "things to do" roundup, an Instagram or +social handle, a tour-operator blog. Those are NOT events: they have no single +date or venue. NEVER list such a page as an event (a dead giveaway is a "date" +like "June 2026" with no day, or a venue like "various venues"). If you cannot +find a concrete, dated, named event, simply say you couldn't confirm specific +events for that month and move on — do not pad the itinerary with listing pages. 
+ Itinerary format: - Brief destination intro (2–3 sentences) - Weather & packing tips for the travel month @@ -174,7 +186,19 @@ async def _build_cuga_agent(llm) -> CugaAgent: if not os.environ.get("OPENAI_API_KEY"): os.environ["OPENAI_API_KEY"] = "sk-placeholder-not-used" - agent = CugaAgent(model=llm, tools=TOOLS, special_instructions=SYSTEM_INSTRUCTIONS) + # enable_knowledge / auto_load_policies OFF: CUGA's policy DB is a shared + # global sqlite store, so without this an output-formatter persisted by + # another app (e.g. meetup_finder's save_events event board) auto-loads here + # and the model emits that board instead of an itinerary. cuga_folder keeps + # this app's own CUGA state isolated under its directory. + agent = CugaAgent( + model=llm, + tools=TOOLS, + special_instructions=SYSTEM_INSTRUCTIONS, + cuga_folder=str(Path(__file__).parent / ".cuga"), + enable_knowledge=False, + auto_load_policies=False, + ) await agent.initialize() return agent diff --git a/cuga-apps/apps/travel_planner/static/index.html b/cuga-apps/apps/travel_planner/static/index.html index 20ae6a8..c4eaaf3 100644 --- a/cuga-apps/apps/travel_planner/static/index.html +++ b/cuga-apps/apps/travel_planner/static/index.html @@ -164,70 +164,6 @@ - -
-
-
- ⚙️ -
-

Configure

-

Set your API keys to get started

-
-
- -
-
-

RITS — LLM provider

- - -
- -
- - -
- -
- - -
- -
- -
-

Travel data keys

-
- - -
- -

Used for visa requirements, transport info, and local events. Get a free key at tavily.com.

-
- -
- - -
-
- - - - -
-
-
@@ -245,13 +181,6 @@

Travel Planner

-
@@ -265,7 +194,7 @@

Travel Planner

🔎 Web search · Tavily 📚 Wikipedia 📍 Geocoding · OSM - 🎡 Attractions · OpenTripMap + 🎡 Attractions · OSM ☀ Weather · wttr.in @@ -411,101 +340,20 @@

diff --git a/cuga-apps/apps/webpage_summarizer/main.py b/cuga-apps/apps/webpage_summarizer/main.py index d850db3..995177f 100644 --- a/cuga-apps/apps/webpage_summarizer/main.py +++ b/cuga-apps/apps/webpage_summarizer/main.py @@ -35,6 +35,17 @@ if _p not in sys.path: sys.path.insert(0, _p) +# Robustness: AGENT_SETTING_CONFIG may arrive as an in-IMAGE absolute path +# (e.g. /app/apps/settings.watsonx.toml from build/.env) while running from a +# local checkout where it doesn't exist. CUGA aborts on a missing config file, +# so remap a non-existent absolute config to a local file of the same name. +_asc = os.environ.get("AGENT_SETTING_CONFIG", "") +if os.path.isabs(_asc) and not os.path.isfile(_asc): + for _cand in (_DIR / os.path.basename(_asc), _DEMOS_DIR / os.path.basename(_asc)): + if _cand.is_file(): + os.environ["AGENT_SETTING_CONFIG"] = str(_cand) + break + # --------------------------------------------------------------------------- # Logging # --------------------------------------------------------------------------- @@ -111,6 +122,11 @@ def make_agent(): tools=_make_tools(), special_instructions=_SYSTEM, cuga_folder=str(_DIR / ".cuga"), + # Each question is independent — disable the persistent knowledge store + # and on-disk policy auto-load so nothing carries across questions via + # the shared .cuga folder. + enable_knowledge=False, + auto_load_policies=False, ) @@ -148,7 +164,11 @@ async def ask(req: AskRequest): try: agent = _get_agent() result = await agent.invoke(req.question, thread_id=thread_id) - return {"answer": str(result)} + # Return the agent's synthesised answer, NOT str(result): the + # result object's repr dumps the CUGA plan + generated Python code, + # which is what was leaking into the UI as an unformatted code blob. 
+ answer = result.answer if hasattr(result, "answer") else str(result) + return {"answer": answer} except Exception as exc: log.exception("Agent invocation failed") return JSONResponse(status_code=500, content={"answer": f"Error: {exc}"}) diff --git a/cuga-apps/apps/webpage_summarizer/ui.py b/cuga-apps/apps/webpage_summarizer/ui.py index 5a67371..62d97d2 100644 --- a/cuga-apps/apps/webpage_summarizer/ui.py +++ b/cuga-apps/apps/webpage_summarizer/ui.py @@ -406,7 +406,9 @@ function addMessage(text, cls) { const div = document.createElement('div'); div.className = 'msg ' + cls; - div.textContent = text; + // Render the agent's markdown answer; keep user/thinking text literal. + if (cls === 'agent') div.innerHTML = mdToHtml(text); + else div.textContent = text; messagesEl.appendChild(div); messagesEl.scrollTop = messagesEl.scrollHeight; return div; @@ -438,7 +440,7 @@ card.className = 'summary-card'; card.innerHTML = '
' + escHtml(url || '') + '
' + - '
' + escHtml(answer) + '
' + + '
' + mdToHtml(answer) + '
' + '
' + timeStr() + '
'; summaryContent.prepend(card); @@ -474,6 +476,24 @@ .replace(/"/g, '"'); } + // Minimal, safe markdown → HTML (escapes first, then formats). The agent + // returns a structured markdown summary; render it instead of dumping the + // raw text so headings, bullets, bold and links display properly. + function mdToHtml(text) { + return escHtml(String(text == null ? '' : text)) + .replace(/```[\s\S]*?```/g, m => + '
'
+        + m.replace(/```/g, '') + '
') + .replace(/\[([^\]]+)\]\((https?:[^)\s]+)\)/g, + '$1') + .replace(/^\s*#{1,6}\s+(.+)$/gm, '$1') + .replace(/\*\*(.+?)\*\*/g, '$1') + .replace(/(^|[^*])\*([^*\n]+?)\*/g, '$1$2') + .replace(/`([^`]+?)`/g, '$1') + .replace(/^\s*[-*]\s+(.+)$/gm, '  • $1') + .replace(/\n/g, '
'); + } + async function sendMessage() { const question = inputEl.value.trim(); if (!question) return; diff --git a/cuga-apps/apps/wiki_dive/main.py b/cuga-apps/apps/wiki_dive/main.py index 214c7f6..cdf52c2 100644 --- a/cuga-apps/apps/wiki_dive/main.py +++ b/cuga-apps/apps/wiki_dive/main.py @@ -174,6 +174,13 @@ def make_agent(): tools=_make_tools(), special_instructions=_SYSTEM, cuga_folder=str(_DIR / ".cuga"), + # Each question is independent — disable the persistent knowledge store + # and on-disk policy auto-load. CUGA's policy DB is a shared global + # sqlite store, so without this an output-formatter persisted by another + # app (e.g. meetup_finder's save_events event board) leaks in and the + # model emits that board instead of this app's answer. + enable_knowledge=False, + auto_load_policies=False, ) diff --git a/cuga-apps/apps/youtube_research/main.py b/cuga-apps/apps/youtube_research/main.py index 3dc51f0..e5506db 100644 --- a/cuga-apps/apps/youtube_research/main.py +++ b/cuga-apps/apps/youtube_research/main.py @@ -242,6 +242,13 @@ def make_agent(): tools=tools, special_instructions=_SYSTEM, cuga_folder=str(_DIR / ".cuga"), + # Each question is independent — disable the persistent knowledge store + # and on-disk policy auto-load. CUGA's policy DB is a shared global + # sqlite store, so without this an output-formatter persisted by another + # app (e.g. meetup_finder's save_events event board) leaks in and the + # model emits that board instead of this app's answer. + enable_knowledge=False, + auto_load_policies=False, ) diff --git a/cuga-apps/docker-compose.yml b/cuga-apps/docker-compose.yml index d1092fb..d79b609 100644 --- a/cuga-apps/docker-compose.yml +++ b/cuga-apps/docker-compose.yml @@ -12,10 +12,13 @@ # to 3001 / 28xxx / 29xxx. 
# # Quick start: -# cp apps/.env.example apps/.env +# cp build/.env.example build/.env # single source of truth (repo root) # docker compose build # docker compose up # Then: http://localhost:3001 +# +# All services mount ../build/.env — the same file Code Engine and launch.py +# use — so config lives in exactly one place. # ========================================================================= name: cuga-apps @@ -33,7 +36,7 @@ services: # rather than injected via env_file:, so they never appear in # `docker inspect` or in the image. See entrypoint.sh. volumes: - - ./apps/.env:/run/secrets/app.env:ro + - ../build/.env:/run/secrets/app.env:ro ports: ["29100:29100"] restart: unless-stopped @@ -47,7 +50,7 @@ services: # rather than injected via env_file:, so they never appear in # `docker inspect` or in the image. See entrypoint.sh. volumes: - - ./apps/.env:/run/secrets/app.env:ro + - ../build/.env:/run/secrets/app.env:ro ports: ["29101:29101"] restart: unless-stopped @@ -61,7 +64,7 @@ services: # rather than injected via env_file:, so they never appear in # `docker inspect` or in the image. See entrypoint.sh. volumes: - - ./apps/.env:/run/secrets/app.env:ro + - ../build/.env:/run/secrets/app.env:ro ports: ["29102:29102"] restart: unless-stopped @@ -75,7 +78,7 @@ services: # rather than injected via env_file:, so they never appear in # `docker inspect` or in the image. See entrypoint.sh. volumes: - - ./apps/.env:/run/secrets/app.env:ro + - ../build/.env:/run/secrets/app.env:ro ports: ["29103:29103"] restart: unless-stopped @@ -89,7 +92,7 @@ services: # rather than injected via env_file:, so they never appear in # `docker inspect` or in the image. See entrypoint.sh. volumes: - - ./apps/.env:/run/secrets/app.env:ro + - ../build/.env:/run/secrets/app.env:ro ports: ["29104:29104"] restart: unless-stopped @@ -103,7 +106,7 @@ services: # rather than injected via env_file:, so they never appear in # `docker inspect` or in the image. See entrypoint.sh. 
volumes: - - ./apps/.env:/run/secrets/app.env:ro + - ../build/.env:/run/secrets/app.env:ro ports: ["29105:29105"] restart: unless-stopped @@ -120,7 +123,7 @@ services: # The remaining mounts let extract_text read files written by the apps # container — same volumes the apps container mounts, RO here. volumes: - - ./apps/.env:/run/secrets/app.env:ro + - ../build/.env:/run/secrets/app.env:ro - ./apps/drop_summarizer/inbox:/app/apps/drop_summarizer/inbox:ro - ./apps/box_qa:/app/apps/box_qa:ro - ./apps/deck_forge/data:/app/apps/deck_forge/data:ro @@ -139,7 +142,7 @@ services: # Bird data lives outside the repo — bind-mount read-only via env-driven # host paths. Set BIRD_DEV_JSON_HOST and BIRD_DBS_DIR_HOST in apps/.env. volumes: - - ./apps/.env:/run/secrets/app.env:ro + - ../build/.env:/run/secrets/app.env:ro - ${BIRD_DEV_JSON_HOST:-/home/amurthi/work/dev_20240627/dev.json}:/data/bird/dev.json:ro - ${BIRD_DBS_DIR_HOST:-/home/amurthi/work/enterprise-benchmark/data/db}:/data/bird/dbs:ro - ./mcp_servers/invocable_apis/state:/app/mcp_servers/invocable_apis/state @@ -196,7 +199,7 @@ services: # instead of being injected via env_file: — keeps them out of `docker # inspect` and out of the image. See entrypoint.sh. 
volumes: - - ./apps/.env:/run/secrets/app.env:ro + - ../build/.env:/run/secrets/app.env:ro - ./apps/drop_summarizer/inbox:/app/apps/drop_summarizer/inbox - ./apps/drop_summarizer/data:/app/apps/drop_summarizer/data - ./apps/voice_journal/storage:/app/apps/voice_journal/storage diff --git a/cuga-apps/mcp_servers/__pycache__/__init__.cpython-311.pyc b/cuga-apps/mcp_servers/__pycache__/__init__.cpython-311.pyc index 6061ab8..2d15555 100644 Binary files a/cuga-apps/mcp_servers/__pycache__/__init__.cpython-311.pyc and b/cuga-apps/mcp_servers/__pycache__/__init__.cpython-311.pyc differ diff --git a/cuga-apps/mcp_servers/_core/__pycache__/__init__.cpython-311.pyc b/cuga-apps/mcp_servers/_core/__pycache__/__init__.cpython-311.pyc index d3dc48e..5f8373b 100644 Binary files a/cuga-apps/mcp_servers/_core/__pycache__/__init__.cpython-311.pyc and b/cuga-apps/mcp_servers/_core/__pycache__/__init__.cpython-311.pyc differ diff --git a/cuga-apps/mcp_servers/_core/__pycache__/errors.cpython-311.pyc b/cuga-apps/mcp_servers/_core/__pycache__/errors.cpython-311.pyc index 246c1fa..f239133 100644 Binary files a/cuga-apps/mcp_servers/_core/__pycache__/errors.cpython-311.pyc and b/cuga-apps/mcp_servers/_core/__pycache__/errors.cpython-311.pyc differ diff --git a/cuga-apps/mcp_servers/_core/__pycache__/http.cpython-311.pyc b/cuga-apps/mcp_servers/_core/__pycache__/http.cpython-311.pyc index 6885d17..e754adf 100644 Binary files a/cuga-apps/mcp_servers/_core/__pycache__/http.cpython-311.pyc and b/cuga-apps/mcp_servers/_core/__pycache__/http.cpython-311.pyc differ diff --git a/cuga-apps/mcp_servers/_core/__pycache__/serve.cpython-311.pyc b/cuga-apps/mcp_servers/_core/__pycache__/serve.cpython-311.pyc index 4e124ac..f657429 100644 Binary files a/cuga-apps/mcp_servers/_core/__pycache__/serve.cpython-311.pyc and b/cuga-apps/mcp_servers/_core/__pycache__/serve.cpython-311.pyc differ diff --git a/cuga-apps/mcp_servers/finance/server.py b/cuga-apps/mcp_servers/finance/server.py index 
21852c2..ecec5ce 100644 --- a/cuga-apps/mcp_servers/finance/server.py +++ b/cuga-apps/mcp_servers/finance/server.py @@ -20,11 +20,14 @@ from apps._ports import MCP_FINANCE_PORT # noqa: E402 try: - from apps._usage import track_call + from apps._usage import track_call, classify_error except Exception: # noqa: BLE001 — tracking is optional, never block the server def track_call(*_a, **_k): # type: ignore pass + def classify_error(_exc): # type: ignore + return "error" + mcp = make_server("mcp-finance") _COINGECKO = "https://api.coingecko.com/api/v3" @@ -114,7 +117,7 @@ def get_stock_quote(symbol: str, api_key: str | None = None) -> str: }) track_call("alpha_vantage", app="mcp-finance") except Exception as exc: - track_call("alpha_vantage", app="mcp-finance", ok=False) + track_call("alpha_vantage", app="mcp-finance", ok=False, code=classify_error(exc)) return tool_error(f"Alpha Vantage failed: {exc}", code="upstream") if "Note" in data or "Information" in data: diff --git a/cuga-apps/mcp_servers/geo/server.py b/cuga-apps/mcp_servers/geo/server.py index 46f34be..bf1e4df 100644 --- a/cuga-apps/mcp_servers/geo/server.py +++ b/cuga-apps/mcp_servers/geo/server.py @@ -3,13 +3,14 @@ Tools: - geocode(place) Nominatim (OpenStreetMap) - find_hikes(lat, lon, radius_km, ...) Overpass API (OSM) - - search_attractions(lat, lon, category, limit) OpenTripMap + - search_attractions(lat, lon, category, limit) Overpass API (OSM) - get_weather(city, travel_month) wttr.in -All free except search_attractions (OPENTRIPMAP_API_KEY, free tier 500/day). +All free, no API keys required. """ from __future__ import annotations +import math import os import sys from pathlib import Path @@ -27,9 +28,62 @@ _NOMINATIM = "https://nominatim.openstreetmap.org/search" _OVERPASS = "https://overpass-api.de/api/interpreter" -_OPENTRIPMAP = "https://api.opentripmap.com/0.1/en/places/radius" _WTTR = "https://wttr.in" +# OpenTripMap-compatible category names → OSM tag filters (key, value-regex). 
+# Keyless: served from the same Overpass API that powers find_hikes. We keep +# the public category vocabulary identical to the old OpenTripMap version so +# callers (travel_planner, city_beat, hiking_research) need no changes. +_ATTRACTION_TAGS: dict[str, list[tuple[str, str]]] = { + "interesting_places": [ + ("tourism", "attraction|museum|gallery|viewpoint|artwork|theme_park|zoo|aquarium"), + ("historic", "monument|memorial|castle|ruins|archaeological_site|fort|monastery|tower"), + ("leisure", "park|garden"), + ], + "cultural": [ + ("tourism", "museum|gallery|artwork|arts_centre"), + ("amenity", "theatre|arts_centre"), + ("historic", "monument|memorial|monastery"), + ], + "historic": [ + ("historic", "monument|memorial|castle|ruins|archaeological_site|fort|" + "city_gate|tower|monastery|building|church|temple"), + ("tourism", "museum"), + ], + "natural": [ + ("leisure", "park|nature_reserve|garden"), + ("natural", "peak|beach|waterfall|cave_entrance|spring"), + ("tourism", "viewpoint"), + ], + "architecture": [ + ("tourism", "attraction"), + ("historic", "monument|castle|tower|city_gate|building"), + ("man_made", "tower|lighthouse|bridge"), + ], + "amusements": [ + ("tourism", "theme_park|zoo|aquarium"), + ("leisure", "water_park|amusement_arcade"), + ], + "sport": [ + ("leisure", "stadium|sports_centre|track|pitch|golf_course"), + ], + "foods": [ + ("amenity", "marketplace"), + ("tourism", "attraction"), + ], +} + + +def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> int: + """Great-circle distance in meters between two lat/lon points.""" + r = 6371000.0 + p1, p2 = math.radians(lat1), math.radians(lat2) + dphi = math.radians(lat2 - lat1) + dlmb = math.radians(lon2 - lon1) + a = (math.sin(dphi / 2) ** 2 + + math.cos(p1) * math.cos(p2) * math.sin(dlmb / 2) ** 2) + return int(round(r * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a)))) + @mcp.tool() def geocode(place: str) -> str: @@ -135,54 +189,82 @@ def search_attractions( limit: int = 15, 
radius_m: int = 20000, ) -> str: - """Find top attractions near a coordinate via OpenTripMap. + """Find top attractions near a coordinate via OpenStreetMap (Overpass). - Use geocode first to obtain lat/lon. Categories: + Keyless — no API key required. Use geocode first to obtain lat/lon. + Categories: interesting_places | cultural | historic | natural | architecture | amusements | sport | foods + Returns named, real places only (museums, monuments, parks, galleries, + viewpoints, etc.) sorted nearest-first, each with the distance from the + search point and any website/wikipedia tag OSM carries for grounding. + Args: lat: Latitude. lon: Longitude. category: One of the categories listed above. - limit: Max results (default 15, max 20). - radius_m: Search radius in meters (default 20000). - - Env: - OPENTRIPMAP_API_KEY required. + limit: Max results (default 15, max 40). + radius_m: Search radius in meters (default 20000, max 50000). """ - api_key = os.getenv("OPENTRIPMAP_API_KEY") - if not api_key: - return tool_error("OPENTRIPMAP_API_KEY not set on the MCP server.", code="missing_key") - try: - places = get_json( - _OPENTRIPMAP, - params={ - "radius": radius_m, - "lon": lon, - "lat": lat, - "kinds": category, - "limit": min(int(limit), 20), - "apikey": api_key, - "format": "json", - "rate": 2, - }, + tag_filters = _ATTRACTION_TAGS.get(category) + if tag_filters is None: + return tool_error( + f"Unknown category '{category}'. Valid: " + f"{', '.join(sorted(_ATTRACTION_TAGS))}.", + code="bad_input", ) + radius_m = min(max(int(radius_m), 500), 50000) + limit = min(max(int(limit), 1), 40) + + # Build a union over node+way for every (key, value-regex) in the category. 
+ blocks = [] + for key, val in tag_filters: + for kind in ("node", "way"): + blocks.append(f'{kind}["{key}"~"^({val})$"]["name"](around:{radius_m},{lat},{lon});') + query = f"[out:json][timeout:25];({' '.join(blocks)});out tags center 80;" + + try: + data = get_json(_OVERPASS, params={"data": query}) + seen: set[str] = set() results = [] - for p in places or []: - name = (p.get("name") or "").strip() - if not name: + for el in data.get("elements", []): + tags = el.get("tags", {}) or {} + name = (tags.get("name") or "").strip() + if not name or name.lower() in seen: continue + seen.add(name.lower()) + center = el.get("center") or {} + plat = el.get("lat", center.get("lat")) + plon = el.get("lon", center.get("lon")) + dist = (_haversine_m(lat, lon, plat, plon) + if plat is not None and plon is not None else None) + # "kinds" mirrors OpenTripMap's comma-joined descriptor so existing + # callers that read .kinds keep working. + kinds = ",".join( + str(tags[k]) for k in ("tourism", "historic", "leisure", + "natural", "amenity", "man_made") + if tags.get(k) + ) results.append({ - "name": name, - "kinds": p.get("kinds", ""), - "dist_m": p.get("dist"), - "xid": p.get("xid"), - "point": p.get("point"), + "name": name, + "kinds": kinds, + "dist_m": dist, + "lat": plat, + "lon": plon, + "address": tags.get("addr:street", ""), + "website": tags.get("website") or tags.get("contact:website") or "", + "wikipedia": tags.get("wikipedia", ""), + "osm": f"https://www.openstreetmap.org/{el.get('type')}/{el.get('id')}", }) - return tool_result({"category": category, "attractions": results}) + results.sort(key=lambda r: (r["dist_m"] is None, r["dist_m"] or 0)) + return tool_result({ + "category": category, + "count": len(results[:limit]), + "attractions": results[:limit], + }) except Exception as exc: - return tool_error(f"OpenTripMap failed: {exc}", code="upstream") + return tool_error(f"Overpass query failed: {exc}", code="upstream") @mcp.tool() diff --git 
a/cuga-apps/mcp_servers/knowledge/server.py b/cuga-apps/mcp_servers/knowledge/server.py index 6cef4ba..f0010ed 100644 --- a/cuga-apps/mcp_servers/knowledge/server.py +++ b/cuga-apps/mcp_servers/knowledge/server.py @@ -25,6 +25,15 @@ from mcp_servers._core.serve import make_server, run from apps._ports import MCP_KNOWLEDGE_PORT # noqa: E402 +try: + from apps._usage import track_call, classify_error +except Exception: # noqa: BLE001 — tracking is optional, never block the server + def track_call(*_a, **_k): # type: ignore + pass + + def classify_error(_exc): # type: ignore + return "error" + mcp = make_server("mcp-knowledge") # ── Wikipedia ────────────────────────────────────────────────────────── @@ -55,8 +64,10 @@ def search_wikipedia(query: str, max_results: int = 6) -> str: "snippet": _strip_html(h.get("snippet", "")), "url": f"https://en.wikipedia.org/wiki/{h.get('title', '').replace(' ', '_')}", } for h in hits] + track_call("wikipedia", app="mcp-knowledge") return tool_result({"results": results}) except Exception as exc: + track_call("wikipedia", app="mcp-knowledge", ok=False, code=classify_error(exc)) return tool_error(f"Wikipedia search failed: {exc}", code="upstream") @@ -106,8 +117,10 @@ def _wiki_article(title: str, full: bool) -> str: } # Preserve the per-mode field name callers already expect. 
result["extract" if full else "summary"] = page.get("extract", "") + track_call("wikipedia", app="mcp-knowledge") return tool_result(result) except Exception as exc: + track_call("wikipedia", app="mcp-knowledge", ok=False, code=classify_error(exc)) return tool_error(f"Wikipedia fetch failed: {exc}", code="upstream") diff --git a/cuga-apps/mcp_servers/web/server.py b/cuga-apps/mcp_servers/web/server.py index ee292e9..96e6527 100644 --- a/cuga-apps/mcp_servers/web/server.py +++ b/cuga-apps/mcp_servers/web/server.py @@ -26,11 +26,14 @@ from apps._ports import MCP_WEB_PORT # noqa: E402 try: - from apps._usage import track_call + from apps._usage import track_call, classify_error except Exception: # noqa: BLE001 — tracking is optional, never block the server def track_call(*_a, **_k): # type: ignore pass + def classify_error(_exc): # type: ignore + return "error" + mcp = make_server("mcp-web") @@ -61,7 +64,7 @@ def web_search(query: str, max_results: int = 6) -> str: track_call("tavily", app="mcp-web") return tool_result(raw) except Exception as exc: - track_call("tavily", app="mcp-web", ok=False) + track_call("tavily", app="mcp-web", ok=False, code=classify_error(exc)) return tool_error(f"Tavily search failed: {exc}", code="upstream") diff --git a/cuga-apps/ui/src/components/Layout.tsx b/cuga-apps/ui/src/components/Layout.tsx index b19d8d0..e1cffe9 100644 --- a/cuga-apps/ui/src/components/Layout.tsx +++ b/cuga-apps/ui/src/components/Layout.tsx @@ -1,16 +1,11 @@ import { NavLink } from 'react-router-dom' import { useTheme } from '../hooks/useTheme' -import { statsDashboardUrl } from '../data/deployment' const NAV = [ { to: '/', label: 'Apps', end: true }, { to: '/mcp-servers', label: 'MCP Servers', end: false }, ] -// External link to the bundled usage/stats dashboard (resolved per deployment -// mode). Opens in a new tab since it's a separate app, not a UI route. 
-const STATS_URL = statsDashboardUrl() - // Where to send feedback / bug reports — the cuga-apps GitHub issue tracker. const FEEDBACK_URL = 'https://github.com/cuga-project/cuga-apps/issues/new' @@ -52,16 +47,6 @@ export default function Layout({ children }: { children: React.ReactNode }) { {item.label} ))} - {STATS_URL && ( - - Stats ↗ - - )} demoPath ? `${REPO_TREE}/${demoPath}` : 'https://github.com/cuga-project/cuga-apps' @@ -195,6 +197,22 @@ const shipOrderIndex = (id: string) => { return i === -1 ? Number.MAX_SAFE_INTEGER : i } +// Curated display order for the "Additional apps" (for-later + exploratory), +// ranked by how well they actually work (status) × capability richness / +// distinctiveness. Genuinely partial / not-working entries sink to the bottom. +// Ids not listed fall to the end, preserving source order. +const ADDITIONAL_ORDER = [ + 'code-reviewer', 'video-qa', 'bird-invocable-api', 'trip-designer', 'api-doc-gen', + 'voice-journal', 'brief-budget', 'drop-summarizer', 'smart-todo', 'ibm-whats-new', + 'chief-of-staff', 'box-qa', + // Pinned to the bottom by request. + 'deck-forge', 'code-engine-deployer', +] +const additionalOrderIndex = (id: string) => { + const i = ADDITIONAL_ORDER.indexOf(id) + return i === -1 ? Number.MAX_SAFE_INTEGER : i +} + // ── Domain buckets (mirrors docs/apps_overview.svg) ─────────────────────────── type BucketAccent = 'indigo' | 'emerald' | 'amber' | 'pink' | 'cyan' | 'violet' | 'slate' @@ -256,8 +274,6 @@ function DomainBuckets({ activeBucket: string | null onSelectBucket: (id: string | null) => void }) { - const navigate = useNavigate() - const ucById = useMemo(() => { const m = new Map() for (const uc of useCases) m.set(uc.id, uc) @@ -328,9 +344,9 @@ function DomainBuckets({ {apps.map((uc) => (