1 change: 1 addition & 0 deletions .gitignore
@@ -27,6 +27,7 @@ Thumbs.db

# Debug
__debug_bin*
/debug/

# Python
__pycache__/
32 changes: 29 additions & 3 deletions CLAUDE.md
@@ -20,17 +20,43 @@ go test -race ./... # all test packages must pass

Gortex is running as an MCP server. You MUST use graph queries instead of file reads whenever possible. This saves thousands of tokens per task.

### Optional: delegate research to a local agent
### Optional: LLM features and provider selection

When the daemon is built with `-tags llama` and `llm.model` is set in `.gortex.yaml` (or via the `GORTEX_LLM_MODEL` env var), the `ask` MCP tool is registered. It runs a grammar-constrained agent locally that uses gortex tools to research one question and returns a synthesized answer — useful when you'd otherwise issue many `search_symbols` / `get_callers` / `contracts` calls.
The `ask` tool and the `search_symbols` `assist` modes are backed by an LLM provider, selected by the `llm.provider` config key (in `.gortex.yaml` or `~/.config/gortex/config.yaml`):

| `llm.provider` | Backend | Requires |
|----------------|---------|----------|
| `local` (default) | in-process llama.cpp | a `-tags llama` build + `llm.local.model` (a `.gguf` path) |
| `anthropic` | Anthropic Messages API | `llm.anthropic.model` + `ANTHROPIC_API_KEY` |
| `openai` | OpenAI Chat Completions | `llm.openai.model` + `OPENAI_API_KEY` |
| `ollama` | Ollama daemon | `llm.ollama.model` (+ `llm.ollama.host`, default `localhost:11434`) |

The HTTP providers are pure Go — available without `-tags llama`. `GORTEX_LLM_PROVIDER` / `GORTEX_LLM_MODEL` env vars override the file config. If the active provider can't be constructed (missing model / API key, or `local` without `-tags llama`), the daemon logs a warning and the LLM features stay absent.
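
For example, a minimal per-repo `.gortex.yaml` that switches to a hosted backend (a sketch: the model name is the one the README example uses; the key itself lives only in `ANTHROPIC_API_KEY`, never in the file):

```yaml
# .gortex.yaml: hosted provider; works in any build (no -tags llama needed)
llm:
  provider: anthropic
  anthropic:
    model: claude-sonnet-4-6
```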

### Optional: delegate research to the `ask` agent

When a provider is configured, the `ask` MCP tool is registered. It runs a structured tool-calling agent that uses gortex tools to research one question and returns a synthesized answer — useful when you'd otherwise issue many `search_symbols` / `get_callers` / `contracts` calls.

| When you'd otherwise... | Consider... |
|---------------------------------------|------------------------------------------|
| Run many calls to answer one open-ended question | `ask` (one call, ~5-30s, ~200-400 token answer) |
| Trace a request across repos (consumer → contract → handler → downstream) | `ask` with `chain: true` |
| Look up a single known fact | Skip `ask` — direct tools are faster |

If `ask` isn't in `tools/list`, gortex was built without `-tags llama` or `llm.model` is unset. Fall through to direct tools.
If `ask` isn't in `tools/list`, no LLM provider is configured (or it failed to construct). Fall through to direct tools.
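
A sketch of the delegated call on the wire (the `question` argument name is an assumption; `chain` is the documented flag):

```json
{
  "method": "tools/call",
  "params": {
    "name": "ask",
    "arguments": {
      "question": "Where is the gateway's auth token validated downstream?",
      "chain": true
    }
  }
}
```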

### Optional: LLM-assisted search ranking (`search_symbols` `assist:` arg)

When a provider is configured, `search_symbols` accepts an `assist` argument that engages the model in the search pipeline. The default `auto` is sub-100 ms on identifier lookups; the active modes add latency but materially improve precision on natural-language queries.

| `assist` value | Behaviour | Cost |
|----------------|-----------|------|
| `auto` (default) | NL heuristic decides per-query. Identifier-shaped queries (`Server.handleAsk`, `parseToolCall`) skip the LLM. NL queries (≥3 tokens with a stop word, or ≥4 plain-word tokens) trigger query expansion + name+sig rerank. | None for identifier lookups; +200–500 ms for NL. |
| `on` | Forces expansion + name+sig rerank regardless of shape. Use when you know the query is fuzzy. | +200–500 ms. |
| `off` | Pure BM25 + combo/frecency. No LLM. | None. |
| `deep` | `on` plus a body-grounded verification pass — reads each top candidate's body + callers and honestly drops candidates whose code isn't about the query. May return zero results when nothing genuinely matches; that's the load-bearing honest-negative signal. | +1.5–4 s. Quality is **highly model-dependent**: small local models (Qwen2.5-Coder 3B) are unreliable on disambiguation cases (e.g. "hash passwords" vs functions that hash other data); a 7B-class local model or any hosted provider produces stable, useful results. The assist prompts are tiered automatically — terser for hosted frontier models, rule-heavy for small local ones. |

The response gains an `assist` debug block when an active mode engaged: `terms` (expansion words), `primary_count` (raw BM25 hits on the original query), `merged_count` (after expansion union), `final_count` (after filter/rerank), plus `verify_kept_ids` / `verify_dropped` for `deep`.
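
Illustrative shape of that block for a `deep` query (field names from above; the values and the exact type of `verify_dropped` are invented for the example):

```json
{
  "assist": {
    "terms": ["bcrypt", "credential", "salt"],
    "primary_count": 12,
    "merged_count": 31,
    "final_count": 2,
    "verify_kept_ids": ["auth.HashPassword", "auth.checkPassword"],
    "verify_dropped": 3
  }
}
```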

### Navigation and Reading

55 changes: 55 additions & 0 deletions README.md
@@ -57,6 +57,7 @@ For Homebrew, package managers (`.deb` / `.rpm` / `.apk`), direct binary downloa
- **Infrastructure graph layer** — first-class `KindResource` (Kubernetes Deployments, Services, Ingresses, ConfigMaps, Secrets, CronJobs), `KindKustomization` (overlay tree), and `KindImage` (Dockerfile FROM targets and K8s `container.image`) with `depends_on` / `configures` / `mounts` / `exposes` / `uses_env` edges. Cross-references with code-side `os.Getenv` calls automatically. Surfaced via `analyze` `kind: "k8s_resources" / "kustomize" / "images"`
- **CPG-lite dataflow** — `value_flow` (intra-procedural assignment / return / range), `arg_of` (caller arg → callee param), and `returns_to` (callee → assignment LHS) edges built at index time. `flow_between` returns ranked dataflow paths between two symbol IDs; `taint_paths` does pattern-driven source→sink sweeps for security audits
- **3 MCP prompts** — `pre_commit`, `orientation`, `safe_to_change` for guided workflows
- **LLM features (optional)** — opt-in `ask` research agent + LLM-assisted `search_symbols` ranking, behind a pluggable provider (`local` llama.cpp / Anthropic / OpenAI / Ollama). Off by default; the HTTP providers need no native dependencies. See [LLM Features](#llm-features-optional)
- **Two-tier config** — global config (`~/.config/gortex/config.yaml`) for projects and repo lists, per-repo `.gortex.yaml` for guards, excludes, and local overrides
- **Guard rules** — project-specific constraints (co-change, boundary) enforced via `check_guards`
- **Watch mode** — surgical graph updates on file change across all tracked repos, live sync with agents
@@ -565,6 +566,60 @@
go build -tags embeddings_onnx ./cmd/gortex/ # needs: brew install onnxruntime
go build -tags embeddings_gomlx ./cmd/gortex/ # auto-downloads XLA plugin
```

## LLM Features (optional)

Gortex can delegate code-intelligence work to an LLM. Two features, both **off by default** and gated on configuring a provider:

- **`ask` MCP tool** — a research agent that drives Gortex's own tools (search, callers, contracts, dependencies) to answer an open-ended question and returns a synthesized answer, instead of the calling agent issuing many tool calls itself. `chain: true` traces cross-system call chains.
- **`search_symbols` `assist` arg** — LLM-assisted ranking on `search_symbols`: `auto` (engage on natural-language queries only), `on`, `off`, `deep` (adds a body-grounded verification pass that reads candidate code + callers and honestly drops irrelevant matches).

### Providers

The backend is chosen by the `llm.provider` key. The three HTTP providers are pure Go — available in any build; only `local` needs a `-tags llama` build (it embeds llama.cpp).

| `llm.provider` | Backend | Needs |
|----------------|---------|-------|
| `local` | in-process llama.cpp | a `-tags llama` build + a `.gguf` model file |
| `anthropic` | Anthropic Messages API | `ANTHROPIC_API_KEY` |
| `openai` | OpenAI Chat Completions | `OPENAI_API_KEY` |
| `ollama` | Ollama daemon | a running Ollama + a pulled model |
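
For the `ollama` backend, the model has to be pulled before Gortex can use it (the tag here matches the example config below):

```sh
ollama pull qwen2.5-coder:7b   # one-time; Gortex then talks to the daemon on localhost:11434
```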

### Configuration

The `llm:` block goes in `~/.config/gortex/config.yaml` or a per-repo `.gortex.yaml` (repo-local wins per field, global fills the rest). Configure only the provider you use:

```yaml
# ~/.config/gortex/config.yaml (or per-repo .gortex.yaml)
llm:
  provider: local      # local | anthropic | openai | ollama
  max_steps: 16        # agent tool-loop cap (provider-agnostic)

  local:               # provider: local — requires a `-tags llama` build
    model: ~/models/qwen2.5-coder-7b-instruct-q4_k_m.gguf
    ctx: 4096          # context window in tokens
    gpu_layers: 999    # layers to offload to GPU (0 = CPU-only)
    template: chatml   # chatml | llama3

  anthropic:           # provider: anthropic
    model: claude-sonnet-4-6
    api_key_env: ANTHROPIC_API_KEY   # env var holding the key (this is the default)
    # base_url: https://api.anthropic.com

  openai:              # provider: openai
    model: gpt-4o
    api_key_env: OPENAI_API_KEY

  ollama:              # provider: ollama
    model: qwen2.5-coder:7b
    host: http://localhost:11434
```
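
Because the merge is per field, a repo's `.gortex.yaml` can override a single value and inherit everything else from the global config (hypothetical model path):

```yaml
# .gortex.yaml: provider, ctx, gpu_layers, etc. all come from the global config
llm:
  local:
    model: ~/models/repo-specific.gguf
```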

Env overrides: `GORTEX_LLM_PROVIDER`, `GORTEX_LLM_MODEL` (targets the active provider's model), `GORTEX_LLM_MAX_STEPS`. API keys are read from the env var named by `api_key_env` — never stored in the config file.

If the active provider can't be constructed (missing model or API key, or `local` without a `-tags llama` build), the daemon logs a warning and the LLM features stay absent — the rest of Gortex is unaffected. If the `ask` tool isn't in `tools/list`, no provider is configured (or the configured one failed to construct).

The `assist` prompts are tiered automatically — terser for hosted frontier models, rule-heavy for small local ones. `deep` mode in particular benefits from a 7B-class or hosted model; small local models are unreliable on its disambiguation cases.

## Token Savings

Gortex tracks how many tokens it saves compared to naive file reads — per-call, per-session, and cumulative across restarts:
18 changes: 11 additions & 7 deletions cmd/gortex/daemon_state.go
@@ -302,13 +302,17 @@ func buildDaemonState(logger *zap.Logger) (*daemonState, error) {
		logger.Warn("daemon: savings persistence disabled", zap.Error(err))
	}

	// In-process LLM service (opt-in via `.gortex.yaml` `llm.model:` or
	// GORTEX_LLM_MODEL env var). Builds and attaches an in-process
	// backend wired to this engine + contract registry, then registers
	// the `ask` MCP tool. No-op when cfg.LLM is empty after env-merge,
	// or when gortex was built without `-tags llama` (stub service +
	// stub registerLLMTools).
	srv.SetupLLM(cfg.LLM)
	// LLM service (opt-in via the `.gortex.yaml` `llm:` block,
	// `~/.config/gortex/config.yaml::llm:`, or GORTEX_LLM_* env vars).
	// Repo-local config wins per non-zero field; the global config
	// fills the rest; env overrides land last inside SetupLLM via
	// MergeEnv. The active provider is chosen by `llm.provider`
	// (local / anthropic / openai / ollama). No-op when the active
	// provider has no model configured; a provider that fails to
	// construct (e.g. "local" without `-tags llama`, or a missing API
	// key) is logged and the service stays disabled.
	gc, _ := config.LoadGlobal()
	srv.SetupLLM(gc.MergeLLMInto(cfg.LLM))

	// MultiWatcher is created in warmupDaemonState after tracked repos
	// have been re-indexed — NewMultiWatcher needs mi.AllMetadata() to be
9 changes: 6 additions & 3 deletions cmd/gortex/mcp.go
@@ -345,9 +345,12 @@ func runMCP(cmd *cobra.Command, args []string) error {
		fmt.Fprintf(os.Stderr, "[gortex] savings persistence disabled: %v\n", err)
	}

	// In-process LLM service — same wiring as the daemon path. No-op
	// when cfg.LLM is empty or gortex was built without `-tags llama`.
	srv.SetupLLM(cfg.LLM)
	// LLM service — same wiring as the daemon path: repo config wins
	// per non-zero field, global ~/.config/gortex/config.yaml fills the
	// rest, env vars override last inside SetupLLM. The active provider
	// is chosen by `llm.provider` (local / anthropic / openai / ollama).
	gc, _ := config.LoadGlobal()
	srv.SetupLLM(gc.MergeLLMInto(cfg.LLM))

	fmt.Fprintf(os.Stderr, "[gortex] MCP server ready (transport: %s)\n", mcpTransport)

9 changes: 5 additions & 4 deletions internal/config/config.go
@@ -230,10 +230,11 @@ type Config struct {
	Guards   GuardsConfig    `mapstructure:"guards" yaml:"guards,omitempty"`
	Multi    MultiRepoConfig `mapstructure:"multi" yaml:"multi,omitempty"`
	Semantic SemanticConfig  `mapstructure:"semantic" yaml:"semantic,omitempty"`
	// LLM configures the in-process local-LLM service that backs the
	// `ask` MCP tool (and future wiki / doc generators). Empty by
	// default — daemon skips LLM wiring entirely when llm.model is
	// unset. Env vars GORTEX_LLM_* override file values; see
	// LLM configures the LLM service that backs the `ask` MCP tool and
	// the search-assist passes. Empty by default — daemon skips LLM
	// wiring entirely when the active provider has no model configured.
	// The `llm.provider` key selects the backend (local / anthropic /
	// openai / ollama); env vars GORTEX_LLM_* override file values; see
	// internal/llm/config.go::Config.MergeEnv.
	LLM llm.Config `mapstructure:"llm" yaml:"llm,omitempty"`
}
47 changes: 47 additions & 0 deletions internal/config/global.go
@@ -9,6 +9,8 @@ import (
"sync"

"gopkg.in/yaml.v3"

"github.com/zzet/gortex/internal/llm"
)

var (
@@ -54,10 +56,55 @@ type GlobalConfig struct {
	// baseline and below per-RepoEntry / workspace lists.
	Exclude []string `mapstructure:"exclude" yaml:"exclude,omitempty"`

	// LLM is the user-level local-LLM service config (`llm.model:` etc.).
	// Merged into the repo-local Config.LLM at daemon startup via
	// MergeLLMInto — local non-zero fields win, global fills the rest.
	// Lets users keep model paths and tuning in one place across repos
	// without duplicating an `llm:` block in every `.gortex.yaml`.
	LLM llm.Config `mapstructure:"llm" yaml:"llm,omitempty"`

	// configPath stores the file path used for Save(). Set by LoadGlobal or SetConfigPath.
	configPath string `yaml:"-"`
}

// MergeLLMInto layers a repo-local llm.Config over the global user
// config: each zero-valued field of local is filled from gc.LLM,
// per provider sub-block. Local non-zero values always win — including
// an explicit per-repo override of an inherited global model path.
// Safe to call on a nil receiver (returns local unchanged), so daemon
// startup paths don't need separate nil-checks for the global config.
//
// The local provider's model path additionally gets `~/` expanded
// against $HOME so users can write portable paths in either config.
func (gc *GlobalConfig) MergeLLMInto(local llm.Config) llm.Config {
	if gc != nil {
		local = local.MergedWith(gc.LLM)
	}
	local.Local.Model = expandHome(local.Local.Model)
	return local
}

// expandHome resolves a leading `~/` in a path against $HOME so users
// can write portable model paths in their global config. No-op when
// the path is empty, absolute without `~`, or `~` is not the first
// character. Returns the input unchanged on any os.UserHomeDir error.
func expandHome(p string) string {
if p == "" || !strings.HasPrefix(p, "~") {
return p
}
home, err := os.UserHomeDir()
if err != nil {
return p
}
if p == "~" {
return home
}
if strings.HasPrefix(p, "~/") {
return filepath.Join(home, p[2:])
}
return p
}

// DefaultGlobalConfigPath returns the default path: ~/.config/gortex/config.yaml.
//
// Resolved fresh on every call so HOME changes (notably t.Setenv in tests)
143 changes: 143 additions & 0 deletions internal/config/global_llm_test.go
@@ -0,0 +1,143 @@
package config

import (
"os"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/zzet/gortex/internal/llm"
)

func TestLoadGlobal_LLMSectionRoundTrip(t *testing.T) {
	dir := t.TempDir()
	cfgPath := filepath.Join(dir, "config.yaml")
	require.NoError(t, os.WriteFile(cfgPath, []byte(`active_project: ""
repos: []
llm:
  provider: local
  max_steps: 12
  local:
    model: /opt/models/qwen.gguf
    template: chatml
    ctx: 4096
    gpu_layers: 999
  anthropic:
    model: claude-sonnet-4-6
`), 0o644))

	gc, err := LoadGlobal(cfgPath)
	require.NoError(t, err)
	require.NotNil(t, gc)
	assert.Equal(t, "local", gc.LLM.Provider)
	assert.Equal(t, 12, gc.LLM.MaxSteps)
	assert.Equal(t, "/opt/models/qwen.gguf", gc.LLM.Local.Model)
	assert.Equal(t, "chatml", gc.LLM.Local.Template)
	assert.Equal(t, 4096, gc.LLM.Local.Ctx)
	assert.Equal(t, 999, gc.LLM.Local.GPULayers)
}

func TestGlobalConfig_MergeLLMInto_FillsZeroFields(t *testing.T) {
gc := &GlobalConfig{LLM: llm.Config{
Provider: "local",
MaxSteps: 16,
Local: llm.LocalConfig{
Model: "/global/qwen.gguf",
Template: "chatml",
Ctx: 4096,
GPULayers: 999,
},
}}

got := gc.MergeLLMInto(llm.Config{})
assert.Equal(t, "local", got.Provider)
assert.Equal(t, 16, got.MaxSteps)
assert.Equal(t, "/global/qwen.gguf", got.Local.Model)
assert.Equal(t, "chatml", got.Local.Template)
assert.Equal(t, 4096, got.Local.Ctx)
assert.Equal(t, 999, got.Local.GPULayers)
}

func TestGlobalConfig_MergeLLMInto_LocalWinsPerField(t *testing.T) {
gc := &GlobalConfig{LLM: llm.Config{
Provider: "local",
MaxSteps: 16,
Local: llm.LocalConfig{
Model: "/global/qwen.gguf",
Template: "chatml",
Ctx: 4096,
},
}}

got := gc.MergeLLMInto(llm.Config{
Local: llm.LocalConfig{
Model: "/repo/override.gguf", // local wins
Ctx: 8192, // local wins
},
})
assert.Equal(t, "/repo/override.gguf", got.Local.Model)
assert.Equal(t, 8192, got.Local.Ctx)
// Unset locals fall through to global.
assert.Equal(t, "chatml", got.Local.Template)
assert.Equal(t, 16, got.MaxSteps)
assert.Equal(t, "local", got.Provider)
}

func TestGlobalConfig_MergeLLMInto_PerProviderSubBlocks(t *testing.T) {
gc := &GlobalConfig{LLM: llm.Config{
Anthropic: llm.RemoteConfig{Model: "claude-sonnet-4-6", APIKeyEnv: "ANTHROPIC_API_KEY"},
Ollama: llm.OllamaConfig{Host: "http://localhost:11434"},
}}

// Repo selects a different provider and overrides only one field.
got := gc.MergeLLMInto(llm.Config{
Provider: "anthropic",
Anthropic: llm.RemoteConfig{Model: "claude-opus-4-7"},
})
assert.Equal(t, "anthropic", got.Provider)
assert.Equal(t, "claude-opus-4-7", got.Anthropic.Model) // local wins
assert.Equal(t, "ANTHROPIC_API_KEY", got.Anthropic.APIKeyEnv) // global fills
assert.Equal(t, "http://localhost:11434", got.Ollama.Host) // unrelated block still merges
}

func TestGlobalConfig_MergeLLMInto_NilReceiver(t *testing.T) {
	var gc *GlobalConfig // nil
	local := llm.Config{Local: llm.LocalConfig{Model: "/repo/x.gguf"}}
	got := gc.MergeLLMInto(local)
	assert.Equal(t, "/repo/x.gguf", got.Local.Model)
}

func TestGlobalConfig_MergeLLMInto_ExpandsHomeInModelPath(t *testing.T) {
	home, err := os.UserHomeDir()
	require.NoError(t, err)

	gc := &GlobalConfig{LLM: llm.Config{Local: llm.LocalConfig{Model: "~/models/qwen.gguf"}}}
	got := gc.MergeLLMInto(llm.Config{})
	assert.Equal(t, filepath.Join(home, "models/qwen.gguf"), got.Local.Model)

	// Local override also gets expanded.
	got = gc.MergeLLMInto(llm.Config{Local: llm.LocalConfig{Model: "~/repo-override.gguf"}})
	assert.Equal(t, filepath.Join(home, "repo-override.gguf"), got.Local.Model)
}

func TestExpandHome(t *testing.T) {
	home, err := os.UserHomeDir()
	require.NoError(t, err)

	cases := []struct {
		in, want string
	}{
		{"", ""},
		{"/abs/path", "/abs/path"},
		{"relative/path", "relative/path"},
		{"~", home},
		{"~/models/foo.gguf", filepath.Join(home, "models/foo.gguf")},
		{"~weird", "~weird"}, // only `~/` form is expanded
	}
	for _, tc := range cases {
		assert.Equal(t, tc.want, expandHome(tc.in), "in=%q", tc.in)
	}
}