diff --git a/agent-schema.json b/agent-schema.json
index 758f5a361..c546e6e12 100644
--- a/agent-schema.json
+++ b/agent-schema.json
@@ -1402,24 +1402,26 @@
},
"allowed_domains": {
"type": "array",
- "description": "Allow-list of domains the fetch tool is permitted to fetch (only valid for type 'fetch'). A pattern matches the host exactly (case-insensitive) and any of its subdomains; e.g. 'example.com' matches 'example.com' and 'docs.example.com' but not 'badexample.com'. A leading dot ('.example.com') restricts the match to strict subdomains. Mutually exclusive with 'blocked_domains'.",
+ "description": "Allow-list of domains the fetch tool is permitted to fetch (only valid for type 'fetch'). Patterns are case-insensitive. A bare host ('example.com') matches the apex AND any subdomain. A leading dot ('.example.com') or wildcard ('*.example.com') matches strict subdomains only. CIDR ranges ('10.0.0.0/8', '::1/128') match when the URL host parses as an IP inside the network. Mutually exclusive with 'blocked_domains'.",
"items": {
"type": "string"
},
"examples": [
["docker.com", "docs.docker.com"],
- ["github.com", "raw.githubusercontent.com"]
+ ["github.com", "raw.githubusercontent.com"],
+ ["*.example.com"]
]
},
"blocked_domains": {
"type": "array",
- "description": "Deny-list of domains the fetch tool is forbidden to fetch (only valid for type 'fetch'). Uses the same matching rules as 'allowed_domains'. Mutually exclusive with 'allowed_domains'.",
+ "description": "Deny-list of domains the fetch tool is forbidden to fetch (only valid for type 'fetch'). Uses the same matching rules as 'allowed_domains' (bare host, leading-dot or '*.' subdomain wildcard, and CIDR ranges for IP hosts). Mutually exclusive with 'allowed_domains'.",
"items": {
"type": "string"
},
"examples": [
["internal.example.com"],
- ["169.254.169.254"]
+ ["169.254.169.254"],
+ ["169.254.0.0/16", "10.0.0.0/8"]
]
},
"url": {
diff --git a/docs/tools/fetch/index.md b/docs/tools/fetch/index.md
index ad64ffba2..d465ccd13 100644
--- a/docs/tools/fetch/index.md
+++ b/docs/tools/fetch/index.md
@@ -40,7 +40,9 @@ Domain patterns in `allowed_domains` and `blocked_domains` use the following rul
- **Bare domain** — `example.com` matches the host `example.com` _and_ any subdomain such as `docs.example.com`. It does **not** match unrelated hosts that share a suffix (e.g. `badexample.com`).
- **Leading dot** — `.example.com` matches **only** strict subdomains (`docs.example.com`, `a.b.example.com`), not the apex `example.com`.
+- **Wildcard glob** — `*.example.com` is an alias for the leading-dot form; the apex is excluded. The `*` is only valid as a leading `*.` token (entries like `foo.*`, `*.*.example.com`, or a bare `*` are rejected at config-load time).
- **IP literal** — IP addresses are matched exactly (`169.254.169.254`).
+- **CIDR range** — `169.254.0.0/16`, `10.0.0.0/8`, `::1/128`, `fc00::/7`. Matches when the URL's host parses as an IP inside the network. Hostname hosts never match a CIDR pattern. Malformed CIDRs are rejected at config-load time.
- **Trailing dots** in FQDN-form URLs (`http://example.com./`) are stripped before matching, so they cannot bypass a deny-list entry.
The lists are mutually exclusive: a single fetch toolset may set either `allowed_domains` or `blocked_domains`, but not both.
@@ -50,7 +52,7 @@ When a list is configured, every redirect target is re-checked against the same
⚠️ Limitations
-
Matching is purely string-based on the URL host. It does not perform DNS resolution and does not normalise alternative IP encodings (decimal 2852039166, hex 0xa9.0xfe.0xa9.0xfe, octal, IPv4-mapped IPv6, etc.). If you need to deny access to a specific IP, also list its alternative encodings, or block at the network layer.
+
Matching is purely string-based on the URL host. It does not perform DNS resolution and does not normalise alternative IP encodings (decimal 2852039166, hex 0xa9.0xfe.0xa9.0xfe, octal, etc. IPv4-mapped IPv6 addresses ARE normalized to their IPv4 form). If you need to deny access to a specific IP, also list its alternative encodings, or block at the network layer.
### Custom Timeout
@@ -78,8 +80,11 @@ toolsets:
toolsets:
- type: fetch
blocked_domains:
- - 169.254.169.254 # cloud metadata endpoint
- - internal.example.com # internal corporate hostnames
+ - 169.254.169.254 # cloud metadata endpoint (literal IP)
+ - 169.254.0.0/16 # entire link-local range (CIDR)
+ - 10.0.0.0/8 # RFC1918 private range
+ - "*.internal.example.com" # any subdomain (wildcard)
+ - internal.example.com # internal corporate hostname
```
## Tool Interface
diff --git a/examples/fetch_domain_filtering.yaml b/examples/fetch_domain_filtering.yaml
index cb1c8c3fa..9da8d11a0 100644
--- a/examples/fetch_domain_filtering.yaml
+++ b/examples/fetch_domain_filtering.yaml
@@ -36,6 +36,9 @@ agents:
toolsets:
- type: fetch
blocked_domains:
- - 169.254.169.254 # cloud instance metadata
- - 100.100.100.200 # alibaba/oracle metadata
+ - 169.254.169.254 # cloud instance metadata (literal IP)
+ - 169.254.0.0/16 # whole link-local range (CIDR)
+ - 10.0.0.0/8 # RFC1918 private network
+ - 100.100.100.200 # alibaba/oracle metadata
- metadata.google.internal
+ - "*.internal.example.com" # any subdomain (wildcard glob)
diff --git a/pkg/config/latest/validate.go b/pkg/config/latest/validate.go
index 42787f87b..859fd16c1 100644
--- a/pkg/config/latest/validate.go
+++ b/pkg/config/latest/validate.go
@@ -215,14 +215,46 @@ func (t *Toolset) validate() error {
return nil
}
-// validateDomainPatterns rejects empty / whitespace-only entries in a fetch
-// allow- or block-list, since they silently match nothing and turn the list
-// into a foot-gun (e.g. allowed_domains: [""] would reject every URL).
+// validateDomainPatterns rejects empty / whitespace-only entries and
+// malformed wildcard or CIDR patterns in a fetch allow- or block-list.
+//
+// Catching these at config-load time turns silent foot-guns (e.g.
+// `allowed_domains: [""]` rejecting every URL, `*.foo.*` matching nothing)
+// into actionable errors. Plain hostnames and the leading-dot subdomain form
+// are intentionally not validated for syntax — the matcher is purely
+// string-based and any non-conforming entry simply never matches.
func validateDomainPatterns(field string, patterns []string) error {
for i, p := range patterns {
- if strings.TrimSpace(p) == "" {
+ trimmed := strings.TrimSpace(p)
+ if trimmed == "" {
return fmt.Errorf("%s[%d] must not be empty", field, i)
}
+ if err := validateDomainPattern(trimmed); err != nil {
+ return fmt.Errorf("%s[%d] %q is invalid: %w", field, i, p, err)
+ }
+ }
+ return nil
+}
+
+// validateDomainPattern checks a single (already trimmed, non-empty) entry.
+func validateDomainPattern(p string) error {
+ // CIDR notation: must parse cleanly. We deliberately accept any /-bearing
+ // string as "intended to be a CIDR" so a typo like "10.0.0.0/33" is
+ // reported instead of being silently treated as a hostname.
+ if strings.Contains(p, "/") {
+ if _, _, err := net.ParseCIDR(p); err != nil {
+ return fmt.Errorf("not a valid CIDR: %w", err)
+ }
+ return nil
+ }
+ // Wildcards: only the leading "*." form is supported. Anything else
+ // ("foo.*", "*foo*", "**.example.com") would silently match nothing
+ // under the current matcher, which is almost never what the user wants.
+ if strings.Contains(p, "*") {
+ rest, ok := strings.CutPrefix(p, "*.")
+ if !ok || rest == "" || strings.Contains(rest, "*") {
+ return errors.New("'*' is only allowed as a leading '*.' wildcard, e.g. '*.example.com'")
+ }
}
return nil
}
diff --git a/pkg/config/latest/validate_test.go b/pkg/config/latest/validate_test.go
index 278aa3d70..e5945e252 100644
--- a/pkg/config/latest/validate_test.go
+++ b/pkg/config/latest/validate_test.go
@@ -327,6 +327,106 @@ agents:
`,
wantErr: "blocked_domains[0] must not be empty",
},
+ {
+ name: "fetch with wildcard subdomain pattern",
+ config: `
+version: "8"
+agents:
+ root:
+ model: "openai/gpt-4"
+ toolsets:
+ - type: fetch
+ allowed_domains:
+ - "*.example.com"
+`,
+ wantErr: "",
+ },
+ {
+ name: "fetch with ipv4 CIDR pattern",
+ config: `
+version: "8"
+agents:
+ root:
+ model: "openai/gpt-4"
+ toolsets:
+ - type: fetch
+ blocked_domains:
+ - 169.254.0.0/16
+ - 10.0.0.0/8
+`,
+ wantErr: "",
+ },
+ {
+ name: "fetch with ipv6 CIDR pattern",
+ config: `
+version: "8"
+agents:
+ root:
+ model: "openai/gpt-4"
+ toolsets:
+ - type: fetch
+ blocked_domains:
+ - "fc00::/7"
+ - "::1/128"
+`,
+ wantErr: "",
+ },
+ {
+ name: "malformed CIDR is rejected",
+ config: `
+version: "8"
+agents:
+ root:
+ model: "openai/gpt-4"
+ toolsets:
+ - type: fetch
+ blocked_domains:
+ - 10.0.0.0/33
+`,
+ wantErr: "not a valid CIDR",
+ },
+ {
+ name: "interior wildcard is rejected",
+ config: `
+version: "8"
+agents:
+ root:
+ model: "openai/gpt-4"
+ toolsets:
+ - type: fetch
+ allowed_domains:
+ - "foo.*"
+`,
+ wantErr: "'*' is only allowed as a leading '*.' wildcard",
+ },
+ {
+ name: "double wildcard is rejected",
+ config: `
+version: "8"
+agents:
+ root:
+ model: "openai/gpt-4"
+ toolsets:
+ - type: fetch
+ allowed_domains:
+ - "*.*.example.com"
+`,
+ wantErr: "'*' is only allowed as a leading '*.' wildcard",
+ },
+ {
+ name: "bare star is rejected",
+ config: `
+version: "8"
+agents:
+ root:
+ model: "openai/gpt-4"
+ toolsets:
+ - type: fetch
+ allowed_domains:
+ - "*"
+`,
+ wantErr: "'*' is only allowed as a leading '*.' wildcard",
+ },
}
for _, tt := range tests {
diff --git a/pkg/tools/builtin/fetch.go b/pkg/tools/builtin/fetch.go
index af85c994f..235608e70 100644
--- a/pkg/tools/builtin/fetch.go
+++ b/pkg/tools/builtin/fetch.go
@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io"
+ "net"
"net/http"
"net/url"
"slices"
@@ -296,18 +297,85 @@ func (h *fetchHandler) checkDomainAllowed(u *url.URL) error {
// matchesDomain reports whether host matches pattern (case-insensitive).
//
-// A bare pattern ("example.com") matches the host exactly or any subdomain
-// ("docs.example.com"); it does NOT match unrelated hosts that share a suffix
-// ("badexample.com"). A pattern with a leading dot (".example.com") matches
-// strict subdomains only — the apex "example.com" is excluded.
+// Supported pattern shapes:
+//
+// - **Bare domain** ("example.com") matches the host exactly _or_ any
+// subdomain ("docs.example.com"); it does NOT match unrelated hosts that
+// share a suffix ("badexample.com").
+// - **Leading dot** (".example.com") matches strict subdomains only — the
+// apex "example.com" is excluded.
+// - **Wildcard glob** ("*.example.com") is an alias for the leading-dot
+// form: it matches strict subdomains only. No other use of "*" is
+// supported (e.g. "foo.*", "*foo*" are rejected by validation and would
+// never match here).
+// - **CIDR** ("10.0.0.0/8", "169.254.0.0/16", "::1/128", "fc00::/7")
+// matches when the host parses as an IP address inside the network.
+// Hostname hosts never match a CIDR pattern.
//
// Trailing dots used in FQDN form ("example.com.") are stripped from both
// host and pattern before matching, so a URL like http://example.com./ cannot
// be used to bypass a deny-list entry for example.com.
func matchesDomain(host, pattern string) bool {
- host = strings.TrimSuffix(strings.ToLower(strings.TrimSpace(host)), ".")
- pattern = strings.TrimSuffix(strings.ToLower(strings.TrimSpace(pattern)), ".")
- if host == "" || pattern == "" || pattern == "." {
+ host = strings.TrimSpace(host)
+ pattern = strings.TrimSpace(pattern)
+ if host == "" || pattern == "" {
+ return false
+ }
+
+ // CIDR pattern: the host must parse as an IP address inside the network.
+ // CIDRs always contain '/', so we can detect them cheaply before any other
+ // normalisation. Hostname-style hosts never match a CIDR pattern.
+ if strings.Contains(pattern, "/") {
+ if _, ipNet, err := net.ParseCIDR(pattern); err == nil {
+ // url.Hostname() already strips IPv6 brackets, but be defensive.
+ ipStr := strings.TrimSuffix(strings.Trim(host, "[]"), ".")
+ if ip := net.ParseIP(ipStr); ip != nil {
+ // Normalize IPv4-mapped IPv6 addresses (::ffff:a.b.c.d) to their
+ // IPv4 form before checking CIDR membership. Without this, an
+ // attacker can bypass an IPv4 deny-list like "169.254.0.0/16" by
+ // using the IPv6-mapped form "::ffff:169.254.169.254".
+ //
+ // net.IP.To4() returns nil for "true" IPv6 addresses and the
+ // 4-byte IPv4 form for IPv4 or IPv4-mapped-IPv6.
+ if ipv4 := ip.To4(); ipv4 != nil {
+ return ipNet.Contains(ipv4)
+ }
+ return ipNet.Contains(ip)
+ }
+ return false
+ }
+ // Malformed CIDRs are rejected at config-load time; if one slips
+ // through (e.g. via the programmatic API), fall through to the
+ // string matcher below, which will never match a host.
+ }
+
+ // Normalize IPv4-mapped IPv6 addresses to their IPv4 form for string
+ // comparison. This ensures that "::ffff:169.254.169.254" matches a
+ // literal pattern "169.254.169.254" (and vice versa).
+ if ip := net.ParseIP(strings.Trim(host, "[]")); ip != nil {
+ if ipv4 := ip.To4(); ipv4 != nil {
+ host = ipv4.String()
+ } else {
+ host = ip.String()
+ }
+ }
+ if ip := net.ParseIP(strings.Trim(pattern, "[]")); ip != nil {
+ if ipv4 := ip.To4(); ipv4 != nil {
+ pattern = ipv4.String()
+ } else {
+ pattern = ip.String()
+ }
+ }
+
+ host = strings.TrimSuffix(strings.ToLower(host), ".")
+ pattern = strings.TrimSuffix(strings.ToLower(pattern), ".")
+
+ // Wildcard glob "*.example.com" is an alias for ".example.com".
+ if rest, ok := strings.CutPrefix(pattern, "*."); ok {
+ pattern = "." + rest
+ }
+
+ if pattern == "" || pattern == "." {
return false
}
if strings.HasPrefix(pattern, ".") {
diff --git a/pkg/tools/builtin/fetch_test.go b/pkg/tools/builtin/fetch_test.go
index 2f71dd080..3bcc8e564 100644
--- a/pkg/tools/builtin/fetch_test.go
+++ b/pkg/tools/builtin/fetch_test.go
@@ -439,6 +439,33 @@ func TestMatchesDomain(t *testing.T) {
{"trailing dot host matches subdomain pattern", "docs.example.com.", "example.com", true},
{"trailing dot pattern matches apex host", "example.com", "example.com.", true},
{"trailing dot host matches strict-subdomain pattern", "docs.example.com.", ".example.com", true},
+
+ // Wildcard glob form: alias for the leading-dot strict-subdomain match.
+ {"wildcard matches subdomain", "docs.example.com", "*.example.com", true},
+ {"wildcard matches deep subdomain", "a.b.example.com", "*.example.com", true},
+ {"wildcard does NOT match apex", "example.com", "*.example.com", false},
+ {"wildcard does NOT match unrelated suffix", "badexample.com", "*.example.com", false},
+ {"wildcard with trailing dot host", "docs.example.com.", "*.example.com", true},
+ {"interior wildcard never matches (defense in depth)", "foo.example.com", "foo.*", false},
+
+ // CIDR form.
+ {"ipv4 inside /16", "169.254.169.254", "169.254.0.0/16", true},
+ {"ipv4 outside /16", "10.0.0.1", "169.254.0.0/16", false},
+ {"ipv4 /32 exact", "169.254.169.254", "169.254.169.254/32", true},
+ {"ipv4 /32 mismatch", "169.254.169.255", "169.254.169.254/32", false},
+ {"private /8", "10.1.2.3", "10.0.0.0/8", true},
+ {"hostname does not match cidr", "example.com", "10.0.0.0/8", false},
+ {"ipv6 loopback", "::1", "::1/128", true},
+ {"ipv6 ula", "fc00::1234", "fc00::/7", true},
+ {"ipv6 outside ula", "2001:db8::1", "fc00::/7", false},
+ {"malformed cidr never matches", "169.254.169.254", "10.0.0.0/33", false},
+
+ // IPv4-mapped IPv6 bypass prevention (SSRF defense).
+ {"ipv4-mapped ipv6 matches ipv4 cidr", "::ffff:169.254.169.254", "169.254.0.0/16", true},
+ {"ipv4-mapped ipv6 matches ipv4 /32", "::ffff:10.0.0.1", "10.0.0.1/32", true},
+ {"ipv4-mapped ipv6 outside ipv4 cidr", "::ffff:10.0.0.1", "169.254.0.0/16", false},
+ {"ipv4-mapped ipv6 matches ipv4 literal", "::ffff:169.254.169.254", "169.254.169.254", true},
+ {"ipv4 literal matches ipv4-mapped ipv6 cidr (edge case)", "169.254.169.254", "::ffff:169.254.0.0/112", true},
}
for _, tc := range tests {
@@ -585,3 +612,35 @@ func TestFetch_BlockedDomains_RejectsRedirectToBlockedHost(t *testing.T) {
assert.Contains(t, result.Output, "is blocked by blocked_domains")
assert.Contains(t, result.Output, "169.254.169.254")
}
+
+// Additional edge case tests for security review
+func TestMatchesDomain_EdgeCases(t *testing.T) {
+ tests := []struct {
+ name string
+ host string
+ pattern string
+ want bool
+ }{
+ // Port handling (should be stripped by url.Hostname() before matchesDomain is called)
+ {"host with port should not match", "example.com:8080", "example.com", false},
+
+ // IPv6 with zone ID (should be stripped by url.Hostname())
+ {"ipv6 with zone id should not match", "fe80::1%eth0", "fe80::1", false},
+
+ // Empty CIDR prefix
+ {"empty after wildcard", "example.com", "*.", false},
+
+ // Case sensitivity in IPv6
+ {"ipv6 case insensitive", "2001:DB8::1", "2001:db8::1", true},
+
+ // Brackets in pattern (defensive)
+ {"brackets in pattern", "::1", "[::1]", true},
+ {"brackets in host", "[::1]", "::1", true},
+ }
+
+ for _, tc := range tests {
+ t.Run(tc.name, func(t *testing.T) {
+ assert.Equal(t, tc.want, matchesDomain(tc.host, tc.pattern))
+ })
+ }
+}