From 57375cb12d7940cde3d19a53603274edd7cdf3c8 Mon Sep 17 00:00:00 2001
From: Paulo Fidalgo <paulo.fidalgo.pt@gmail.com>
Date: Mon, 1 Jun 2026 22:15:06 +0100
Subject: [PATCH 1/3] feat: add bounded async crawl execution

Introduce threaded and async fetch executors, route crawl, sitemap, link target, and selected rule work through bounded parallel execution, and document the Ruby 3.3 async transport contract.

Also clarify validation reporting so warnings-only runs are reported as warnings, sampled in large groups, and do not fail the CLI exit status.

Verification: bundle exec rake test; bundle exec rake standard; mise exec ruby@3.4.8 -- bundle exec rake test; mise exec ruby@3.4.8 -- bundle exec rake standard; bundle exec rake build; bundle exec rake crawlscope:validate URL=http://localhost:3000
---
 README.md                                     |   8 +
 Rakefile                                      |   2 +-
 UPGRADE.md                                    |  16 +-
 async-performance-assessment.md               | 166 ++++++++
 crawlscope.gemspec                            |   4 +-
 lib/crawlscope/cli.rb                         |   9 +
 lib/crawlscope/configuration.rb               |   9 +-
 lib/crawlscope/context.rb                     |   2 +-
 lib/crawlscope/crawl.rb                       |  80 +++-
 lib/crawlscope/crawler.rb                     |  20 +-
 lib/crawlscope/fetch_executor.rb              |  36 ++
 lib/crawlscope/fetch_executor/async.rb        |  32 ++
 lib/crawlscope/fetch_executor/threaded.rb     |  32 ++
 lib/crawlscope/http.rb                        |   8 +-
 lib/crawlscope/reporter.rb                    |  26 +-
 lib/crawlscope/result.rb                      |   2 +-
 lib/crawlscope/rules/links.rb                 |  92 ++++-
 lib/crawlscope/rules/uniqueness.rb            |  29 +-
 lib/crawlscope/sitemap.rb                     |  41 +-
 lib/tasks/crawlscope_tasks.rake               |   2 +-
 perf-improvement.md                           | 365 ++++++++++++++++++
 test/crawlscope/cli_test.rb                   |  19 +-
 test/crawlscope/configuration_test.rb         |  14 +
 test/crawlscope/crawl_test.rb                 | 114 +++++-
 test/crawlscope/crawler_test.rb               |  61 +++
 test/crawlscope/links_rule_test.rb            |  53 +++
 test/crawlscope/reporter_test.rb              |  86 ++++-
 test/crawlscope/result_test.rb                |  35 ++
 test/crawlscope/sitemap_test.rb               |  52 +++
 test/performance/async_fetch_benchmark.rb     | 127 ++++++
 test/performance/fetch_executor_matrix.rb     | 162 ++++++++
 .../sitemap_expansion_benchmark.rb            | 121 ++++++
 32 files changed, 1744 insertions(+), 81 deletions(-)
 create mode 100644 async-performance-assessment.md
 create mode 100644 lib/crawlscope/fetch_executor.rb
 create mode 100644 lib/crawlscope/fetch_executor/async.rb
 create mode 100644 lib/crawlscope/fetch_executor/threaded.rb
 create mode 100644 perf-improvement.md
 create mode 100644 test/crawlscope/result_test.rb
 create mode 100644 test/performance/async_fetch_benchmark.rb
 create mode 100644 test/performance/fetch_executor_matrix.rb
 create mode 100644 test/performance/sitemap_expansion_benchmark.rb

diff --git a/README.md b/README.md
index e172560..c338f15 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,8 @@ The default rule set includes:
 
 ## Installation
 
+Crawlscope requires Ruby 3.3 or newer.
+
 Add this line to your application's Gemfile:
 
 ```ruby
@@ -166,6 +168,7 @@ Available environment overrides:
 - `TIMEOUT=30`
 - `NETWORK_IDLE_TIMEOUT=10`
 - `CONCURRENCY=5`
+- `FETCH_EXECUTOR=threaded` or `FETCH_EXECUTOR=async`
 
 Available tasks:
 
@@ -196,6 +199,11 @@ bundle exec rake 'crawlscope:validate:ldjson[https://example.com/article]'
 Plain `rake` does not pass `--url` style flags to tasks. Use `URL=...` or the
 task-argument form above instead.
 
+`FETCH_EXECUTOR=threaded` is the default and works with Crawlscope's required
+runtime dependencies. `FETCH_EXECUTOR=async` uses Ruby's fiber scheduler and
+Async::HTTP through Faraday, preserving the same `CONCURRENCY` bound. The CLI
+accepts the same setting with `--fetch-executor async`.
+
 `crawlscope:validate:ldjson` is separate because it directly checks the URL or semicolon-separated URLs in `URL`; it does not crawl the sitemap. Without `URL`, it checks the configured base URL, falling back to `http://localhost:3000`.
 
 ### Structured Data URL Audit
diff --git a/Rakefile b/Rakefile
index 840ad74..7f3ae01 100644
--- a/Rakefile
+++ b/Rakefile
@@ -183,7 +183,7 @@ namespace :release do
 end
 
 namespace :crawlscope do
-  desc "Validate URLs with all default Crawlscope rules. Args: [url,sitemap,rules]. ENV: URL, SITEMAP, RULES, JS=1, TIMEOUT, NETWORK_IDLE_TIMEOUT, CONCURRENCY"
+  desc "Validate URLs with all default Crawlscope rules. Args: [url,sitemap,rules]. ENV: URL, SITEMAP, RULES, JS=1, TIMEOUT, NETWORK_IDLE_TIMEOUT, CONCURRENCY, FETCH_EXECUTOR"
   task :validate, [:url, :sitemap, :rules] do |_task, args|
     Crawlscope::RakeTasks.validate(url: args[:url], sitemap_path: args[:sitemap], rule_names: args[:rules])
   end
diff --git a/UPGRADE.md b/UPGRADE.md
index 3a6cb49..bbc6f6a 100644
--- a/UPGRADE.md
+++ b/UPGRADE.md
@@ -4,4 +4,18 @@ Use this file for host-app migration notes when a release changes public
 contracts, required setup, component locals, generated assets, or runtime
 behavior.
 
-No special upgrade notes have been published yet.
+## Next Release
+
+### Ruby 3.3 is now required
+
+Crawlscope now depends on the current Async runtime for production async HTTP
+fetching. Host applications must run Ruby 3.3 or newer before upgrading.
+
+Recommended migration:
+
+1. Upgrade the host application runtime to Ruby 3.3 or newer.
+2. Run `bundle update crawlscope async async-http async-http-faraday`.
+3. Keep the default `FETCH_EXECUTOR=threaded` for the first deploy if you want
+   a conservative rollout.
+4. Enable async fetching with `FETCH_EXECUTOR=async` or
+   `--fetch-executor async` after the app is running on the new Ruby version.
diff --git a/async-performance-assessment.md b/async-performance-assessment.md
new file mode 100644
index 0000000..e38b1a7
--- /dev/null
+++ b/async-performance-assessment.md
@@ -0,0 +1,166 @@
+# Async Performance Assessment
+
+Date: 2026-06-01
+
+## Conclusion
+
+Async HTTP is not at least 2x faster than Crawlscope's existing threaded fetch
+baseline at the same concurrency.
+
+It is much faster than sequential fetching, but Crawlscope was already parallel
+before this work. The meaningful comparison is therefore:
+
+- `fetch_executor: :threaded`, `CONCURRENCY=N`
+- `fetch_executor: :async`, `CONCURRENCY=N`
+
+On that comparison, async is roughly 1.01x to 1.11x faster in most local
+delayed-response scenarios, with one Ruby 3.4.8 short-delay/high-concurrency
+case reaching 1.35x. It does not reach 2x.
+
+The additional production work did produce a 2x+ improvement in a different
+place: child sitemap expansion now uses the bounded fetch executor instead of
+walking sitemap indexes serially. In the local delayed sitemap benchmark,
+bounded expansion is about 7.4x to 7.6x faster than sequential expansion on
+Ruby 3.4.8 and Ruby 4.0.3.
+
+## Benchmark Setup
+
+Benchmarks use a local delayed HTTP server and full `Crawlscope::Crawl` runs,
+not isolated HTTP calls.
+
+Measured scenarios:
+
+- direct sitemap pages,
+- uncrawled internal link targets resolved by the links rule,
+- 48 delayed pages or targets,
+- 20ms and 80ms response delays,
+- concurrency 8 and 16,
+- median of 3 runs per executor.
+
+Scripts:
+
+- `test/performance/async_fetch_benchmark.rb`
+- `test/performance/fetch_executor_matrix.rb`
+- `test/performance/sitemap_expansion_benchmark.rb`
+
+## Results
+
+### Ruby 4.0.3
+
+| Scenario | Threaded | Async | Async vs Threaded |
+| --- | ---: | ---: | ---: |
+| direct pages, 20ms, c8 | 0.159s | 0.147s | 1.08x |
+| direct pages, 20ms, c16 | 0.092s | 0.088s | 1.04x |
+| direct pages, 80ms, c8 | 0.528s | 0.521s | 1.01x |
+| direct pages, 80ms, c16 | 0.280s | 0.275s | 1.02x |
+| link targets, 20ms, c8 | 0.179s | 0.170s | 1.06x |
+| link targets, 80ms, c8 | 0.616s | 0.601s | 1.02x |
+
+### Ruby 3.4.8
+
+| Scenario | Threaded | Async | Async vs Threaded |
+| --- | ---: | ---: | ---: |
+| direct pages, 20ms, c8 | 0.163s | 0.159s | 1.03x |
+| direct pages, 20ms, c16 | 0.110s | 0.082s | 1.35x |
+| direct pages, 80ms, c8 | 0.524s | 0.518s | 1.01x |
+| direct pages, 80ms, c16 | 0.278s | 0.271s | 1.02x |
+| link targets, 20ms, c8 | 0.190s | 0.170s | 1.11x |
+| link targets, 80ms, c8 | 0.610s | 0.607s | 1.01x |
+
+The earlier simple benchmark also showed the same pattern:
+
+| Runtime | Sequential threaded | Threaded c8 | Async c8 |
+| --- | ---: | ---: | ---: |
+| Ruby 4.0.3 | 2.141s | 0.284s | 0.305s |
+| Ruby 3.4.8 | 2.133s | 0.272s | 0.328s |
+
+### Child Sitemap Expansion
+
+This benchmark measures a sitemap index with eight delayed child sitemaps.
+The first row forces a sequential executor by setting concurrency to 1. The
+bounded rows use the same crawl path with concurrency 8.
+
+| Runtime | Sequential | Threaded c8 | Async c8 | Best Speedup |
+| --- | ---: | ---: | ---: | ---: |
+| Ruby 4.0.3 | 2.123s | 0.280s | 0.285s | 7.58x |
+| Ruby 3.4.8 | 2.100s | 0.284s | 0.276s | 7.61x |
+
+## Why Async Is Not 2x Faster
+
+The current threaded implementation is already near the latency lower bound.
+
+For 48 pages at 80ms delay and concurrency 16, the theoretical network floor is
+roughly:
+
+```text
+ceil(48 / 16) * 0.08s = 0.24s
+```
+
+Measured results:
+
+- threaded: 0.279s on Ruby 4.0.3
+- async: 0.267s on Ruby 4.0.3
+
+That leaves only about 39ms of overhead for threaded execution to eliminate.
+Async cannot produce a 2x improvement when the threaded baseline is already
+within about 16% of the ideal network floor.
+
+The main reasons:
+
+1. Crawlscope already had bounded parallel fetching through
+   `Concurrent::FixedThreadPool`.
+2. The benchmark is I/O latency dominated, not CPU or thread scheduling
+   dominated.
+3. Faraday still adds request/response abstraction overhead in both modes.
+4. Local HTTP/1.1 requests do not create a major multiplexing advantage for
+   async.
+5. The async executor improves resource shape more than wall-clock time when
+   the thread pool is already sized correctly.
+
+## What Did Improve
+
+This work still improves the architecture and specific crawl paths:
+
+- async HTTP is now a real transport through `async-http-faraday`,
+- fetch executor selection is explicit and documented,
+- browser rendering is guarded from async misuse,
+- output ordering remains stable,
+- uncrawled link targets are resolved as a bounded batch instead of repeatedly
+  through the single-target path,
+- child sitemap indexes are expanded through the same bounded executor,
+- canonical target resolution reuses the link-target cache,
+- per-page link extraction and uniqueness summaries can run in bounded parallel
+  when the selected executor supports it.
+
+The biggest speed improvement is not async versus threads. It is batching extra
+network-bound and per-page rule work through the same bounded executor. In slow
+child-sitemap or link-target cases, the batch shape turns what would otherwise
+be a serial second wave into roughly `ceil(items / concurrency) * latency`.
+
+## Recommendation
+
+Keep the async implementation, but do not claim a 2x wall-clock speedup over
+the current threaded executor.
+
+Position it as:
+
+- production-ready for HTTP crawling,
+- useful for fiber-scheduler deployments,
+- potentially better at higher concurrency with lower thread pressure,
+- equivalent-to-slightly-faster than threaded for the measured local workloads.
+
+If the product goal is a reliable 2x improvement over current default threaded
+crawls, the next performance work should not be "more async." It should target:
+
+1. persistent fetch/result caching across rule phases,
+2. optional higher concurrency with per-host rate limits,
+3. streaming per-page analysis so CPU work overlaps network waits,
+4. reducing retained response bodies when rules only need parsed document state.
+
+## Decision
+
+Async is production-ready, but it is not a 2x speed feature against the existing
+threaded baseline. The production claim should be about scalability and
+executor choice. The measured 2x+ speedup comes from applying bounded
+parallelism to previously serial crawl phases, especially child sitemap
+expansion.
diff --git a/crawlscope.gemspec b/crawlscope.gemspec
index ccc80c5..5306e0c 100644
--- a/crawlscope.gemspec
+++ b/crawlscope.gemspec
@@ -12,7 +12,7 @@ Gem::Specification.new do |spec|
   spec.description = "A small Ruby gem for sitemap-driven SEO validation with structured issues, configurable rules and schema registries, optional browser rendering, and Rails rake task integration."
   spec.homepage = "https://www.ethos-link.com/opensource/crawlscope"
   spec.license = "MIT"
-  spec.required_ruby_version = ">= 3.2.0"
+  spec.required_ruby_version = ">= 3.3.0"
 
   repo = "https://github.com/ethos-link/crawlscope"
   branch = "main"
@@ -46,6 +46,8 @@ Gem::Specification.new do |spec|
   spec.require_paths = ["lib"]
 
   spec.add_dependency "concurrent-ruby", ">= 1.3"
+  spec.add_dependency "async", ">= 2.0"
+  spec.add_dependency "async-http-faraday", ">= 0.22"
   spec.add_dependency "faraday", ">= 2.0"
   spec.add_dependency "faraday-follow_redirects", ">= 0.3"
   spec.add_dependency "json-schema", ">= 5.0"
diff --git a/lib/crawlscope/cli.rb b/lib/crawlscope/cli.rb
index 39f9ac3..dbd2ec4 100644
--- a/lib/crawlscope/cli.rb
+++ b/lib/crawlscope/cli.rb
@@ -134,6 +134,7 @@ def run_validate
 
       configure_renderer(resolved_renderer)
       @configuration.concurrency = resolved_concurrency
+      @configuration.fetch_executor = resolved_fetch_executor
       @configuration.network_idle_timeout_seconds = resolved_integer("NETWORK_IDLE_TIMEOUT", default: @configuration.network_idle_timeout_seconds, minimum: 1)
       @configuration.timeout_seconds = resolved_integer("TIMEOUT", default: @configuration.timeout_seconds, minimum: 1)
 
@@ -167,6 +168,10 @@ def run_validate
         opts.on("--concurrency COUNT", Integer, "Set crawl concurrency") do |value|
           @configuration.concurrency = integer_option(value, minimum: 1, name: "concurrency")
         end
+
+        opts.on("--fetch-executor NAME", "Use threaded or async fetch execution") do |value|
+          @configuration.fetch_executor = value
+        end
       end
 
       parser.parse!(@argv)
@@ -221,6 +226,10 @@ def resolved_concurrency
       end
     end
 
+    def resolved_fetch_executor
+      normalized_string(ENV["FETCH_EXECUTOR"]) || @configuration.fetch_executor
+    end
+
     def resolved_integer(name, default:, minimum:)
       raw_value = normalized_string(ENV[name])
       return default if raw_value.nil?
diff --git a/lib/crawlscope/configuration.rb b/lib/crawlscope/configuration.rb
index dd6af3e..afb3e8b 100644
--- a/lib/crawlscope/configuration.rb
+++ b/lib/crawlscope/configuration.rb
@@ -7,10 +7,11 @@ class Configuration
     DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS = 5
     DEFAULT_BROWSER_SCROLL_PAGE = true
     DEFAULT_CONCURRENCY = 10
+    DEFAULT_FETCH_EXECUTOR = :threaded
     RENDERERS = %i[http browser].freeze
     DEFAULT_TIMEOUT_SECONDS = 20
 
-    attr_writer :allowed_statuses, :base_url, :browser_factory, :concurrency, :network_idle_timeout_seconds, :output, :renderer, :rule_registry, :schema_registry, :scroll_page, :site_name, :sitemap_path, :timeout_seconds
+    attr_writer :allowed_statuses, :base_url, :browser_factory, :concurrency, :fetch_executor, :network_idle_timeout_seconds, :output, :renderer, :rule_registry, :schema_registry, :scroll_page, :site_name, :sitemap_path, :timeout_seconds
 
     def allowed_statuses
       value = resolve(@allowed_statuses)
@@ -30,6 +31,11 @@ def concurrency
       positive_integer(value, default: DEFAULT_CONCURRENCY, name: "concurrency")
     end
 
+    def fetch_executor
+      value = resolve(@fetch_executor)
+      FetchExecutor.normalize(value.nil? ? DEFAULT_FETCH_EXECUTOR : value)
+    end
+
     def browser_concurrency
       value = concurrency
       default_value = DEFAULT_BROWSER_CONCURRENCY
@@ -83,6 +89,7 @@ def audit(base_url: self.base_url, sitemap_path: self.sitemap_path, rule_names:
         sitemap_path: sitemap_path,
         browser_factory: browser_factory,
         concurrency: concurrency,
+        fetch_executor: fetch_executor,
         network_idle_timeout_seconds: network_idle_timeout_seconds,
         renderer: renderer,
         timeout_seconds: timeout_seconds,
diff --git a/lib/crawlscope/context.rb b/lib/crawlscope/context.rb
index 5335eca..1e9d2c4 100644
--- a/lib/crawlscope/context.rb
+++ b/lib/crawlscope/context.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 module Crawlscope
-  Context = Data.define(:allowed_statuses, :base_url, :resolve_target, :schema_registry) do
+  Context = Data.define(:allowed_statuses, :base_url, :concurrency, :fetch_executor, :resolve_target, :resolve_targets, :schema_registry) do
     def fetch(name)
       public_send(name)
     end
diff --git a/lib/crawlscope/crawl.rb b/lib/crawlscope/crawl.rb
index ad3af48..26fd1da 100644
--- a/lib/crawlscope/crawl.rb
+++ b/lib/crawlscope/crawl.rb
@@ -2,13 +2,14 @@
 
 module Crawlscope
   class Crawl
-    def initialize(base_url:, sitemap_path:, rules:, schema_registry:, browser_factory: nil, concurrency: Configuration::DEFAULT_CONCURRENCY, network_idle_timeout_seconds: Configuration::DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS, renderer: :http, scroll_page: Configuration::DEFAULT_BROWSER_SCROLL_PAGE, timeout_seconds: Configuration::DEFAULT_TIMEOUT_SECONDS, allowed_statuses: Configuration::DEFAULT_ALLOWED_STATUSES)
+    def initialize(base_url:, sitemap_path:, rules:, schema_registry:, browser_factory: nil, concurrency: Configuration::DEFAULT_CONCURRENCY, fetch_executor: Configuration::DEFAULT_FETCH_EXECUTOR, network_idle_timeout_seconds: Configuration::DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS, renderer: :http, scroll_page: Configuration::DEFAULT_BROWSER_SCROLL_PAGE, timeout_seconds: Configuration::DEFAULT_TIMEOUT_SECONDS, allowed_statuses: Configuration::DEFAULT_ALLOWED_STATUSES)
       @base_url = base_url
       @sitemap_path = sitemap_path
       @rules = Array(rules)
       @schema_registry = schema_registry
       @browser_factory = browser_factory
       @concurrency = concurrency
+      @fetch_executor = fetch_executor
       @network_idle_timeout_seconds = network_idle_timeout_seconds
       @renderer = renderer.to_sym
       @scroll_page = scroll_page
@@ -17,10 +18,12 @@ def initialize(base_url:, sitemap_path:, rules:, schema_registry:, browser_facto
     end
 
     def call
+      validate_fetch_executor!
+
       urls = sitemap_urls
 
       @page_fetcher = page
-      pages = Crawler.new(page_fetcher: @page_fetcher, concurrency: @concurrency).call(urls)
+      pages = Crawler.new(page_fetcher: @page_fetcher, concurrency: @concurrency, fetch_executor: @fetch_executor).call(urls)
       issues = IssueCollection.new
 
       collect(pages, issues)
@@ -41,7 +44,13 @@ def call
     private
 
     def sitemap_urls
-      urls = Sitemap.new(path: @sitemap_path).urls(base_url: @base_url)
+      urls = Sitemap.new(
+        path: @sitemap_path,
+        adapter: http_adapter,
+        concurrency: @concurrency,
+        fetch_executor: @fetch_executor,
+        timeout_seconds: @timeout_seconds
+      ).urls(base_url: @base_url)
       raise ValidationError, "No URLs found in sitemap: #{@sitemap_path}" if urls.empty?
 
       urls
@@ -62,15 +71,31 @@ def page
       if @renderer == :browser
         (@browser_factory || method(:browser)).call
       else
-        Http.new(base_url: @base_url, timeout_seconds: @timeout_seconds)
+        Http.new(base_url: @base_url, timeout_seconds: @timeout_seconds, adapter: http_adapter)
       end
     end
 
+    def http_adapter
+      return unless FetchExecutor.normalize(@fetch_executor) == :async
+
+      require "async/http/faraday"
+      :async_http
+    end
+
+    def validate_fetch_executor!
+      return unless @renderer == :browser && FetchExecutor.normalize(@fetch_executor) == :async
+
+      raise ConfigurationError, "Async fetch execution is only supported with http rendering"
+    end
+
     def context
       Context.new(
         allowed_statuses: @allowed_statuses,
         base_url: @base_url,
+        concurrency: @concurrency,
+        fetch_executor: @fetch_executor,
         resolve_target: method(:resolve),
+        resolve_targets: method(:resolve_all),
         schema_registry: @schema_registry
       )
     end
@@ -93,11 +118,15 @@ def cache(pages)
       @targets = {}
 
       pages.each do |page|
-        @pages[page.normalized_url] = page unless page.normalized_url.to_s.empty?
-        @pages[page.normalized_final_url] = page unless page.normalized_final_url.to_s.empty?
+        cache_page(page)
       end
     end
 
+    def cache_page(page)
+      @pages[page.normalized_url] = page unless page.normalized_url.to_s.empty?
+      @pages[page.normalized_final_url] = page unless page.normalized_final_url.to_s.empty?
+    end
+
     def scan(urls, pages, issues)
       @rules.each do |rule|
         rule.call(urls: urls, pages: pages, issues: issues, context: context)
@@ -105,17 +134,40 @@ def scan(urls, pages, issues)
     end
 
     def resolve(target_url)
-      normalized_url = Url.normalize(target_url, base_url: @base_url)
-      return @targets[normalized_url] if @targets.key?(normalized_url)
+      resolve_all([target_url]).fetch(target_url)
+    end
 
-      @targets[normalized_url] = resolved_page(normalized_url) || fetched_page(normalized_url)
+    def resolve_all(target_urls)
+      normalized_by_url = Array(target_urls).to_h do |target_url|
+        [target_url, Url.normalize(target_url, base_url: @base_url)]
+      end
+      normalized_urls = normalized_by_url.values.compact.uniq
+      missing_urls = []
+
+      normalized_urls.each do |normalized_url|
+        next if @targets.key?(normalized_url)
+
+        resolved = resolved_page(normalized_url)
+        if resolved
+          @targets[normalized_url] = resolved
+        else
+          missing_urls << normalized_url
+        end
+      end
+
+      fetched_pages(missing_urls).each do |page|
+        normalized_url = Url.normalize(page.url, base_url: @base_url)
+        cache_page(page)
+        @targets[normalized_url] = resolution(page, normalized_url, crawled: false)
+      end
+
+      normalized_by_url.to_h { |target_url, normalized_url| [target_url, @targets[normalized_url]] }
     end
 
-    def fetched_page(normalized_url)
-      page = @page_fetcher.fetch(normalized_url)
-      @pages[page.normalized_url] = page unless page.normalized_url.to_s.empty?
-      @pages[page.normalized_final_url] = page unless page.normalized_final_url.to_s.empty?
-      resolution(page, normalized_url, crawled: false)
+    def fetched_pages(normalized_urls)
+      return [] if normalized_urls.empty?
+
+      Crawler.new(page_fetcher: @page_fetcher, concurrency: @concurrency, fetch_executor: @fetch_executor).call(normalized_urls)
     end
 
     def resolved_page(normalized_url)
diff --git a/lib/crawlscope/crawler.rb b/lib/crawlscope/crawler.rb
index 5da2007..967c308 100644
--- a/lib/crawlscope/crawler.rb
+++ b/lib/crawlscope/crawler.rb
@@ -1,28 +1,14 @@
 # frozen_string_literal: true
 
-require "concurrent"
-
 module Crawlscope
   class Crawler
-    def initialize(page_fetcher:, concurrency:)
+    def initialize(page_fetcher:, concurrency:, fetch_executor: :threaded)
       @page_fetcher = page_fetcher
-      @concurrency = concurrency
+      @fetch_executor = FetchExecutor.build(name: fetch_executor, concurrency: concurrency)
     end
 
     def call(urls)
-      pages = Concurrent::Array.new
-      pool = Concurrent::FixedThreadPool.new(@concurrency)
-
-      urls.each do |url|
-        pool.post do
-          pages << fetch(url)
-        end
-      end
-
-      pool.shutdown
-      pool.wait_for_termination
-
-      pages.to_a
+      @fetch_executor.call(urls) { |url| fetch(url) }
     end
 
     private
diff --git a/lib/crawlscope/fetch_executor.rb b/lib/crawlscope/fetch_executor.rb
new file mode 100644
index 0000000..50b2980
--- /dev/null
+++ b/lib/crawlscope/fetch_executor.rb
@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+
+module Crawlscope
+  module FetchExecutor
+    NAMES = %i[threaded async].freeze
+
+    module_function
+
+    def build(name:, concurrency:)
+      return name if name.respond_to?(:call)
+
+      case normalized_name(name)
+      when :threaded
+        Threaded.new(concurrency: concurrency)
+      when :async
+        Async.new(concurrency: concurrency)
+      end
+    end
+
+    def normalize(name)
+      return name if name.respond_to?(:call)
+
+      normalized_name(name)
+    end
+
+    def normalized_name(name)
+      normalized = name.to_s.strip
+      normalized = "threaded" if normalized.empty?
+
+      value = normalized.to_sym
+      return value if NAMES.include?(value)
+
+      raise ConfigurationError, "Crawlscope fetch_executor must be threaded or async"
+    end
+  end
+end
diff --git a/lib/crawlscope/fetch_executor/async.rb b/lib/crawlscope/fetch_executor/async.rb
new file mode 100644
index 0000000..fc7cfdb
--- /dev/null
+++ b/lib/crawlscope/fetch_executor/async.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+require "async"
+require "async/semaphore"
+
+module Crawlscope
+  module FetchExecutor
+    class Async
+      def initialize(concurrency:)
+        @concurrency = concurrency
+      end
+
+      def call(urls)
+        indexed_urls = Array(urls).each_with_index.to_a
+        pages = Array.new(indexed_urls.size)
+
+        Sync do |parent|
+          semaphore = ::Async::Semaphore.new(@concurrency)
+          tasks = indexed_urls.map do |url, index|
+            semaphore.async(parent: parent) do
+              pages[index] = yield(url)
+            end
+          end
+
+          tasks.each(&:wait)
+        end
+
+        pages
+      end
+    end
+  end
+end
diff --git a/lib/crawlscope/fetch_executor/threaded.rb b/lib/crawlscope/fetch_executor/threaded.rb
new file mode 100644
index 0000000..e9410f8
--- /dev/null
+++ b/lib/crawlscope/fetch_executor/threaded.rb
@@ -0,0 +1,32 @@
+# frozen_string_literal: true
+
+require "concurrent"
+
+module Crawlscope
+  module FetchExecutor
+    class Threaded
+      def initialize(concurrency:)
+        @concurrency = concurrency
+      end
+
+      def call(urls)
+        indexed_urls = Array(urls).each_with_index.to_a
+        pages = Array.new(indexed_urls.size)
+        mutex = Mutex.new
+        pool = Concurrent::FixedThreadPool.new(@concurrency)
+
+        indexed_urls.each do |url, index|
+          pool.post do
+            page = yield(url)
+            mutex.synchronize { pages[index] = page }
+          end
+        end
+
+        pool.shutdown
+        pool.wait_for_termination
+
+        pages
+      end
+    end
+  end
+end
diff --git a/lib/crawlscope/http.rb b/lib/crawlscope/http.rb
index c170770..d9eaed7 100644
--- a/lib/crawlscope/http.rb
+++ b/lib/crawlscope/http.rb
@@ -10,13 +10,18 @@ class Http
     MAX_REDIRECTS = 5
     USER_AGENT = "Mozilla/5.0 (compatible; Crawlscope/1.0)"
 
-    def initialize(base_url:, timeout_seconds:)
+    def initialize(base_url:, timeout_seconds:, adapter: nil)
       @base_url = base_url
       @timeout_seconds = timeout_seconds
+      @adapter = adapter
       @connections_by_thread = Concurrent::Map.new
     end
 
     def close
+      @connections_by_thread.each_value do |connection|
+        connection.close if connection.respond_to?(:close)
+      end
+
       @connections_by_thread.clear
     end
 
@@ -65,6 +70,7 @@ def connection
           faraday.response :follow_redirects, limit: MAX_REDIRECTS
           faraday.options.timeout = @timeout_seconds
           faraday.options.open_timeout = @timeout_seconds
+          faraday.adapter @adapter if @adapter
         end
       end
     end
diff --git a/lib/crawlscope/reporter.rb b/lib/crawlscope/reporter.rb
index 9425c6f..4c8e1c0 100644
--- a/lib/crawlscope/reporter.rb
+++ b/lib/crawlscope/reporter.rb
@@ -4,6 +4,8 @@
 
 module Crawlscope
   class Reporter
+    MAX_ISSUES_PER_GROUP = 20
+
     def initialize(io:)
       @io = io
     end
@@ -15,13 +17,13 @@ def report(result)
       @io.puts("URLs: #{result.urls.size}")
       @io.puts("Pages: #{result.pages.size}")
 
-      if result.ok?
+      if result.issues.size.zero?
         @io.puts("Status: OK")
         return
       end
 
-      @io.puts("Status: FAILED")
-      @io.puts("Issues: #{result.issues.size} #{severity_summary(result.issues)}")
+      @io.puts("Status: #{status_for(result.issues)}")
+      @io.puts("Issues: #{result.issues.size} total (#{severity_summary(result.issues)})")
       @io.puts("")
 
       report_summary(result.issues)
@@ -31,6 +33,18 @@ def report(result)
 
     private
 
+    def status_for(issues)
+      grouped = issues.by_severity
+
+      if grouped.key?(:error)
+        "FAILED"
+      elsif grouped.key?(:warning)
+        "WARNINGS"
+      else
+        "NOTICES"
+      end
+    end
+
     def severity_summary(issues)
       grouped = issues.by_severity
       return "" if grouped.empty?
@@ -59,10 +73,12 @@ def report_issue_groups(issues, base_url:)
         .each do |(category, code), grouped_issues|
           @io.puts("#{category} / #{code}: #{grouped_issues.size}")
 
-          grouped_issues.each do |issue|
+          grouped_issues.first(MAX_ISSUES_PER_GROUP).each do |issue|
             @io.puts("  - #{compact_issue(issue, base_url: base_url)}")
           end
 
+          remaining_count = grouped_issues.size - MAX_ISSUES_PER_GROUP
+          @io.puts("  ... #{remaining_count} more") if remaining_count.positive?
           @io.puts("")
         end
     end
@@ -127,7 +143,7 @@ def relative_url(url, base_url:)
     end
 
     def format_number(value)
-      return format("%.2f", value) if value.is_a?(Float)
+      return format("%.3f", value) if value.is_a?(Float)
 
       value.to_s
     end
diff --git a/lib/crawlscope/result.rb b/lib/crawlscope/result.rb
index eef888e..bf3f522 100644
--- a/lib/crawlscope/result.rb
+++ b/lib/crawlscope/result.rb
@@ -3,7 +3,7 @@
 module Crawlscope
   Result = Data.define(:base_url, :sitemap_path, :urls, :pages, :issues) do
     def ok?
-      issues.none?(&:error?) && issues.none?(&:warning?) && issues.none?(&:notice?)
+      issues.none?(&:error?)
     end
   end
 end
diff --git a/lib/crawlscope/rules/links.rb b/lib/crawlscope/rules/links.rb
index 86cbe75..d725853 100644
--- a/lib/crawlscope/rules/links.rb
+++ b/lib/crawlscope/rules/links.rb
@@ -21,8 +21,13 @@ def initialize
       def call(urls:, pages:, issues:, context:)
         @allowed_statuses = context.fetch(:allowed_statuses)
         @base_url = context.fetch(:base_url)
+        @concurrency = context_value(context, :concurrency, default: 1)
+        @fetch_executor = context_value(context, :fetch_executor)
         @resolve_target = context.fetch(:resolve_target)
+        @resolve_targets = context.resolve_targets if context.respond_to?(:resolve_targets)
+        @resolve_targets ||= context[:resolve_targets] if context.respond_to?(:[])
         @base_host = URI.parse(@base_url).host
+        @resolved_targets_by_url = {}
 
         links = extract_links(pages)
         validate_url_hygiene(urls, links, issues)
@@ -42,7 +47,8 @@ def contextual_links(doc)
       end
 
       def extract_links(pages)
-        pages.select(&:html?).flat_map { |page| page_links(page) }
+        html_pages = pages.select(&:html?)
+        parallel_map(html_pages) { |page| page_links(page) }.flatten
       end
 
       def page_links(page)
@@ -175,9 +181,12 @@ def report_unresolved_target(target_url, grouped_links, issues, resolution)
 
       def resolve_links(links, issues)
         resolved_links = []
+        grouped_links_by_target = links.group_by { |link| link[:target_url] }
+        targets_by_url = resolve_targets(grouped_links_by_target.keys)
+        @resolved_targets_by_url.merge!(targets_by_url)
 
-        links.group_by { |link| link[:target_url] }.each do |target_url, grouped_links|
-          target = resolve_target(target_url)
+        grouped_links_by_target.each do |target_url, grouped_links|
+          target = target_for(target_url)
 
           if target.unresolved?
             report_unresolved_target(target_url, grouped_links, issues, target.resolution)
@@ -221,6 +230,23 @@ def resolve_target(target_url)
         LinkTarget.new(target_url: target_url, resolution: resolution)
       end
 
+      def resolve_targets(target_urls)
+        if @resolve_targets
+          @resolve_targets.call(target_urls).to_h do |target_url, resolution|
+            [target_url, LinkTarget.new(target_url: target_url, resolution: resolution)]
+          end
+        else
+          target_urls.to_h { |target_url| [target_url, resolve_target(target_url)] }
+        end
+      end
+
+      def target_for(target_url)
+        @resolved_targets_by_url.fetch(target_url) do
+          target = resolve_target(target_url)
+          @resolved_targets_by_url[target_url] = target
+        end
+      end
+
       LinkTarget = Data.define(:target_url, :resolution) do
         def allowed?(statuses)
           statuses.include?(status)
@@ -424,7 +450,7 @@ def validate_indexable_pages_missing_from_sitemap(urls, resolved_links, issues)
           next if reported_urls.include?(final_url)
           next unless crawlable_path?(link[:final_path])
 
-          target = resolve_target(final_url)
+          target = target_for(final_url)
           next unless target.allowed?(@allowed_statuses) && target.html?
           next if target.noindex?
 
@@ -506,16 +532,14 @@ def validate_canonical_targets(urls, pages, resolved_links, issues)
         return if sitemap_pages.size < 2
 
         dofollow_counts_by_path = dofollow_counts_by_final_path(resolved_links)
-
-        sitemap_pages.each do |page|
-          canonical_url = canonical_url_for(page)
-          next if canonical_url.nil?
-
-          target_uri = URI.parse(canonical_url)
-          next if target_uri.host != @base_host
-
-          canonical_path = Url.path(canonical_url)
-          if canonical_path && dofollow_counts_by_path[canonical_path].zero?
+        canonical_entries = canonical_entries_for(sitemap_pages)
+        @resolved_targets_by_url.merge!(resolve_targets(canonical_entries.map { |entry| entry.fetch(:canonical_url) }))
+
+        canonical_entries.each do |entry|
+          page = entry.fetch(:page)
+          canonical_url = entry.fetch(:canonical_url)
+          canonical_path = entry.fetch(:canonical_path)
+          if canonical_path && !root_path?(canonical_path) && dofollow_counts_by_path[canonical_path].zero?
             issues.add(
               code: :canonical_no_internal_inlinks,
               severity: :warning,
@@ -527,8 +551,24 @@ def validate_canonical_targets(urls, pages, resolved_links, issues)
           end
 
           validate_canonical_target_status(page, canonical_url, issues)
+        end
+      end
+
+      def canonical_entries_for(pages)
+        pages.filter_map do |page|
+          canonical_url = canonical_url_for(page)
+          next if canonical_url.nil?
+
+          target_uri = URI.parse(canonical_url)
+          next if target_uri.host != @base_host
+
+          {
+            canonical_path: Url.path(canonical_url),
+            canonical_url: canonical_url,
+            page: page
+          }
         rescue URI::InvalidURIError
-          next
+          nil
         end
       end
 
@@ -548,8 +588,12 @@ def canonical_url_for(page)
         Url.normalize(canonical, base_url: page.url)
       end
 
+      def root_path?(path)
+        path == "/"
+      end
+
       def validate_canonical_target_status(page, canonical_url, issues)
-        target = resolve_target(canonical_url)
+        target = target_for(canonical_url)
 
         if target.unresolved? || target.ignored_error?
           return
@@ -575,6 +619,22 @@ def validate_canonical_target_status(page, canonical_url, issues)
           )
         end
       end
+
+      def context_value(context, name, default: nil)
+        if context.respond_to?(name)
+          context.public_send(name)
+        elsif context.respond_to?(:key?) && context.key?(name)
+          context[name]
+        else
+          default
+        end
+      end
+
+      def parallel_map(items, &block)
+        return items.map(&block) if items.size < 2 || @fetch_executor.nil? || @concurrency.to_i <= 1
+
+        FetchExecutor.build(name: @fetch_executor, concurrency: @concurrency).call(items, &block)
+      end
     end
   end
 end
diff --git a/lib/crawlscope/rules/uniqueness.rb b/lib/crawlscope/rules/uniqueness.rb
index 95519f9..baa43a6 100644
--- a/lib/crawlscope/rules/uniqueness.rb
+++ b/lib/crawlscope/rules/uniqueness.rb
@@ -26,11 +26,10 @@ def initialize(
       end
 
       def call(urls:, pages:, issues:, context:)
-        page_summaries = pages.filter_map do |page|
-          next unless page.html?
+        @concurrency = context_value(context, :concurrency, default: 1)
+        @fetch_executor = context_value(context, :fetch_executor)
 
-          summary_for(page)
-        end
+        page_summaries = summarize_pages(pages)
 
         validate_duplicates(page_summaries, issues)
         validate_near_duplicates(page_summaries, issues)
@@ -66,6 +65,12 @@ def summary_for(page)
         }
       end
 
+      def summarize_pages(pages)
+        html_pages = pages.select(&:html?)
+
+        parallel_map(html_pages) { |page| summary_for(page) }
+      end
+
       def validate_duplicates(page_summaries, issues)
         duplicates_for(page_summaries, :title).each do |value, urls|
           issues.add(
@@ -177,6 +182,22 @@ def shingle_similarity(left, right)
 
         intersection_size.to_f / smaller_set_size
       end
+
+      def context_value(context, name, default: nil)
+        if context.respond_to?(name)
+          context.public_send(name)
+        elsif context.respond_to?(:key?) && context.key?(name)
+          context[name]
+        else
+          default
+        end
+      end
+
+      def parallel_map(items, &block)
+        return items.map(&block) if items.size < 2 || @fetch_executor.nil? || @concurrency.to_i <= 1
+
+        FetchExecutor.build(name: @fetch_executor, concurrency: @concurrency).call(items, &block)
+      end
     end
   end
 end
diff --git a/lib/crawlscope/sitemap.rb b/lib/crawlscope/sitemap.rb
index b2b1eef..8ebfb33 100644
--- a/lib/crawlscope/sitemap.rb
+++ b/lib/crawlscope/sitemap.rb
@@ -9,20 +9,31 @@ module Crawlscope
   class Sitemap
     SITEMAP_NAMESPACE = {"xmlns" => "http://www.sitemaps.org/schemas/sitemap/0.9"}.freeze
 
-    def initialize(path:)
+    def initialize(path:, adapter: nil, concurrency: Configuration::DEFAULT_CONCURRENCY, fetch_executor: Configuration::DEFAULT_FETCH_EXECUTOR, timeout_seconds: Configuration::DEFAULT_TIMEOUT_SECONDS)
       @path = path
+      @adapter = adapter
+      @concurrency = concurrency
+      @fetch_executor = fetch_executor
+      @timeout_seconds = timeout_seconds
     end
 
     def urls(base_url:)
-      collect_urls(@path, base_url: base_url, visited: Set.new).uniq
+      collect_urls(@path, base_url: base_url, visited: Set.new, visited_mutex: Mutex.new).uniq
     end
 
     private
 
-    def collect_urls(source, base_url:, visited:)
-      return [] if visited.include?(source)
+    def collect_urls(source, base_url:, visited:, visited_mutex:)
+      already_visited = visited_mutex.synchronize do
+        if visited.include?(source)
+          true
+        else
+          visited.add(source)
+          false
+        end
+      end
+      return [] if already_visited
 
-      visited.add(source)
       document = Nokogiri::XML(read(source))
       root_name = document.root&.name
       unless %w[sitemapindex urlset].include?(root_name)
@@ -30,10 +41,13 @@ def collect_urls(source, base_url:, visited:)
       end
 
       if root_name == "sitemapindex"
-        document.xpath("//xmlns:sitemap/xmlns:loc", SITEMAP_NAMESPACE).flat_map do |node|
-          child_source = resolve_child_source(source, node.text.to_s.strip, base_url: base_url)
-          collect_urls(child_source, base_url: base_url, visited: visited)
+        child_sources = document.xpath("//xmlns:sitemap/xmlns:loc", SITEMAP_NAMESPACE).map do |node|
+          resolve_child_source(source, node.text.to_s.strip, base_url: base_url)
         end
+
+        fetch_executor.call(child_sources) do |child_source|
+          collect_urls(child_source, base_url: base_url, visited: visited, visited_mutex: visited_mutex)
+        end.flatten
       else
         document.xpath("//xmlns:url/xmlns:loc", SITEMAP_NAMESPACE).map do |node|
           Url.normalize_for_base(node.text.to_s.strip, base_url: base_url)
@@ -77,11 +91,16 @@ def local_child_path(parent_source, child_loc)
     end
 
     def connection
-      @connection ||= Faraday.new do |faraday|
+      Faraday.new do |faraday|
         faraday.response :follow_redirects, limit: Http::MAX_REDIRECTS
-        faraday.options.timeout = 20
-        faraday.options.open_timeout = 20
+        faraday.options.timeout = @timeout_seconds
+        faraday.options.open_timeout = @timeout_seconds
+        faraday.adapter @adapter if @adapter
       end
     end
+
+    def fetch_executor
+      @fetch_executor_instance ||= FetchExecutor.build(name: @fetch_executor, concurrency: @concurrency)
+    end
   end
 end
diff --git a/lib/tasks/crawlscope_tasks.rake b/lib/tasks/crawlscope_tasks.rake
index 80ad80c..ae12dd1 100644
--- a/lib/tasks/crawlscope_tasks.rake
+++ b/lib/tasks/crawlscope_tasks.rake
@@ -1,5 +1,5 @@
 namespace :crawlscope do
-  desc "Validate URLs with all default Crawlscope rules. Args: [url,sitemap,rules]. ENV: URL, SITEMAP, RULES, JS=1, TIMEOUT, NETWORK_IDLE_TIMEOUT, CONCURRENCY"
+  desc "Validate URLs with all default Crawlscope rules. Args: [url,sitemap,rules]. ENV: URL, SITEMAP, RULES, JS=1, TIMEOUT, NETWORK_IDLE_TIMEOUT, CONCURRENCY, FETCH_EXECUTOR"
   task :validate, [:url, :sitemap, :rules] => :environment do |_task, args|
     Crawlscope::RakeTasks.validate(url: args[:url], sitemap_path: args[:sitemap], rule_names: args[:rules])
   end
diff --git a/perf-improvement.md b/perf-improvement.md
new file mode 100644
index 0000000..6b7dd0d
--- /dev/null
+++ b/perf-improvement.md
@@ -0,0 +1,365 @@
+# Crawlscope Performance Improvement Plans
+
+Date: 2026-06-01
+
+## Executive Summary
+
+Crawlscope is already partially parallel: `Crawlscope::Crawler` uses a
+`Concurrent::FixedThreadPool` to fetch sitemap URLs, and HTTP connections are
+cached per worker thread. The remaining time growth is likely coming from three
+places:
+
+1. Sitemap expansion is recursive and sequential.
+2. Rule execution is sequential after the crawl completes.
+3. The links rule can trigger additional target resolution fetches after the
+   main crawl, also through a sequential rule path.
+
+The best next step is not "parallelize everything." The best next step is a
+bounded async fetch layer that unifies page fetches, child sitemap fetches, and
+link target resolution behind one concurrency budget, while keeping analysis
+mostly deterministic and sequential until rules are explicitly classified as
+parallel-safe.
+
+Recommendation: implement Plan 1 first, then adopt the safe parts of Plan 2.
+Defer Plan 3 until benchmarks prove the graph-level complexity is needed.
+
+## Implementation Update
+
+Plan 1 has been implemented with the deliberate Ruby `>= 3.3` contract change.
+The default executor remains threaded, and `fetch_executor: :async` is now
+available for HTTP crawls through `async-http-faraday`.
+
+The safe follow-on parallelization work is also implemented:
+
+- child sitemap indexes expand through the bounded fetch executor,
+- extra link target fetches are resolved in bounded batches,
+- canonical target checks reuse the link target cache,
+- link extraction and uniqueness summaries use bounded parallel maps.
+
+Measured outcome: async is production-ready, but async alone is not 2x faster
+than the existing threaded executor. The 2x+ improvement comes from applying
+bounded parallelism to previously serial phases, especially child sitemap
+expansion. See `async-performance-assessment.md` for current benchmark numbers.
+
+## Current Implementation Evidence
+
+- `lib/crawlscope/crawler.rb` delegates URL scheduling to
+  `Crawlscope::FetchExecutor`.
+- `lib/crawlscope/http.rb` uses Faraday and can run through the async HTTP
+  adapter for `fetch_executor: :async`.
+- `lib/crawlscope/crawl.rb` currently runs in this order: read sitemap URLs,
+  crawl all pages, collect fetch errors, cache pages, then scan rules.
+- `lib/crawlscope/crawl.rb` calls each rule sequentially in `scan`.
+- `lib/crawlscope/sitemap.rb` recursively expands child sitemaps in sequence.
+- `lib/crawlscope/rules/links.rb` groups links and resolves unique internal
+  targets through the crawl context, which can batch-fetch uncrawled target
+  pages.
+- `lib/crawlscope/issue_collection.rb` is a simple collection around an Array,
+  suitable for current sequential rule execution but not safe as a shared sink
+  for parallel rule writers without synchronization or per-task collections.
+- The gemspec currently requires Ruby `>= 3.3.0`.
+
+## Async Ecosystem Notes
+
+Async is now a mature fiber-scheduler stack, not a speculative experiment. Its
+current docs describe it as a composable asynchronous I/O framework based on
+`io-event`, with lightweight fiber concurrency and multi-thread/process
+containers available when actual parallelism is needed.
+
+Relevant current constraints:
+
+- `async` 2.39.0 was released on April 5, 2026 and requires Ruby `>= 3.3`.
+- `async-http` 0.95.1 was released on May 6, 2026 and requires Ruby `>= 3.3`.
+- `async-http-faraday` 0.22.2 was released on April 10, 2026 and requires Ruby
+  `>= 3.3`.
+- Current Crawlscope now supports Ruby `>= 3.3.0`, so adding the latest Async
+  gems as runtime dependencies is an explicit public compatibility change.
+
+Implication: async work should either:
+
+- bump Crawlscope's Ruby requirement to `>= 3.3` in a planned contract change,
+  with `UPGRADE.md` guidance; or
+- keep the current threaded Faraday path as the default while exposing async as
+  an opt-in executor.
+
+## Plan 1: Bounded Async Fetch Layer
+
+Replace the current fetch executor with a small fetch orchestration abstraction:
+
+- Keep `Crawlscope::Crawler.call(urls)` as the public boundary.
+- Add a `FetchExecutor` interface with threaded and async implementations.
+- Use the threaded implementation as the default.
+- Add an async implementation using `async` and either `async-http` directly or
+  `async-http-faraday`.
+- Apply one shared concurrency budget to:
+  - sitemap URL page fetches,
+  - child sitemap fetches,
+  - link target resolution fetches.
+- Deduplicate normalized URLs before scheduling fetches.
+- Preserve deterministic output by sorting returned pages back to input URL
+  order, or by sorting by normalized URL before rule analysis.
+- Keep rule execution sequential for now.
+
+### Why This Fits The Current Code
+
+The existing code already has a natural page-fetch boundary:
+`Crawler.new(page_fetcher:, concurrency:).call(urls)`. The HTTP fetcher already
+encapsulates transport details. The crawl context already resolves extra link
+targets through one `resolve_target` callable. That means the highest-leverage
+change is to make all URL I/O share a smarter scheduler, not to rewrite every
+rule.
+
+### Expected Measurable Effect
+
+- Wall-clock time should improve most on I/O-heavy crawls with many URLs,
+  redirects, missing pages, or internal links not already in the sitemap.
+- CPU-heavy rule time will not improve much.
+- Memory usage should stay bounded because one concurrency budget controls all
+  network fan-out.
+- Determinism can remain high if page ordering and issue ordering are
+  normalized before reporting.
+
+### Reductio Ad Absurdum
+
+Assume this plan is pushed to the extreme: every discovered URL and link target
+is scheduled immediately with unbounded async tasks. The result is faster only
+on a toy site. On a real site it creates too many open sockets, hits rate
+limits, destabilizes browser mode, and makes failures look nondeterministic.
+
+Therefore the core of the plan cannot be "async means unlimited." It must be
+"async with explicit global and per-host bounds." If that constraint is kept,
+the contradiction disappears and the plan remains valid.
+
+### Implementation Steps
+
+1. Add a benchmark fixture with delayed sitemap pages and delayed link targets.
+2. Extract current thread-pool logic into `Crawlscope::FetchExecutor::Threaded`.
+3. Add a shared URL cache keyed by normalized requested URL and normalized final
+   URL.
+4. Route `Crawler` and `Crawl#resolve` through the shared executor/cache.
+5. Add optional `Crawlscope::FetchExecutor::Async` behind a feature flag or
+   renderer/transport setting.
+6. Keep issue collection and rule execution unchanged.
+7. Document Ruby 3.3 requirements for the async transport and release
+   contract.
+
+## Plan 2: Split Crawl And Analyze Pipeline
+
+Turn the current "crawl everything, then analyze everything" flow into a
+two-phase pipeline:
+
+- Fetch pages concurrently.
+- As each page finishes, run per-page rules immediately:
+  - indexability,
+  - metadata,
+  - structured data,
+  - content quality checks that inspect only one page.
+- Store per-page issues in page-local collections.
+- After all pages finish, run aggregate rules:
+  - uniqueness,
+  - link graph checks,
+  - orphan/internal inlink checks.
+- Merge issue collections in deterministic order.
+
+### Why This Fits The Current Code
+
+Several rules already accept the same broad signature:
+
+```ruby
+rule.call(urls: urls, pages: pages, issues: issues, context: context)
+```
+
+That interface is convenient, but it hides whether a rule is per-page or
+aggregate. Splitting rules by capability would let Crawlscope overlap useful
+analysis with network wait time while keeping global checks accurate.
+
+### Expected Measurable Effect
+
+- Users get earlier partial results in future streaming/reporting modes.
+- Wall-clock time improves when per-page parsing/checking is non-trivial and
+  network latency leaves idle CPU time.
+- Total runtime may improve less than Plan 1 if most time is still network.
+- Implementation risk is moderate because rule contracts need classification.
+
+### Reductio Ad Absurdum
+
+Assume every rule is treated as page-local and run as soon as each page arrives.
+Uniqueness becomes wrong because it needs all pages. Link graph checks become
+wrong because inbound counts need all resolved links. Orphan-page detection can
+report false positives before all links have been seen.
+
+Therefore the plan is only valid if Crawlscope makes rule phase explicit:
+`per_page`, `post_crawl`, or `graph`. Without that classification, faster
+analysis produces faster wrong answers.
+
+### Implementation Steps
+
+1. Add rule metadata, for example `phase: :per_page` or `phase: :aggregate`.
+2. Keep existing rules aggregate by default.
+3. Move only obviously local rules into `:per_page` after focused tests.
+4. Change `IssueCollection` usage to per-task collections, then merge in stable
+   order.
+5. Add tests proving aggregate rules still see the complete page set.
+6. Add timing instrumentation for crawl, per-page analysis, and aggregate
+   analysis.
+
+## Plan 3: Unified Async Crawl Graph
+
+Model the crawl as a bounded async graph rather than a linear crawl:
+
+- Fetch the root sitemap.
+- Fetch child sitemaps concurrently.
+- Schedule page fetches as URLs are discovered.
+- Extract links as pages arrive.
+- Schedule uncrawled internal link targets through the same bounded queue.
+- Run page-local checks as soon as a page arrives.
+- Run graph/global checks after the queue drains.
+- Maintain one canonical `PageStore` and `TargetResolutionStore`.
+
+### Why This Fits The Problem
+
+The slowest large crawls are not just "N sitemap URLs." They are closer to a
+graph:
+
+- a root sitemap points to child sitemaps,
+- pages point to internal targets,
+- redirects create final URLs,
+- rules need both requested and final URL identity.
+
+A graph executor can remove duplicate work and prevent the current shape where
+extra link-target fetches happen later as a second network wave.
+
+### Expected Measurable Effect
+
+- Best theoretical wall-clock reduction for large sites with many child
+  sitemaps and link targets.
+- Better dedupe because sitemap URLs, final URLs, and link targets share one
+  store.
+- More complex failure semantics: cancellation, retries, partial graph state,
+  and stable issue ordering must be designed up front.
+
+### Reductio Ad Absurdum
+
+Assume the crawl graph eagerly follows every internal link, every redirect, and
+every discovered target until no new URL remains. Crawlscope stops being a
+sitemap validation tool and becomes a general web crawler. Runtime becomes
+unbounded, robots/rate-limit behavior becomes harder to reason about, and the
+reported URL set drifts away from the sitemap contract.
+
+Therefore the graph must be constrained by Crawlscope's product boundary:
+sitemap URLs are the primary audit set; extra target resolution exists only to
+validate links from those pages. The graph executor is valid only if it refuses
+to become an unlimited site crawler.
+
+### Implementation Steps
+
+1. Define graph scope formally:
+   - audit URLs from sitemap,
+   - child sitemap URLs,
+   - internal link targets from audited pages,
+   - no recursive full-site expansion unless explicitly configured.
+2. Add `PageStore` and `ResolutionStore` with stable URL normalization.
+3. Add a bounded async work queue with global and per-host limits.
+4. Add cancellation and timeout policy at the graph level.
+5. Add stable result materialization before calling aggregate rules.
+6. Add benchmark coverage for:
+   - many child sitemaps,
+   - many repeated internal links,
+   - redirects,
+   - broken targets,
+   - slow responses.
+
+## Ratings
+
+Scores are 1-5, where 5 is best.
+
+| Aspect | Plan 1: Bounded Async Fetch Layer | Plan 2: Split Crawl/Analyze | Plan 3: Unified Async Crawl Graph |
+| --- | ---: | ---: | ---: |
+| Expected wall-clock improvement | 4 | 3 | 5 |
+| Time to implement | 4 | 3 | 1 |
+| Correctness risk | 4 | 3 | 2 |
+| Deterministic reporting | 4 | 3 | 2 |
+| Public API stability | 4 | 3 | 2 |
+| Ruby 3.3 compatibility path | 4 | 5 | 3 |
+| Async ecosystem leverage | 4 | 2 | 5 |
+| Memory/socket boundability | 4 | 4 | 3 |
+| Testability | 4 | 3 | 2 |
+| Observability potential | 4 | 5 | 5 |
+| Overall score | 40 | 34 | 30 |
+
+## Recommendation
+
+Implement Plan 1 first.
+
+It attacks the most likely bottleneck, keeps the current public shape intact,
+and can be shipped in small steps. It also creates the infrastructure needed by
+the other two plans: a shared fetch executor, URL dedupe, timing metrics, and
+transport abstraction.
+
+Use this sequence:
+
+1. Add timing instrumentation and benchmark fixtures before changing behavior.
+2. Refactor the existing threaded fetcher into an explicit executor.
+3. Share the executor/cache between initial sitemap page fetches and link
+   target resolution.
+4. Add async transport with the deliberate Ruby `>= 3.3` contract change
+   documented in `UPGRADE.md`.
+5. After the fetch layer is measurable, implement the safe subset of Plan 2:
+   classify rules by phase and move only clearly per-page rules into overlapped
+   analysis.
+
+Do not start with Plan 3. It is attractive architecturally, but it creates too
+many new semantics before we have measurements. The reductio failure mode is
+also severe: Crawlscope could quietly become an unbounded crawler instead of a
+sitemap-driven validator.
+
+## Metrics To Track
+
+Add these measurements before and after each step:
+
+- total runtime,
+- sitemap expansion time,
+- audited page fetch time,
+- link target resolution time,
+- per-rule analysis time,
+- total requested URLs,
+- unique fetched URLs,
+- duplicate suppressed URLs,
+- max active fetches,
+- max active fetches per host,
+- socket/open connection count if available,
+- issue count by code,
+- output ordering stability across repeated runs.
+
+## Acceptance Criteria
+
+Plan 1 is successful when:
+
+- a delayed-response benchmark shows reduced wall-clock time at the same issue
+  count,
+- output is stable across repeated runs,
+- `CONCURRENCY=1` preserves current sequential semantics,
+- `CONCURRENCY=N` never schedules more than N active network fetches globally,
+- link target resolution reuses already fetched pages,
+- Ruby 3.3+ users have a working threaded default and an opt-in async executor.
+
+## Sources
+
+- Local implementation: `lib/crawlscope/crawler.rb`,
+  `lib/crawlscope/crawl.rb`, `lib/crawlscope/http.rb`,
+  `lib/crawlscope/sitemap.rb`, `lib/crawlscope/rules/links.rb`,
+  `lib/crawlscope/issue_collection.rb`, `crawlscope.gemspec`.
+- Async docs: https://socketry.github.io/async/
+- Async getting started: https://socketry.github.io/async/guides/getting-started/
+- Async scheduler docs:
+  https://socketry.github.io/async/source/Async/Scheduler/index.html
+- Async::HTTP getting started:
+  https://socketry.github.io/async-http/guides/getting-started/index
+- Async::HTTP::Faraday getting started:
+  https://socketry.github.io/async-http-faraday/guides/getting-started/index.html
+- RubyGems async 2.39.0:
+  https://rubygems.org/gems/async/versions/2.39.0
+- RubyGems async-http 0.95.1:
+  https://rubygems.org/gems/async-http
+- RubyGems async-http-faraday 0.22.2:
+  https://rubygems.org/gems/async-http-faraday/versions/0.22.2
diff --git a/test/crawlscope/cli_test.rb b/test/crawlscope/cli_test.rb
index b520755..a91a92a 100644
--- a/test/crawlscope/cli_test.rb
+++ b/test/crawlscope/cli_test.rb
@@ -4,11 +4,12 @@
 
 class CrawlscopeCliTest < Minitest::Test
   class FakeConfiguration
-    attr_accessor :base_url, :concurrency, :network_idle_timeout_seconds, :output, :renderer, :timeout_seconds
+    attr_accessor :base_url, :concurrency, :fetch_executor, :network_idle_timeout_seconds, :output, :renderer, :timeout_seconds
 
     def initialize
       @base_url = nil
       @concurrency = 10
+      @fetch_executor = :threaded
       @network_idle_timeout_seconds = 5
       @renderer = :http
       @timeout_seconds = 20
@@ -95,7 +96,7 @@ def test_validate_passes_arguments_to_task
     err = StringIO.new
 
     status = Crawlscope::Cli.start(
-      ["validate", "--url", "https://example.com", "--sitemap", "https://example.com/sitemap-pages.xml", "--rules", "metadata,links", "--renderer", "browser", "--timeout", "30", "--network-idle-timeout", "9", "--concurrency", "3"],
+      ["validate", "--url", "https://example.com", "--sitemap", "https://example.com/sitemap-pages.xml", "--rules", "metadata,links", "--renderer", "browser", "--timeout", "30", "--network-idle-timeout", "9", "--concurrency", "3", "--fetch-executor", "async"],
       out: out,
       err: err,
       configuration: configuration,
@@ -115,10 +116,24 @@ def test_validate_passes_arguments_to_task
     assert_equal 30, configuration.timeout_seconds
     assert_equal 9, configuration.network_idle_timeout_seconds
     assert_equal 3, configuration.concurrency
+    assert_equal "async", configuration.fetch_executor
     assert_same out, configuration.output
     assert_empty err.string
   end
 
+  def test_validate_reads_fetch_executor_from_environment
+    configuration = FakeConfiguration.new
+    task = FakeTask.new
+
+    with_env("FETCH_EXECUTOR" => "async") do
+      status = Crawlscope::Cli.start(["validate", "--url", "https://example.com"], out: StringIO.new, err: StringIO.new, configuration: configuration, task: task)
+
+      assert_equal 0, status
+    end
+
+    assert_equal "async", configuration.fetch_executor
+  end
+
   def test_ldjson_reads_urls_from_environment
     configuration = FakeConfiguration.new
     task = FakeTask.new
diff --git a/test/crawlscope/configuration_test.rb b/test/crawlscope/configuration_test.rb
index 31ac7ed..2b5efa2 100644
--- a/test/crawlscope/configuration_test.rb
+++ b/test/crawlscope/configuration_test.rb
@@ -13,6 +13,7 @@ def test_audit_builds_from_configured_callables
       config.sitemap_path = -> { "/tmp/sitemap.xml" }
       config.site_name = -> { "Example" }
       config.concurrency = -> { 4 }
+      config.fetch_executor = -> { :threaded }
     end
 
     audit = Crawlscope.configuration.audit
@@ -20,6 +21,7 @@ def test_audit_builds_from_configured_callables
     assert_equal "https://example.com", audit.instance_variable_get(:@base_url)
     assert_equal "/tmp/sitemap.xml", audit.instance_variable_get(:@sitemap_path)
     assert_equal 4, audit.instance_variable_get(:@concurrency)
+    assert_equal :threaded, audit.instance_variable_get(:@fetch_executor)
     assert_equal %i[
       indexability
       metadata
@@ -55,6 +57,7 @@ def test_defaults_are_normalized
 
     assert_equal [200, 301, 302], config.allowed_statuses
     assert_equal 10, config.concurrency
+    assert_equal :threaded, config.fetch_executor
     assert_equal 4, config.browser_concurrency
     assert_equal 5, config.network_idle_timeout_seconds
     assert_equal :http, config.renderer
@@ -67,6 +70,7 @@ def test_configured_values_are_normalized
     config = Crawlscope::Configuration.new
     config.allowed_statuses = ["200", "404"]
     config.concurrency = "2"
+    config.fetch_executor = "async"
     config.network_idle_timeout_seconds = "7"
     config.renderer = "browser"
     config.timeout_seconds = "9"
@@ -74,6 +78,7 @@ def test_configured_values_are_normalized
 
     assert_equal [200, 404], config.allowed_statuses
     assert_equal 2, config.concurrency
+    assert_equal :async, config.fetch_executor
     assert_equal 2, config.browser_concurrency
     assert_equal 7, config.network_idle_timeout_seconds
     assert_equal :browser, config.renderer
@@ -90,6 +95,15 @@ def test_renderer_must_be_supported
     assert_equal "Crawlscope renderer must be http or browser", error.message
   end
 
+  def test_fetch_executor_must_be_supported
+    config = Crawlscope::Configuration.new
+    config.fetch_executor = "processes"
+
+    error = assert_raises(Crawlscope::ConfigurationError) { config.fetch_executor }
+
+    assert_equal "Crawlscope fetch_executor must be threaded or async", error.message
+  end
+
   def test_numeric_values_must_be_positive_integers
     config = Crawlscope::Configuration.new
     config.concurrency = "0"
diff --git a/test/crawlscope/crawl_test.rb b/test/crawlscope/crawl_test.rb
index 6940107..8b94255 100644
--- a/test/crawlscope/crawl_test.rb
+++ b/test/crawlscope/crawl_test.rb
@@ -3,6 +3,36 @@
 require "test_helper"
 
 class CrawlscopeCrawlTest < Minitest::Test
+  class RecordingExecutor
+    attr_reader :batches
+
+    def initialize
+      @batches = []
+    end
+
+    def call(urls)
+      @batches << urls
+      urls.map { |url| yield(url) }
+    end
+  end
+
+  class PageMapFetcher
+    attr_reader :closed
+
+    def initialize(pages)
+      @pages = pages
+      @closed = false
+    end
+
+    def close
+      @closed = true
+    end
+
+    def fetch(url)
+      @pages.fetch(url)
+    end
+  end
+
   def setup
     @tmp_dir = Dir.mktmpdir
     @sitemap_path = File.join(@tmp_dir, "sitemap.xml")
@@ -100,7 +130,7 @@ def test_collects_metadata_issues_for_invalid_page
       schema_registry: Crawlscope::SchemaRegistry.default
     ).call
 
-    refute result.ok?
+    assert result.ok?
     assert_equal %i[
       incomplete_open_graph_tags
       meta_description_too_long
@@ -189,6 +219,31 @@ def fetch(url)
     assert fake_browser.closed
   end
 
+  def test_async_executor_requires_http_renderer
+    File.write(
+      @sitemap_path,
+      <<~XML
+        <?xml version="1.0" encoding="UTF-8"?>
+        <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+          <url><loc>https://example.com/pricing</loc></url>
+        </urlset>
+      XML
+    )
+
+    error = assert_raises(Crawlscope::ConfigurationError) do
+      Crawlscope::Crawl.new(
+        base_url: "https://example.com",
+        sitemap_path: @sitemap_path,
+        rules: [],
+        schema_registry: Crawlscope::SchemaRegistry.default,
+        renderer: :browser,
+        fetch_executor: :async
+      ).call
+    end
+
+    assert_equal "Async fetch execution is only supported with http rendering", error.message
+  end
+
   def test_reports_sitemap_redirect_url
     File.write(
       @sitemap_path,
@@ -214,4 +269,61 @@ def test_reports_sitemap_redirect_url
 
     assert_includes result.issues.to_a.map(&:code), :sitemap_redirect_url
   end
+
+  def test_resolves_uncrawled_link_targets_as_a_bounded_batch
+    File.write(
+      @sitemap_path,
+      <<~XML
+        <?xml version="1.0" encoding="UTF-8"?>
+        <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+          <url><loc>https://example.com/guide</loc></url>
+        </urlset>
+      XML
+    )
+
+    executor = RecordingExecutor.new
+    fetcher = PageMapFetcher.new(
+      "https://example.com/guide" => page(
+        "https://example.com/guide",
+        "<main><a href=\"/one\">One</a><a href=\"/two\">Two</a></main>"
+      ),
+      "https://example.com/one" => page("https://example.com/one", "<main>One</main>"),
+      "https://example.com/two" => page("https://example.com/two", "<main>Two</main>")
+    )
+
+    Crawlscope::Crawl.new(
+      base_url: "https://example.com",
+      sitemap_path: @sitemap_path,
+      rules: [Crawlscope::Rules::Links.new],
+      schema_registry: Crawlscope::SchemaRegistry.default,
+      renderer: :browser,
+      browser_factory: -> { fetcher },
+      fetch_executor: executor,
+      concurrency: 2
+    ).call
+
+    assert_equal(
+      [
+        ["https://example.com/guide"],
+        ["https://example.com/one", "https://example.com/two"]
+      ],
+      executor.batches
+    )
+    assert fetcher.closed
+  end
+
+  private
+
+  def page(url, body)
+    Crawlscope::Page.new(
+      url: url,
+      normalized_url: url,
+      final_url: url,
+      normalized_final_url: url,
+      status: 200,
+      headers: {"content-type" => "text/html"},
+      body: body,
+      doc: Nokogiri::HTML(body)
+    )
+  end
 end
diff --git a/test/crawlscope/crawler_test.rb b/test/crawlscope/crawler_test.rb
index cce4573..fc7b774 100644
--- a/test/crawlscope/crawler_test.rb
+++ b/test/crawlscope/crawler_test.rb
@@ -31,4 +31,65 @@ def test_returns_error_page_when_fetcher_raises
     assert_nil error_page.status
     assert_equal "Timeout::Error: fetch timed out", error_page.error
   end
+
+  def test_preserves_input_order
+    pages = Crawlscope::Crawler.new(page_fetcher: RaisingFetcher.new, concurrency: 2).call(
+      ["https://example.com/one", "https://example.com/two", "https://example.com/three"]
+    )
+
+    assert_equal(
+      ["https://example.com/one", "https://example.com/two", "https://example.com/three"],
+      pages.map(&:url)
+    )
+  end
+
+  class AsyncFetcher
+    attr_reader :active_fetches
+
+    def initialize
+      @active_fetches = 0
+      @max_active_fetches = 0
+      @mutex = Mutex.new
+    end
+
+    def fetch(url)
+      @mutex.synchronize do
+        @active_fetches += 1
+        @max_active_fetches = [@max_active_fetches, @active_fetches].max
+      end
+
+      Async::Task.current.sleep(0.01)
+
+      Crawlscope::Page.new(
+        url: url,
+        normalized_url: url,
+        final_url: url,
+        normalized_final_url: url,
+        status: 200,
+        headers: {},
+        body: "<html></html>",
+        doc: Nokogiri::HTML("<html></html>")
+      )
+    ensure
+      @mutex.synchronize { @active_fetches -= 1 }
+    end
+
+    def max_active_fetches
+      @mutex.synchronize { @max_active_fetches }
+    end
+  end
+
+  def test_async_executor_respects_concurrency_and_preserves_order
+    fetcher = AsyncFetcher.new
+
+    pages = Crawlscope::Crawler.new(page_fetcher: fetcher, concurrency: 2, fetch_executor: :async).call(
+      ["https://example.com/one", "https://example.com/two", "https://example.com/three"]
+    )
+
+    assert_equal(
+      ["https://example.com/one", "https://example.com/two", "https://example.com/three"],
+      pages.map(&:url)
+    )
+    assert_operator fetcher.max_active_fetches, :<=, 2
+  end
 end
diff --git a/test/crawlscope/links_rule_test.rb b/test/crawlscope/links_rule_test.rb
index 78119ac..c779e72 100644
--- a/test/crawlscope/links_rule_test.rb
+++ b/test/crawlscope/links_rule_test.rb
@@ -218,6 +218,33 @@ def test_reports_canonical_target_link_issues
     assert_includes codes, :canonical_points_to_redirect
   end
 
+  def test_does_not_report_missing_inlinks_for_root_canonical
+    issues = Crawlscope::IssueCollection.new
+    resolver = lambda do |target_url|
+      {crawled: true, error: nil, final_url: target_url, html: true, status: 200}
+    end
+
+    Crawlscope::Rules::Links.new.call(
+      urls: ["https://example.com/", "https://example.com/about"],
+      pages: [
+        page(
+          url: "https://example.com/",
+          body: <<~HTML
+            <html>
+              <head><link rel="canonical" href="https://example.com/"></head>
+              <body><main><a href="/about">About</a></main></body>
+            </html>
+          HTML
+        ),
+        page(url: "https://example.com/about", body: "<main><a href=\"/\">Home</a></main>")
+      ],
+      issues: issues,
+      context: context(resolver: resolver)
+    )
+
+    refute_includes issues.to_a.map(&:code), :canonical_no_internal_inlinks
+  end
+
   def test_reports_indexable_internal_pages_missing_from_sitemap
     issues = Crawlscope::IssueCollection.new
     resolver = lambda do |target_url|
@@ -348,6 +375,32 @@ def test_reports_internal_links_that_redirect
     assert_includes redirect_issue.message, "https://example.com/pricing"
   end
 
+  def test_reuses_link_target_resolution_for_later_link_checks
+    issues = Crawlscope::IssueCollection.new
+    resolution_counts = Hash.new(0)
+    resolver = lambda do |target_url|
+      resolution_counts[target_url] += 1
+      {
+        crawled: false,
+        doc: Nokogiri::HTML("<main>Hidden</main>"),
+        error: nil,
+        final_url: target_url,
+        headers: {},
+        html: true,
+        status: 200
+      }
+    end
+
+    Crawlscope::Rules::Links.new.call(
+      urls: ["https://example.com/guide"],
+      pages: [page(url: "https://example.com/guide", body: "<main><a href=\"/hidden\">Hidden</a></main>")],
+      issues: issues,
+      context: context(resolver: resolver)
+    )
+
+    assert_equal 1, resolution_counts.fetch("https://example.com/hidden")
+  end
+
   def test_ignores_links_that_should_not_be_crawled
     issues = Crawlscope::IssueCollection.new
 
diff --git a/test/crawlscope/reporter_test.rb b/test/crawlscope/reporter_test.rb
index 6a5c3d1..1d7d620 100644
--- a/test/crawlscope/reporter_test.rb
+++ b/test/crawlscope/reporter_test.rb
@@ -23,7 +23,7 @@ def test_reports_ok_result
     refute_includes output, "Status: FAILED"
   end
 
-  def test_reports_failed_result_with_grouped_one_line_issues
+  def test_reports_warning_result_with_grouped_one_line_issues
     io = StringIO.new
     issues = Crawlscope::IssueCollection.new
     4.times do |index|
@@ -54,8 +54,9 @@ def test_reports_failed_result_with_grouped_one_line_issues
 
     output = io.string
 
-    assert_includes output, "Status: FAILED"
-    assert_includes output, "Issues: 5 5 warnings"
+    assert_includes output, "Status: WARNINGS"
+    refute_includes output, "Status: FAILED"
+    assert_includes output, "Issues: 5 total (5 warnings)"
     assert_includes output, "Summary:"
     assert_includes output, "links / low_dofollow_inlinks: 4"
     assert_includes output, "  - /page-1  inbound 1/2  sources: /source-1"
@@ -66,6 +67,85 @@ def test_reports_failed_result_with_grouped_one_line_issues
     refute_includes output, "... 1 more"
   end
 
+  def test_reports_failed_status_when_errors_are_present
+    io = StringIO.new
+    issues = Crawlscope::IssueCollection.new
+    issues.add(code: :fetch_failed, severity: :error, category: :crawl, url: "https://example.com/a", message: "timeout", details: {})
+    issues.add(code: :missing_title, severity: :warning, category: :metadata, url: "https://example.com/a", message: "missing <title>", details: {})
+
+    result = Crawlscope::Result.new(
+      base_url: "https://example.com",
+      sitemap_path: "/tmp/sitemap.xml",
+      urls: ["https://example.com/a"],
+      pages: [Object.new],
+      issues: issues
+    )
+
+    Crawlscope::Reporter.new(io: io).report(result)
+
+    output = io.string
+
+    assert_includes output, "Status: FAILED"
+    assert_includes output, "Issues: 2 total (1 error, 1 warning)"
+  end
+
+  def test_limits_large_issue_groups
+    io = StringIO.new
+    issues = Crawlscope::IssueCollection.new
+    21.times do |index|
+      issues.add(
+        code: :low_dofollow_inlinks,
+        severity: :warning,
+        category: :links,
+        url: "https://example.com/page-#{index + 1}",
+        message: "dofollow inbound links 1 below 2",
+        details: {dofollow_inbound_count: 1, minimum: 2}
+      )
+    end
+
+    result = Crawlscope::Result.new(
+      base_url: "https://example.com",
+      sitemap_path: "/tmp/sitemap.xml",
+      urls: ["https://example.com"],
+      pages: [Object.new],
+      issues: issues
+    )
+
+    Crawlscope::Reporter.new(io: io).report(result)
+
+    output = io.string
+
+    assert_includes output, "links / low_dofollow_inlinks: 21"
+    assert_includes output, "  - /page-20  inbound 1/2"
+    refute_includes output, "  - /page-21"
+    assert_includes output, "  ... 1 more"
+  end
+
+  def test_reports_ratio_with_enough_precision_to_show_threshold_difference
+    io = StringIO.new
+    issues = Crawlscope::IssueCollection.new
+    issues.add(
+      code: :low_unique_token_ratio,
+      severity: :warning,
+      category: :content_quality,
+      url: "https://example.com/a",
+      message: "visible text has low token variety",
+      details: {ratio: 0.249, threshold: 0.25}
+    )
+
+    result = Crawlscope::Result.new(
+      base_url: "https://example.com",
+      sitemap_path: "/tmp/sitemap.xml",
+      urls: ["https://example.com/a"],
+      pages: [Object.new],
+      issues: issues
+    )
+
+    Crawlscope::Reporter.new(io: io).report(result)
+
+    assert_includes io.string, "ratio 0.249/0.250"
+  end
+
   def test_reports_source_details_on_one_line
     io = StringIO.new
     issues = Crawlscope::IssueCollection.new
diff --git a/test/crawlscope/result_test.rb b/test/crawlscope/result_test.rb
new file mode 100644
index 0000000..f8678f9
--- /dev/null
+++ b/test/crawlscope/result_test.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class CrawlscopeResultTest < Minitest::Test
+  def test_ok_when_result_has_warnings_only
+    issues = Crawlscope::IssueCollection.new
+    issues.add(code: :missing_title, severity: :warning, category: :metadata, url: "https://example.com", message: "missing <title>", details: {})
+
+    result = result_with(issues)
+
+    assert result.ok?
+  end
+
+  def test_not_ok_when_result_has_errors
+    issues = Crawlscope::IssueCollection.new
+    issues.add(code: :fetch_failed, severity: :error, category: :crawl, url: "https://example.com", message: "timeout", details: {})
+
+    result = result_with(issues)
+
+    refute result.ok?
+  end
+
+  private
+
+  def result_with(issues)
+    Crawlscope::Result.new(
+      base_url: "https://example.com",
+      sitemap_path: "/tmp/sitemap.xml",
+      urls: ["https://example.com"],
+      pages: [Object.new],
+      issues: issues
+    )
+  end
+end
diff --git a/test/crawlscope/sitemap_test.rb b/test/crawlscope/sitemap_test.rb
index 33c8151..ca83218 100644
--- a/test/crawlscope/sitemap_test.rb
+++ b/test/crawlscope/sitemap_test.rb
@@ -3,6 +3,19 @@
 require "test_helper"
 
 class CrawlscopeSitemapTest < Minitest::Test
+  class RecordingExecutor
+    attr_reader :batches
+
+    def initialize
+      @batches = []
+    end
+
+    def call(items)
+      @batches << items
+      items.map { |item| yield(item) }
+    end
+  end
+
   def test_parses_remote_sitemap_urlset
     stub_request(:get, "https://www.example.com/sitemap.xml")
       .to_return(
@@ -127,4 +140,43 @@ def test_parses_local_sitemap_index_with_absolute_child_sitemap_loc
       assert_equal ["http://localhost:3000/features/reviews"], parser.urls(base_url: "http://localhost:3000")
     end
   end
+
+  def test_child_sitemaps_are_collected_through_the_fetch_executor
+    Dir.mktmpdir do |dir|
+      File.write(
+        File.join(dir, "sitemap.xml"),
+        <<~XML
+          <?xml version="1.0" encoding="UTF-8"?>
+          <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+            <sitemap><loc>first.xml</loc></sitemap>
+            <sitemap><loc>second.xml</loc></sitemap>
+          </sitemapindex>
+        XML
+      )
+      File.write(
+        File.join(dir, "first.xml"),
+        <<~XML
+          <?xml version="1.0" encoding="UTF-8"?>
+          <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+            <url><loc>http://localhost:3000/first</loc></url>
+          </urlset>
+        XML
+      )
+      File.write(
+        File.join(dir, "second.xml"),
+        <<~XML
+          <?xml version="1.0" encoding="UTF-8"?>
+          <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+            <url><loc>http://localhost:3000/second</loc></url>
+          </urlset>
+        XML
+      )
+
+      executor = RecordingExecutor.new
+      parser = Crawlscope::Sitemap.new(path: File.join(dir, "sitemap.xml"), fetch_executor: executor)
+
+      assert_equal ["http://localhost:3000/first", "http://localhost:3000/second"], parser.urls(base_url: "http://localhost:3000")
+      assert_equal [[File.join(dir, "first.xml"), File.join(dir, "second.xml")]], executor.batches
+    end
+  end
 end
diff --git a/test/performance/async_fetch_benchmark.rb b/test/performance/async_fetch_benchmark.rb
new file mode 100644
index 0000000..d2315bd
--- /dev/null
+++ b/test/performance/async_fetch_benchmark.rb
@@ -0,0 +1,127 @@
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
+
+require "bundler/setup"
+require "crawlscope"
+require "json"
+require "socket"
+require "time"
+
+class DelayedHttpServer
+  attr_reader :base_url
+
+  def initialize(page_count:, delay_seconds:)
+    @page_count = page_count
+    @delay_seconds = delay_seconds
+    @server = TCPServer.new("127.0.0.1", 0)
+    @base_url = "http://127.0.0.1:#{@server.addr[1]}"
+    @threads = []
+  end
+
+  def start
+    @thread = Thread.new do
+      loop do
+        socket = @server.accept
+        @threads << Thread.new(socket) { |client| respond(client) }
+      rescue IOError
+        break
+      end
+    end
+  end
+
+  def stop
+    @server.close
+    @thread&.join
+    @threads.each(&:join)
+  end
+
+  private
+
+  def respond(socket)
+    request_line = socket.gets.to_s
+    path = request_line.split[1].to_s
+    read_headers(socket)
+
+    if path == "/sitemap.xml"
+      write_response(socket, sitemap_xml, content_type: "application/xml")
+    else
+      sleep @delay_seconds
+      write_response(socket, page_html(path), content_type: "text/html")
+    end
+  ensure
+    socket.close
+  end
+
+  def read_headers(socket)
+    loop do
+      line = socket.gets
+      break if line.nil? || line == "\r\n"
+    end
+  end
+
+  def sitemap_xml
+    urls = (1..@page_count).map do |index|
+      "<url><loc>#{@base_url}/pages/#{index}</loc></url>"
+    end.join
+
+    %(<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">#{urls}</urlset>)
+  end
+
+  def page_html(path)
+    <<~HTML
+      <html>
+        <head><title>#{path}</title></head>
+        <body><main><h1>#{path}</h1><p>#{path} benchmark page</p></main></body>
+      </html>
+    HTML
+  end
+
+  def write_response(socket, body, content_type:)
+    socket.write "HTTP/1.1 200 OK\r\n"
+    socket.write "Content-Type: #{content_type}\r\n"
+    socket.write "Content-Length: #{body.bytesize}\r\n"
+    socket.write "Connection: close\r\n"
+    socket.write "\r\n"
+    socket.write body
+  end
+end
+
+def measure(name, base_url:, concurrency:, fetch_executor:)
+  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+
+  result = Crawlscope::Crawl.new(
+    base_url: base_url,
+    sitemap_path: "#{base_url}/sitemap.xml",
+    rules: [],
+    schema_registry: Crawlscope::SchemaRegistry.default,
+    concurrency: concurrency,
+    fetch_executor: fetch_executor
+  ).call
+
+  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
+  [name, {seconds: elapsed.round(3), pages: result.pages.size, issues: result.issues.size}]
+end
+
+server = DelayedHttpServer.new(page_count: 24, delay_seconds: 0.08)
+server.start
+
+begin
+  results = {}
+  [
+    measure("threaded_concurrency_1", base_url: server.base_url, concurrency: 1, fetch_executor: :threaded),
+    measure("threaded_concurrency_8", base_url: server.base_url, concurrency: 8, fetch_executor: :threaded),
+    measure("async_concurrency_8", base_url: server.base_url, concurrency: 8, fetch_executor: :async)
+  ].each { |name, result| results[name] = result }
+
+  sequential = results.fetch("threaded_concurrency_1").fetch(:seconds)
+  threaded = results.fetch("threaded_concurrency_8").fetch(:seconds)
+  async = results.fetch("async_concurrency_8").fetch(:seconds)
+
+  abort "async benchmark failed: async was not meaningfully faster than sequential" unless async < sequential * 0.6
+  abort "async benchmark failed: async was more than 2x slower than threaded" if async > threaded * 2.0
+
+  puts JSON.pretty_generate(results)
+ensure
+  server.stop
+end
diff --git a/test/performance/fetch_executor_matrix.rb b/test/performance/fetch_executor_matrix.rb
new file mode 100644
index 0000000..97d579e
--- /dev/null
+++ b/test/performance/fetch_executor_matrix.rb
@@ -0,0 +1,162 @@
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
+
+require "bundler/setup"
+require "crawlscope"
+require "json"
+require "socket"
+
+class MatrixHttpServer
+  attr_reader :base_url
+
+  def initialize(page_count:, delay_seconds:, link_targets: false)
+    @page_count = page_count
+    @delay_seconds = delay_seconds
+    @link_targets = link_targets
+    @server = TCPServer.new("127.0.0.1", 0)
+    @base_url = "http://127.0.0.1:#{@server.addr[1]}"
+    @threads = []
+  end
+
+  def start
+    @thread = Thread.new do
+      loop do
+        socket = @server.accept
+        @threads << Thread.new(socket) { |client| respond(client) }
+      rescue IOError
+        break
+      end
+    end
+  end
+
+  def stop
+    @server.close
+    @thread&.join
+    @threads.each(&:join)
+  end
+
+  private
+
+  def respond(socket)
+    request_line = socket.gets.to_s
+    path = request_line.split[1].to_s
+    read_headers(socket)
+
+    if path == "/sitemap.xml"
+      write_response(socket, sitemap_xml, content_type: "application/xml")
+    else
+      sleep @delay_seconds
+      write_response(socket, page_html(path), content_type: "text/html")
+    end
+  ensure
+    socket.close
+  end
+
+  def read_headers(socket)
+    loop do
+      line = socket.gets
+      break if line.nil? || line == "\r\n"
+    end
+  end
+
+  def sitemap_xml
+    paths = @link_targets ? ["/seed"] : (1..@page_count).map { |index| "/pages/#{index}" }
+    urls = paths.map { |path| "<url><loc>#{@base_url}#{path}</loc></url>" }.join
+
+    %(<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">#{urls}</urlset>)
+  end
+
+  def page_html(path)
+    links = if @link_targets && path == "/seed"
+      (1..@page_count).map { |index| %(<a href="/targets/#{index}">Target #{index}</a>) }.join
+    else
+      ""
+    end
+
+    <<~HTML
+      <html>
+        <head>
+          <title>#{path}</title>
+          <meta name="robots" content="noindex">
+        </head>
+        <body>
+          <main><h1>#{path}</h1><p>#{path} benchmark page</p>#{links}</main>
+        </body>
+      </html>
+    HTML
+  end
+
+  def write_response(socket, body, content_type:)
+    socket.write "HTTP/1.1 200 OK\r\n"
+    socket.write "Content-Type: #{content_type}\r\n"
+    socket.write "Content-Length: #{body.bytesize}\r\n"
+    socket.write "Connection: close\r\n"
+    socket.write "\r\n"
+    socket.write body
+  end
+end
+
+def measure(base_url:, concurrency:, fetch_executor:, rules:)
+  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+
+  result = Crawlscope::Crawl.new(
+    base_url: base_url,
+    sitemap_path: "#{base_url}/sitemap.xml",
+    rules: rules,
+    schema_registry: Crawlscope::SchemaRegistry.default,
+    concurrency: concurrency,
+    fetch_executor: fetch_executor
+  ).call
+
+  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
+  {seconds: elapsed, pages: result.pages.size, issues: result.issues.size}
+end
+
+def median(values)
+  sorted = values.sort
+  sorted[sorted.length / 2]
+end
+
+def run_case(name:, page_count:, delay_seconds:, concurrency:, link_targets: false)
+  server = MatrixHttpServer.new(page_count: page_count, delay_seconds: delay_seconds, link_targets: link_targets)
+  server.start
+
+  rules = link_targets ? [Crawlscope::Rules::Links.new] : []
+  threaded = []
+  async = []
+
+  3.times do
+    threaded << measure(base_url: server.base_url, concurrency: concurrency, fetch_executor: :threaded, rules: rules)
+    async << measure(base_url: server.base_url, concurrency: concurrency, fetch_executor: :async, rules: rules)
+  end
+
+  threaded_seconds = median(threaded.map { |result| result.fetch(:seconds) })
+  async_seconds = median(async.map { |result| result.fetch(:seconds) })
+
+  {
+    name: name,
+    page_count: page_count,
+    delay_seconds: delay_seconds,
+    concurrency: concurrency,
+    link_targets: link_targets,
+    threaded_seconds: threaded_seconds.round(3),
+    async_seconds: async_seconds.round(3),
+    async_vs_threaded: (threaded_seconds / async_seconds).round(2),
+    pages: async.first.fetch(:pages),
+    issues: async.first.fetch(:issues)
+  }
+ensure
+  server&.stop
+end
+
+cases = [
+  {name: "direct_pages_c8", page_count: 48, delay_seconds: 0.02, concurrency: 8},
+  {name: "direct_pages_c16", page_count: 48, delay_seconds: 0.02, concurrency: 16},
+  {name: "slow_direct_pages_c8", page_count: 48, delay_seconds: 0.08, concurrency: 8},
+  {name: "slow_direct_pages_c16", page_count: 48, delay_seconds: 0.08, concurrency: 16},
+  {name: "link_targets_c8", page_count: 48, delay_seconds: 0.02, concurrency: 8, link_targets: true},
+  {name: "slow_link_targets_c8", page_count: 48, delay_seconds: 0.08, concurrency: 8, link_targets: true}
+]
+
+puts JSON.pretty_generate(cases.map { |attributes| run_case(**attributes) })
diff --git a/test/performance/sitemap_expansion_benchmark.rb b/test/performance/sitemap_expansion_benchmark.rb
new file mode 100644
index 0000000..183534f
--- /dev/null
+++ b/test/performance/sitemap_expansion_benchmark.rb
@@ -0,0 +1,121 @@
+# frozen_string_literal: true
+
+$LOAD_PATH.unshift File.expand_path("../../lib", __dir__)
+
+require "bundler/setup"
+require "crawlscope"
+require "json"
+require "socket"
+
+class DelayedSitemapServer
+  attr_reader :base_url
+
+  def initialize(child_count:, delay_seconds:)
+    @child_count = child_count
+    @delay_seconds = delay_seconds
+    @server = TCPServer.new("127.0.0.1", 0)
+    @base_url = "http://127.0.0.1:#{@server.addr[1]}"
+    @threads = []
+  end
+
+  def start
+    @thread = Thread.new do
+      loop do
+        socket = @server.accept
+        @threads << Thread.new(socket) { |client| respond(client) }
+      rescue IOError
+        break
+      end
+    end
+  end
+
+  def stop
+    @server.close
+    @thread&.join
+    @threads.each(&:join)
+  end
+
+  private
+
+  def respond(socket)
+    request_line = socket.gets.to_s
+    path = request_line.split[1].to_s
+    read_headers(socket)
+
+    if path == "/sitemap.xml"
+      write_response(socket, sitemap_index, content_type: "application/xml")
+    else
+      sleep @delay_seconds
+      write_response(socket, child_sitemap(path), content_type: "application/xml")
+    end
+  ensure
+    socket.close
+  end
+
+  def read_headers(socket)
+    loop do
+      line = socket.gets
+      break if line.nil? || line == "\r\n"
+    end
+  end
+
+  def sitemap_index
+    children = (1..@child_count).map do |index|
+      "<sitemap><loc>#{@base_url}/sitemaps/#{index}.xml</loc></sitemap>"
+    end.join
+
+    %(<?xml version="1.0" encoding="UTF-8"?><sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">#{children}</sitemapindex>)
+  end
+
+  def child_sitemap(path)
+    index = File.basename(path, ".xml")
+
+    %(<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>#{@base_url}/pages/#{index}</loc></url></urlset>)
+  end
+
+  def write_response(socket, body, content_type:)
+    socket.write "HTTP/1.1 200 OK\r\n"
+    socket.write "Content-Type: #{content_type}\r\n"
+    socket.write "Content-Length: #{body.bytesize}\r\n"
+    socket.write "Connection: close\r\n"
+    socket.write "\r\n"
+    socket.write body
+  end
+end
+
+def measure(name, base_url:, concurrency:, fetch_executor:)
+  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+
+  urls = Crawlscope::Sitemap.new(
+    path: "#{base_url}/sitemap.xml",
+    concurrency: concurrency,
+    fetch_executor: fetch_executor,
+    timeout_seconds: 5
+  ).urls(base_url: base_url)
+
+  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
+  [name, {seconds: elapsed.round(3), urls: urls.size}]
+end
+
+server = DelayedSitemapServer.new(child_count: 24, delay_seconds: 0.08)
+server.start
+
+begin
+  results = {}
+  [
+    measure("threaded_concurrency_1", base_url: server.base_url, concurrency: 1, fetch_executor: :threaded),
+    measure("threaded_concurrency_8", base_url: server.base_url, concurrency: 8, fetch_executor: :threaded),
+    measure("async_concurrency_8", base_url: server.base_url, concurrency: 8, fetch_executor: :async)
+  ].each { |name, result| results[name] = result }
+
+  sequential = results.fetch("threaded_concurrency_1").fetch(:seconds)
+  threaded = results.fetch("threaded_concurrency_8").fetch(:seconds)
+  async = results.fetch("async_concurrency_8").fetch(:seconds)
+
+  abort "sitemap benchmark failed: threaded parallelism was not at least 2x faster" unless threaded < sequential * 0.5
+  abort "sitemap benchmark failed: async parallelism was not at least 2x faster" unless async < sequential * 0.5
+
+  puts JSON.pretty_generate(results)
+ensure
+  server.stop
+end

From 1d814fb3b28113f357467837a5c983fef73d78f8 Mon Sep 17 00:00:00 2001
From: Paulo Fidalgo <paulo.fidalgo.pt@gmail.com>
Date: Mon, 1 Jun 2026 23:14:50 +0100
Subject: [PATCH 2/3] refactor: default HTTP crawling to async

Make async the default executor for HTTP crawls while preserving threaded execution for browser rendering and explicit FETCH_EXECUTOR=threaded overrides.

Centralize bounded ordered mapping under FetchExecutor.map and remove duplicated rule-level parallel_map helpers from links and uniqueness checks.

Verification: bundle exec rake test; bundle exec rake standard; mise exec ruby@3.4.8 -- bundle exec rake test; mise exec ruby@3.4.8 -- bundle exec rake standard; bundle exec rake crawlscope:validate URL=http://localhost:3000
---
 README.md                                 |  9 ++---
 UPGRADE.md                                |  9 ++---
 async-performance-assessment.md           |  5 +--
 lib/crawlscope/cli.rb                     |  9 ++++-
 lib/crawlscope/configuration.rb           |  6 ++--
 lib/crawlscope/crawl.rb                   |  8 +++--
 lib/crawlscope/fetch_executor.rb          |  7 ++++
 lib/crawlscope/fetch_executor/async.rb    | 12 +++----
 lib/crawlscope/fetch_executor/threaded.rb | 14 ++++----
 lib/crawlscope/rules/links.rb             | 12 +++----
 lib/crawlscope/rules/uniqueness.rb        | 12 +++----
 perf-improvement.md                       |  5 +--
 test/crawlscope/cli_test.rb               | 13 ++++++-
 test/crawlscope/configuration_test.rb     |  9 ++++-
 test/crawlscope/crawl_test.rb             | 32 +++++++++++++++--
 test/crawlscope/fetch_executor_test.rb    | 44 +++++++++++++++++++++++
 16 files changed, 157 insertions(+), 49 deletions(-)
 create mode 100644 test/crawlscope/fetch_executor_test.rb

diff --git a/README.md b/README.md
index c338f15..2566bbd 100644
--- a/README.md
+++ b/README.md
@@ -199,10 +199,11 @@ bundle exec rake 'crawlscope:validate:ldjson[https://example.com/article]'
 Plain `rake` does not pass `--url` style flags to tasks. Use `URL=...` or the
 task-argument form above instead.
 
-`FETCH_EXECUTOR=threaded` is the default and works with Crawlscope's required
-runtime dependencies. `FETCH_EXECUTOR=async` uses Ruby's fiber scheduler and
-Async::HTTP through Faraday, preserving the same `CONCURRENCY` bound. The CLI
-accepts the same setting with `--fetch-executor async`.
+`FETCH_EXECUTOR=async` is the default for HTTP crawling. It uses Ruby's fiber
+scheduler and Async::HTTP through Faraday, preserving the same `CONCURRENCY`
+bound. Use `FETCH_EXECUTOR=threaded` or `--fetch-executor threaded` for the
+thread-pool executor. Browser rendering uses the threaded executor by default
+because async fetch execution is only supported with HTTP rendering.
 
 `crawlscope:validate:ldjson` is separate because it directly checks the URL or semicolon-separated URLs in `URL`; it does not crawl the sitemap. Without `URL`, it checks the configured base URL, falling back to `http://localhost:3000`.
 
diff --git a/UPGRADE.md b/UPGRADE.md
index bbc6f6a..f3c9415 100644
--- a/UPGRADE.md
+++ b/UPGRADE.md
@@ -15,7 +15,8 @@ Recommended migration:
 
 1. Upgrade the host application runtime to Ruby 3.3 or newer.
 2. Run `bundle update crawlscope async async-http async-http-faraday`.
-3. Keep the default `FETCH_EXECUTOR=threaded` for the first deploy if you want
-   a conservative rollout.
-4. Enable async fetching with `FETCH_EXECUTOR=async` or
-   `--fetch-executor async` after the app is running on the new Ruby version.
+3. Crawlscope now uses `FETCH_EXECUTOR=async` by default for HTTP crawling.
+4. Set `FETCH_EXECUTOR=threaded` or pass `--fetch-executor threaded` for a
+   conservative rollout or for explicit thread-pool execution.
+5. Browser rendering continues to use threaded execution by default because
+   async fetch execution is only supported with HTTP rendering.
diff --git a/async-performance-assessment.md b/async-performance-assessment.md
index e38b1a7..ef7c30c 100644
--- a/async-performance-assessment.md
+++ b/async-performance-assessment.md
@@ -149,8 +149,9 @@ Position it as:
 - potentially better at higher concurrency with lower thread pressure,
 - equivalent-to-slightly-faster than threaded for the measured local workloads.
 
-If the product goal is a reliable 2x improvement over current default threaded
-crawls, the next performance work should not be "more async." It should target:
+If the product goal is a reliable 2x improvement over the previous threaded
+baseline, the next performance work should not be "more async." It should
+target:
 
 1. persistent fetch/result caching across rule phases,
 2. optional higher concurrency with per-host rate limits,
diff --git a/lib/crawlscope/cli.rb b/lib/crawlscope/cli.rb
index dbd2ec4..638db9a 100644
--- a/lib/crawlscope/cli.rb
+++ b/lib/crawlscope/cli.rb
@@ -134,6 +134,7 @@ def run_validate
 
       configure_renderer(resolved_renderer)
       @configuration.concurrency = resolved_concurrency
+      fetch_executor_configured = !normalized_string(ENV["FETCH_EXECUTOR"]).nil?
       @configuration.fetch_executor = resolved_fetch_executor
       @configuration.network_idle_timeout_seconds = resolved_integer("NETWORK_IDLE_TIMEOUT", default: @configuration.network_idle_timeout_seconds, minimum: 1)
       @configuration.timeout_seconds = resolved_integer("TIMEOUT", default: @configuration.timeout_seconds, minimum: 1)
@@ -170,11 +171,13 @@ def run_validate
         end
 
         opts.on("--fetch-executor NAME", "Use threaded or async fetch execution") do |value|
+          fetch_executor_configured = true
           @configuration.fetch_executor = value
         end
       end
 
       parser.parse!(@argv)
+      @configuration.fetch_executor = :threaded if @configuration.renderer == :browser && !fetch_executor_configured
 
       result = task.validate(
         base_url: options[:url],
@@ -227,7 +230,11 @@ def resolved_concurrency
     end
 
     def resolved_fetch_executor
-      normalized_string(ENV["FETCH_EXECUTOR"]) || @configuration.fetch_executor
+      configured_executor = normalized_string(ENV["FETCH_EXECUTOR"])
+      return configured_executor if configured_executor
+      return :threaded if @configuration.renderer == :browser
+
+      @configuration.fetch_executor
     end
 
     def resolved_integer(name, default:, minimum:)
diff --git a/lib/crawlscope/configuration.rb b/lib/crawlscope/configuration.rb
index afb3e8b..76df814 100644
--- a/lib/crawlscope/configuration.rb
+++ b/lib/crawlscope/configuration.rb
@@ -7,7 +7,7 @@ class Configuration
     DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS = 5
     DEFAULT_BROWSER_SCROLL_PAGE = true
     DEFAULT_CONCURRENCY = 10
-    DEFAULT_FETCH_EXECUTOR = :threaded
+    DEFAULT_FETCH_EXECUTOR = :async
     RENDERERS = %i[http browser].freeze
     DEFAULT_TIMEOUT_SECONDS = 20
 
@@ -33,7 +33,9 @@ def concurrency
 
     def fetch_executor
       value = resolve(@fetch_executor)
-      FetchExecutor.normalize(value.nil? ? DEFAULT_FETCH_EXECUTOR : value)
+      default = (renderer == :browser) ? :threaded : DEFAULT_FETCH_EXECUTOR
+
+      FetchExecutor.normalize(value.nil? ? default : value)
     end
 
     def browser_concurrency
diff --git a/lib/crawlscope/crawl.rb b/lib/crawlscope/crawl.rb
index 26fd1da..4bb42dc 100644
--- a/lib/crawlscope/crawl.rb
+++ b/lib/crawlscope/crawl.rb
@@ -2,16 +2,16 @@
 
 module Crawlscope
   class Crawl
-    def initialize(base_url:, sitemap_path:, rules:, schema_registry:, browser_factory: nil, concurrency: Configuration::DEFAULT_CONCURRENCY, fetch_executor: Configuration::DEFAULT_FETCH_EXECUTOR, network_idle_timeout_seconds: Configuration::DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS, renderer: :http, scroll_page: Configuration::DEFAULT_BROWSER_SCROLL_PAGE, timeout_seconds: Configuration::DEFAULT_TIMEOUT_SECONDS, allowed_statuses: Configuration::DEFAULT_ALLOWED_STATUSES)
+    def initialize(base_url:, sitemap_path:, rules:, schema_registry:, browser_factory: nil, concurrency: Configuration::DEFAULT_CONCURRENCY, fetch_executor: nil, network_idle_timeout_seconds: Configuration::DEFAULT_BROWSER_NETWORK_IDLE_TIMEOUT_SECONDS, renderer: :http, scroll_page: Configuration::DEFAULT_BROWSER_SCROLL_PAGE, timeout_seconds: Configuration::DEFAULT_TIMEOUT_SECONDS, allowed_statuses: Configuration::DEFAULT_ALLOWED_STATUSES)
       @base_url = base_url
       @sitemap_path = sitemap_path
       @rules = Array(rules)
       @schema_registry = schema_registry
       @browser_factory = browser_factory
       @concurrency = concurrency
-      @fetch_executor = fetch_executor
       @network_idle_timeout_seconds = network_idle_timeout_seconds
       @renderer = renderer.to_sym
+      @fetch_executor = fetch_executor || default_fetch_executor
       @scroll_page = scroll_page
       @timeout_seconds = timeout_seconds
       @allowed_statuses = allowed_statuses
@@ -88,6 +88,10 @@ def validate_fetch_executor!
       raise ConfigurationError, "Async fetch execution is only supported with http rendering"
     end
 
+    def default_fetch_executor
+      (@renderer == :browser) ? :threaded : Configuration::DEFAULT_FETCH_EXECUTOR
+    end
+
     def context
       Context.new(
         allowed_statuses: @allowed_statuses,
diff --git a/lib/crawlscope/fetch_executor.rb b/lib/crawlscope/fetch_executor.rb
index 50b2980..c462a71 100644
--- a/lib/crawlscope/fetch_executor.rb
+++ b/lib/crawlscope/fetch_executor.rb
@@ -17,6 +17,13 @@ def build(name:, concurrency:)
       end
     end
 
+    def map(name:, concurrency:, items:, &block)
+      items = Array(items)
+      return items.map(&block) if items.size < 2 || concurrency.to_i <= 1
+
+      build(name: name, concurrency: concurrency).call(items, &block)
+    end
+
     def normalize(name)
       return name if name.respond_to?(:call)
 
diff --git a/lib/crawlscope/fetch_executor/async.rb b/lib/crawlscope/fetch_executor/async.rb
index fc7cfdb..5bdf563 100644
--- a/lib/crawlscope/fetch_executor/async.rb
+++ b/lib/crawlscope/fetch_executor/async.rb
@@ -10,22 +10,22 @@ def initialize(concurrency:)
         @concurrency = concurrency
       end
 
-      def call(urls)
-        indexed_urls = Array(urls).each_with_index.to_a
-        pages = Array.new(indexed_urls.size)
+      def call(items)
+        indexed_items = Array(items).each_with_index.to_a
+        results = Array.new(indexed_items.size)
 
         Sync do |parent|
           semaphore = ::Async::Semaphore.new(@concurrency)
-          tasks = indexed_urls.map do |url, index|
+          tasks = indexed_items.map do |item, index|
             semaphore.async(parent: parent) do
-              pages[index] = yield(url)
+              results[index] = yield(item)
             end
           end
 
           tasks.each(&:wait)
         end
 
-        pages
+        results
       end
     end
   end
diff --git a/lib/crawlscope/fetch_executor/threaded.rb b/lib/crawlscope/fetch_executor/threaded.rb
index e9410f8..d819104 100644
--- a/lib/crawlscope/fetch_executor/threaded.rb
+++ b/lib/crawlscope/fetch_executor/threaded.rb
@@ -9,23 +9,23 @@ def initialize(concurrency:)
         @concurrency = concurrency
       end
 
-      def call(urls)
-        indexed_urls = Array(urls).each_with_index.to_a
-        pages = Array.new(indexed_urls.size)
+      def call(items)
+        indexed_items = Array(items).each_with_index.to_a
+        results = Array.new(indexed_items.size)
         mutex = Mutex.new
         pool = Concurrent::FixedThreadPool.new(@concurrency)
 
-        indexed_urls.each do |url, index|
+        indexed_items.each do |item, index|
           pool.post do
-            page = yield(url)
-            mutex.synchronize { pages[index] = page }
+            result = yield(item)
+            mutex.synchronize { results[index] = result }
           end
         end
 
         pool.shutdown
         pool.wait_for_termination
 
-        pages
+        results
       end
     end
   end
diff --git a/lib/crawlscope/rules/links.rb b/lib/crawlscope/rules/links.rb
index d725853..bc14b78 100644
--- a/lib/crawlscope/rules/links.rb
+++ b/lib/crawlscope/rules/links.rb
@@ -48,7 +48,11 @@ def contextual_links(doc)
 
       def extract_links(pages)
         html_pages = pages.select(&:html?)
-        parallel_map(html_pages) { |page| page_links(page) }.flatten
+        FetchExecutor.map(
+          name: @fetch_executor,
+          concurrency: @concurrency,
+          items: html_pages
+        ) { |page| page_links(page) }.flatten
       end
 
       def page_links(page)
@@ -629,12 +633,6 @@ def context_value(context, name, default: nil)
           default
         end
       end
-
-      def parallel_map(items, &block)
-        return items.map(&block) if items.size < 2 || @fetch_executor.nil? || @concurrency.to_i <= 1
-
-        FetchExecutor.build(name: @fetch_executor, concurrency: @concurrency).call(items, &block)
-      end
     end
   end
 end
diff --git a/lib/crawlscope/rules/uniqueness.rb b/lib/crawlscope/rules/uniqueness.rb
index baa43a6..3a3dc07 100644
--- a/lib/crawlscope/rules/uniqueness.rb
+++ b/lib/crawlscope/rules/uniqueness.rb
@@ -68,7 +68,11 @@ def summary_for(page)
       def summarize_pages(pages)
         html_pages = pages.select(&:html?)
 
-        parallel_map(html_pages) { |page| summary_for(page) }
+        FetchExecutor.map(
+          name: @fetch_executor,
+          concurrency: @concurrency,
+          items: html_pages
+        ) { |page| summary_for(page) }
       end
 
       def validate_duplicates(page_summaries, issues)
@@ -192,12 +196,6 @@ def context_value(context, name, default: nil)
           default
         end
       end
-
-      def parallel_map(items, &block)
-        return items.map(&block) if items.size < 2 || @fetch_executor.nil? || @concurrency.to_i <= 1
-
-        FetchExecutor.build(name: @fetch_executor, concurrency: @concurrency).call(items, &block)
-      end
     end
   end
 end
diff --git a/perf-improvement.md b/perf-improvement.md
index 6b7dd0d..1b5482f 100644
--- a/perf-improvement.md
+++ b/perf-improvement.md
@@ -26,8 +26,9 @@ Defer Plan 3 until benchmarks prove the graph-level complexity is needed.
 ## Implementation Update
 
 Plan 1 has been implemented with the deliberate Ruby `>= 3.3` contract change.
-The default executor remains threaded, and `fetch_executor: :async` is now
-available for HTTP crawls through `async-http-faraday`.
+The default executor is now async for HTTP crawls through
+`async-http-faraday`, with `fetch_executor: :threaded` retained as an explicit
+fallback.
 
 The safe follow-on parallelization work is also implemented:
 
diff --git a/test/crawlscope/cli_test.rb b/test/crawlscope/cli_test.rb
index a91a92a..1a33968 100644
--- a/test/crawlscope/cli_test.rb
+++ b/test/crawlscope/cli_test.rb
@@ -9,7 +9,7 @@ class FakeConfiguration
     def initialize
       @base_url = nil
       @concurrency = 10
-      @fetch_executor = :threaded
+      @fetch_executor = :async
       @network_idle_timeout_seconds = 5
       @renderer = :http
       @timeout_seconds = 20
@@ -134,6 +134,17 @@ def test_validate_reads_fetch_executor_from_environment
     assert_equal "async", configuration.fetch_executor
   end
 
+  def test_validate_uses_threaded_executor_for_browser_rendering_by_default
+    configuration = FakeConfiguration.new
+    task = FakeTask.new
+
+    status = Crawlscope::Cli.start(["validate", "--url", "https://example.com", "--renderer", "browser"], out: StringIO.new, err: StringIO.new, configuration: configuration, task: task)
+
+    assert_equal 0, status
+    assert_equal :browser, configuration.renderer
+    assert_equal :threaded, configuration.fetch_executor
+  end
+
   def test_ldjson_reads_urls_from_environment
     configuration = FakeConfiguration.new
     task = FakeTask.new
diff --git a/test/crawlscope/configuration_test.rb b/test/crawlscope/configuration_test.rb
index 2b5efa2..48d490c 100644
--- a/test/crawlscope/configuration_test.rb
+++ b/test/crawlscope/configuration_test.rb
@@ -57,7 +57,7 @@ def test_defaults_are_normalized
 
     assert_equal [200, 301, 302], config.allowed_statuses
     assert_equal 10, config.concurrency
-    assert_equal :threaded, config.fetch_executor
+    assert_equal :async, config.fetch_executor
     assert_equal 4, config.browser_concurrency
     assert_equal 5, config.network_idle_timeout_seconds
     assert_equal :http, config.renderer
@@ -66,6 +66,13 @@ def test_defaults_are_normalized
     assert config.scroll_page?
   end
 
+  def test_browser_renderer_defaults_to_threaded_fetch_executor
+    config = Crawlscope::Configuration.new
+    config.renderer = :browser
+
+    assert_equal :threaded, config.fetch_executor
+  end
+
   def test_configured_values_are_normalized
     config = Crawlscope::Configuration.new
     config.allowed_statuses = ["200", "404"]
diff --git a/test/crawlscope/crawl_test.rb b/test/crawlscope/crawl_test.rb
index 8b94255..353cebd 100644
--- a/test/crawlscope/crawl_test.rb
+++ b/test/crawlscope/crawl_test.rb
@@ -42,6 +42,29 @@ def teardown
     FileUtils.rm_rf(@tmp_dir)
   end
 
+  def test_http_renderer_defaults_to_async_executor
+    crawl = Crawlscope::Crawl.new(
+      base_url: "https://example.com",
+      sitemap_path: @sitemap_path,
+      rules: [],
+      schema_registry: Crawlscope::SchemaRegistry.default
+    )
+
+    assert_equal :async, crawl.instance_variable_get(:@fetch_executor)
+  end
+
+  def test_browser_renderer_defaults_to_threaded_executor
+    crawl = Crawlscope::Crawl.new(
+      base_url: "https://example.com",
+      sitemap_path: @sitemap_path,
+      rules: [],
+      schema_registry: Crawlscope::SchemaRegistry.default,
+      renderer: :browser
+    )
+
+    assert_equal :threaded, crawl.instance_variable_get(:@fetch_executor)
+  end
+
   def test_returns_ok_when_metadata_is_valid
     File.write(
       @sitemap_path,
@@ -86,7 +109,8 @@ def test_returns_ok_when_metadata_is_valid
       base_url: "https://example.com",
       sitemap_path: @sitemap_path,
       rules: Crawlscope::RuleRegistry.default(site_name: "Example").rules,
-      schema_registry: Crawlscope::SchemaRegistry.default
+      schema_registry: Crawlscope::SchemaRegistry.default,
+      fetch_executor: :threaded
     ).call
 
     assert result.ok?
@@ -127,7 +151,8 @@ def test_collects_metadata_issues_for_invalid_page
       base_url: "https://example.com",
       sitemap_path: @sitemap_path,
       rules: Crawlscope::RuleRegistry.default(site_name: "Example").rules,
-      schema_registry: Crawlscope::SchemaRegistry.default
+      schema_registry: Crawlscope::SchemaRegistry.default,
+      fetch_executor: :threaded
     ).call
 
     assert result.ok?
@@ -264,7 +289,8 @@ def test_reports_sitemap_redirect_url
       base_url: "https://example.com",
       sitemap_path: @sitemap_path,
       rules: [],
-      schema_registry: Crawlscope::SchemaRegistry.default
+      schema_registry: Crawlscope::SchemaRegistry.default,
+      fetch_executor: :threaded
     ).call
 
     assert_includes result.issues.to_a.map(&:code), :sitemap_redirect_url
diff --git a/test/crawlscope/fetch_executor_test.rb b/test/crawlscope/fetch_executor_test.rb
new file mode 100644
index 0000000..65873e4
--- /dev/null
+++ b/test/crawlscope/fetch_executor_test.rb
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+require "test_helper"
+
+class CrawlscopeFetchExecutorTest < Minitest::Test
+  class RecordingExecutor
+    attr_reader :items
+
+    def call(items)
+      @items = items
+      items.map { |item| yield(item) }
+    end
+  end
+
+  def test_map_preserves_input_order
+    results = Crawlscope::FetchExecutor.map(name: :threaded, concurrency: 2, items: [3, 1, 2]) do |item|
+      item * 10
+    end
+
+    assert_equal [30, 10, 20], results
+  end
+
+  def test_map_uses_sequential_fallback_for_single_item
+    executor = RecordingExecutor.new
+
+    results = Crawlscope::FetchExecutor.map(name: executor, concurrency: 4, items: ["one"]) do |item|
+      item.upcase
+    end
+
+    assert_equal ["ONE"], results
+    assert_nil executor.items
+  end
+
+  def test_map_uses_injected_executor_for_parallel_work
+    executor = RecordingExecutor.new
+
+    results = Crawlscope::FetchExecutor.map(name: executor, concurrency: 4, items: %w[a b]) do |item|
+      item.upcase
+    end
+
+    assert_equal %w[A B], results
+    assert_equal %w[a b], executor.items
+  end
+end

From 20a59ab59c4853229c5beb89c8be8f2f72e4ace3 Mon Sep 17 00:00:00 2001
From: Paulo Fidalgo <paulo.fidalgo.pt@gmail.com>
Date: Mon, 1 Jun 2026 23:20:51 +0100
Subject: [PATCH 3/3] chore: update Ruby CI matrix

Drop Ruby 3.2 from the CI matrix now that Crawlscope requires Ruby 3.3 or newer for the async runtime dependencies.
---
 .github/workflows/ruby.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index 37ca9fe..1ec5062 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        ruby-version: ["3.2", "3.4"]
+        ruby-version: ["3.3", "3.4", "4.0.3"]
 
     steps:
       - uses: actions/checkout@v4