diff --git a/CHANGELOG.md b/CHANGELOG.md index d0d917d..069c369 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Changelog +## 0.7.0 (2026-04-21) + +### Breaking changes + +- **`:adapter_error` removed from `DEFAULT_RETRY_ON`.** New default: `[:validation_failed, :parse_error]`. `ruby_llm` already retries transport errors (`RateLimitError`, `ServerError`, `ServiceUnavailableError`, `OverloadedError`, timeouts) at the Faraday layer, so the previous default re-ran the same model on errors the HTTP middleware already retried with backoff. To restore pre-0.7 behavior: `retry_on :validation_failed, :parse_error, :adapter_error`. Recommended pattern: pair `:adapter_error` with `escalate "model_a", "model_b"` — a different model/provider can bypass what transport retry could not. +- **`AdapterCaller` narrows `rescue` from `StandardError` to `RubyLLM::Error` + `Faraday::Error`.** Provider errors and transport errors that escape ruby_llm's Faraday retry middleware (`Faraday::TimeoutError`, `Faraday::ConnectionFailed`) still produce `:adapter_error` as before. Programmer errors that are neither (`NoMethodError`, adapter code bugs) now propagate instead of being silently converted to `:adapter_error` and retried. **Known limitation:** adapter code raising `ArgumentError` is still coerced into `:input_error` by `Step::Base#run_once` (which rescues `ArgumentError` for input-type validation). Disambiguating adapter-ArgumentError vs input-validation-ArgumentError requires a `run_once` refactor and is tracked as a follow-up. + +### Migration + +If you rely on the old behavior, opt in explicitly: + +```ruby +retry_policy do + attempts 3 + retry_on :validation_failed, :parse_error, :adapter_error +end +``` + +Or better, with a model fallback chain: + +```ruby +retry_policy do + escalate "gpt-4.1-nano", "gpt-4.1-mini" + retry_on :validation_failed, :parse_error, :adapter_error +end +``` + ## 0.6.4 (2026-04-20) ### Features diff --git a/Gemfile.lock b/Gemfile.lock index 5c17dd9..68c1daa 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - ruby_llm-contract (0.6.4) + ruby_llm-contract (0.7.0) dry-types (~> 1.7) ruby_llm (~> 1.0) ruby_llm-schema (~> 0.3) @@ -258,7 +258,7 @@ CHECKSUMS rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035 ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33 ruby_llm (1.14.0) sha256=57c6f7034fc4a44504ea137d70f853b07824f1c1cdbe774ab3ab3522e7098deb - ruby_llm-contract (0.6.4) + ruby_llm-contract (0.7.0) ruby_llm-schema (0.3.0) sha256=a591edc5ca1b7f0304f0e2261de61ba4b3bea17be09f5cf7558153adfda3dec6 ruby_parser (3.22.0) sha256=1eb4937cd9eb220aa2d194e352a24dba90aef00751e24c8dfffdb14000f15d23 rubycritic (4.12.0) sha256=024fed90fe656fa939f6ea80aab17569699ac3863d0b52fd72cb99892247abc8 diff --git a/lib/ruby_llm/contract/step/adapter_caller.rb b/lib/ruby_llm/contract/step/adapter_caller.rb index f4809cd..19a4b15 100644 --- a/lib/ruby_llm/contract/step/adapter_caller.rb +++ b/lib/ruby_llm/contract/step/adapter_caller.rb @@ -1,9 +1,20 @@ # frozen_string_literal: true +require "faraday" + module RubyLLM module Contract module Step class AdapterCaller + # Exceptions treated as :adapter_error (retryable when explicitly opted in). + # RubyLLM::Error covers provider-semantic errors (auth, bad request, + # rate limit, server error, context length). Faraday::Error covers + # transport failures that escape ruby_llm's Faraday retry middleware + # after exhaustion (Faraday::TimeoutError, Faraday::ConnectionFailed). + # Anything else (NoMethodError, programmer ArgumentError from adapter + # code, etc.) propagates — those are bugs, not retry candidates. + ADAPTER_ERRORS = [::RubyLLM::Error, ::Faraday::Error].freeze + def initialize(adapter:, adapter_options:) @adapter = adapter @adapter_options = adapter_options @@ -14,8 +25,14 @@ def call(messages) response = @adapter.call(messages: messages, **@adapter_options) latency_ms = ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000).round [response, latency_ms] - rescue StandardError => error - [Result.new(status: :adapter_error, raw_output: nil, parsed_output: nil, validation_errors: [error.message]), 0] + rescue *ADAPTER_ERRORS => e + result = Result.new( + status: :adapter_error, + raw_output: nil, + parsed_output: nil, + validation_errors: [e.message] + ) + [result, 0] end end end diff --git a/lib/ruby_llm/contract/step/retry_policy.rb b/lib/ruby_llm/contract/step/retry_policy.rb index 17e8ce2..d99c4fc 100644 --- a/lib/ruby_llm/contract/step/retry_policy.rb +++ b/lib/ruby_llm/contract/step/retry_policy.rb @@ -6,7 +6,14 @@ module Step class RetryPolicy attr_reader :max_attempts, :retryable_statuses - DEFAULT_RETRY_ON = %i[validation_failed parse_error adapter_error].freeze + # Breaking (0.7.0): :adapter_error removed from defaults. ruby_llm's Faraday + # middleware already retries transport errors (RateLimitError, ServerError, + # ServiceUnavailableError, OverloadedError, timeouts). Retrying on + # :adapter_error against the same model re-runs what transport already did. + # Opt in explicitly with `retry_on :adapter_error` — only meaningful paired + # with `escalate "model_a", "model_b"` (a different model may bypass what + # transport retry could not). + DEFAULT_RETRY_ON = %i[validation_failed parse_error].freeze def initialize(models: nil, attempts: nil, retry_on: nil, &block) @configs = [] diff --git a/lib/ruby_llm/contract/version.rb b/lib/ruby_llm/contract/version.rb index d398f22..044e181 100644 --- a/lib/ruby_llm/contract/version.rb +++ b/lib/ruby_llm/contract/version.rb @@ -2,6 +2,6 @@ module RubyLLM module Contract - VERSION = "0.6.4" + VERSION = "0.7.0" end end diff --git a/spec/integration/step_end_to_end_spec.rb b/spec/integration/step_end_to_end_spec.rb index 1313150..ec0880e 100644 --- a/spec/integration/step_end_to_end_spec.rb +++ b/spec/integration/step_end_to_end_spec.rb @@ -137,7 +137,7 @@ class ClassifyIntent < RubyLLM::Contract::Step::Base it "returns :adapter_error with trace containing messages and model" do error_adapter = Class.new(RubyLLM::Contract::Adapters::Base) do def call(**) - raise StandardError, "connection refused" + raise RubyLLM::Error.new(nil, "connection refused") end end.new diff --git a/spec/ruby_llm/contract/adversarial_round10_spec.rb b/spec/ruby_llm/contract/adversarial_round10_spec.rb index 39256d2..4bd91bb 100644 --- a/spec/ruby_llm/contract/adversarial_round10_spec.rb +++ b/spec/ruby_llm/contract/adversarial_round10_spec.rb @@ -1433,9 +1433,15 @@ def to_json_schema expect(policy.retryable?(result)).to be true end - it "adapter_error is retryable by default" do + it "adapter_error is NOT retryable by default (breaking 0.7.0: requires explicit retry_on :adapter_error)" do policy = RubyLLM::Contract::Step::RetryPolicy.new(attempts: 3) result = RubyLLM::Contract::Step::Result.new(status: :adapter_error, raw_output: nil, parsed_output: nil) + expect(policy.retryable?(result)).to be false + end + + it "adapter_error IS retryable when explicitly opted in" do + policy = RubyLLM::Contract::Step::RetryPolicy.new(attempts: 3, retry_on: %i[validation_failed parse_error adapter_error]) + result = RubyLLM::Contract::Step::Result.new(status: :adapter_error, raw_output: nil, parsed_output: nil) expect(policy.retryable?(result)).to be true end diff --git a/spec/ruby_llm/contract/edge_cases_spec.rb b/spec/ruby_llm/contract/edge_cases_spec.rb index bd867a5..1dd5690 100644 --- a/spec/ruby_llm/contract/edge_cases_spec.rb +++ b/spec/ruby_llm/contract/edge_cases_spec.rb @@ -803,7 +803,7 @@ it "populates trace with messages and model even on adapter failure" do failing_adapter = Class.new(RubyLLM::Contract::Adapters::Base) do def call(messages:, **_options) # rubocop:disable Lint/UnusedMethodArgument - raise "network failure" + raise RubyLLM::Error.new(nil, "network failure") end end.new diff --git a/spec/ruby_llm/contract/step/retry_integration_spec.rb b/spec/ruby_llm/contract/step/retry_integration_spec.rb index 4640355..1ba4582 100644 --- a/spec/ruby_llm/contract/step/retry_integration_spec.rb +++ b/spec/ruby_llm/contract/step/retry_integration_spec.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require "faraday" + RSpec.describe "retry_policy integration" do before { RubyLLM::Contract.reset_configuration! } @@ -165,12 +167,12 @@ end describe "adapter_error retry" do - it "retries on :adapter_error (transient network/timeout failures)" do + it "retries on :adapter_error (transient network/timeout failures, with explicit opt-in)" do call_count = 0 flaky_adapter = Object.new flaky_adapter.define_singleton_method(:call) do |**_opts| call_count += 1 - raise StandardError, "connection timeout" if call_count < 3 + raise RubyLLM::Error.new(nil, "connection timeout") if call_count < 3 RubyLLM::Contract::Adapters::Response.new(content: '{"key": "good"}', usage: {}) end @@ -183,7 +185,10 @@ parse :json invariant("key not empty") { |o| o[:key].to_s != "" } end - retry_policy { attempts 3 } + retry_policy do + attempts 3 + retry_on :validation_failed, :parse_error, :adapter_error + end end result = step.run("test", context: { adapter: flaky_adapter }) @@ -195,7 +200,7 @@ it "returns :adapter_error after all retries exhausted" do failing_adapter = Object.new failing_adapter.define_singleton_method(:call) do |**_opts| - raise StandardError, "connection refused" + raise RubyLLM::Error.new(nil, "connection refused") end step = Class.new(RubyLLM::Contract::Step::Base) do @@ -203,7 +208,10 @@ output_type RubyLLM::Contract::Types::Hash prompt { user "{input}" } contract { parse :json } - retry_policy { attempts 3 } + retry_policy do + attempts 3 + retry_on :validation_failed, :parse_error, :adapter_error + end end result = step.run("test", context: { adapter: failing_adapter }) @@ -213,12 +221,12 @@ expect(result.trace[:attempts].map { |a| a[:status] }).to eq(%i[adapter_error adapter_error adapter_error]) end - it "escalates model on adapter_error" do + it "escalates model on adapter_error (with explicit opt-in)" do models_used = [] flaky_adapter = Object.new flaky_adapter.define_singleton_method(:call) do |**opts| models_used << opts[:model] - raise StandardError, "timeout" if opts[:model] == "nano" + raise RubyLLM::Error.new(nil, "timeout") if opts[:model] == "nano" RubyLLM::Contract::Adapters::Response.new(content: '{"key": "ok"}', usage: {}) end @@ -231,7 +239,10 @@ parse :json invariant("key not empty") { |o| o[:key].to_s != "" } end - retry_policy { escalate "nano", "mini", "full" } + retry_policy do + escalate "nano", "mini", "full" + retry_on :validation_failed, :parse_error, :adapter_error + end end result = step.run("test", context: { adapter: flaky_adapter }) @@ -465,4 +476,42 @@ expect(result.trace).not_to have_key(:attempts) end end + + # Regression: Codex review of PR #13 (0.7.0) found that narrowing + # AdapterCaller#rescue to RubyLLM::Error only would leak Faraday transport + # errors (TimeoutError, ConnectionFailed) out of Step.run — ruby_llm's Faraday + # retry re-raises these after exhaustion and they are NOT RubyLLM::Error. + # AdapterCaller must rescue Faraday::Error too, otherwise production network + # outages crash instead of becoming retryable :adapter_error results. + describe "transport error handling (Codex review regression)" do + it "converts Faraday::TimeoutError into :adapter_error instead of raising" do + adapter = Object.new + adapter.define_singleton_method(:call) { |**_opts| raise Faraday::TimeoutError, "timeout after retries" } + + step = Class.new(RubyLLM::Contract::Step::Base) { prompt "{input}" } + result = step.run("hi", context: { adapter: adapter }) + + expect(result.status).to eq(:adapter_error) + expect(result.validation_errors).to include(/timeout/) + end + + it "converts Faraday::ConnectionFailed into :adapter_error" do + adapter = Object.new + adapter.define_singleton_method(:call) { |**_opts| raise Faraday::ConnectionFailed, "connection refused" } + + step = Class.new(RubyLLM::Contract::Step::Base) { prompt "{input}" } + result = step.run("hi", context: { adapter: adapter }) + + expect(result.status).to eq(:adapter_error) + end + + it "propagates NoMethodError from adapter (programmer bug, not transport)" do + adapter = Object.new + adapter.define_singleton_method(:call) { |**_opts| nil.non_existent_method } + + step = Class.new(RubyLLM::Contract::Step::Base) { prompt "{input}" } + + expect { step.run("hi", context: { adapter: adapter }) }.to raise_error(NoMethodError) + end + end end diff --git a/spec/ruby_llm/contract/step/retry_policy_spec.rb b/spec/ruby_llm/contract/step/retry_policy_spec.rb index 36b10bb..d99dbbe 100644 --- a/spec/ruby_llm/contract/step/retry_policy_spec.rb +++ b/spec/ruby_llm/contract/step/retry_policy_spec.rb @@ -30,9 +30,9 @@ expect(policy.max_attempts).to eq(5) end - it "defaults retryable statuses" do + it "defaults retryable statuses (breaking 0.7.0: :adapter_error removed — opt in explicitly)" do policy = described_class.new - expect(policy.retryable_statuses).to eq(%i[validation_failed parse_error adapter_error]) + expect(policy.retryable_statuses).to eq(%i[validation_failed parse_error]) end it "allows custom retryable statuses" do diff --git a/spec/ruby_llm/contract/step/runner_spec.rb b/spec/ruby_llm/contract/step/runner_spec.rb index 0971f0a..1ea59c2 100644 --- a/spec/ruby_llm/contract/step/runner_spec.rb +++ b/spec/ruby_llm/contract/step/runner_spec.rb @@ -113,7 +113,7 @@ it "returns :adapter_error" do failing_adapter = Class.new(RubyLLM::Contract::Adapters::Base) do def call(messages:, **_options) # rubocop:disable Lint/UnusedMethodArgument - raise StandardError, "connection timeout" + raise RubyLLM::Error.new(nil, "connection timeout") end end.new definition = RubyLLM::Contract::Definition.new