From 393148d0848ed3d87bc3cae255bc484059e3549f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Kutryj?= Date: Fri, 29 May 2026 10:05:03 +0200 Subject: [PATCH 1/3] feat(zebra): gate resilient git checkout behind git_clone_slow_retry feature Injects SEMAPHORE_GIT_CLONE_SLOW_RETRY=true into the job environment when the :git_clone_slow_retry feature is enabled for the organization. This is the producer side of the toolbox checkout-resiliency work (semaphoreci/toolbox#538, semaphoreci/toolbox#539): the toolbox `checkout` reads this env var to opt into slow-clone detection and resilient retry, and is a no-op when the var is absent. Mirrors the existing TestResults / :test_results_no_trim pattern: a small feature-gated module appended to the job env var list in JobRequestFactory, rather than threading org_id through Repository.* (which would churn the exact-match repository tests). Only the on/off switch is injected; the toolbox keeps sensible defaults for the tuning knobs. The feature still needs registering in the feature management backend before it can be toggled per-org in production. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../lib/zebra/workers/job_request_factory.ex | 2 ++ .../job_request_factory/git_checkout.ex | 30 +++++++++++++++++++ zebra/test/support/stubbed_provider.ex | 6 ++++ .../job_request_factory/git_checkout_test.exs | 24 +++++++++++++++ 4 files changed, 62 insertions(+) create mode 100644 zebra/lib/zebra/workers/job_request_factory/git_checkout.ex create mode 100644 zebra/test/zebra/workers/job_request_factory/git_checkout_test.exs diff --git a/zebra/lib/zebra/workers/job_request_factory.ex b/zebra/lib/zebra/workers/job_request_factory.ex index cb74a62ae..dd81d98f5 100644 --- a/zebra/lib/zebra/workers/job_request_factory.ex +++ b/zebra/lib/zebra/workers/job_request_factory.ex @@ -7,6 +7,7 @@ defmodule Zebra.Workers.JobRequestFactory do Artifacthub, Cache, CallbackToken, + GitCheckout, JobRequest, Loghub2, Machine, @@ -102,6 +103,7 @@ defmodule Zebra.Workers.JobRequestFactory do cache_env_vars ++ ToolboxInstall.env_vars(job) ++ TestResults.env_vars(org_id) ++ + GitCheckout.env_vars(org_id) ++ open_id_token_env_vars ++ repo_env_vars ++ Enum.flat_map(all_secrets.job_secrets, & &1.env_vars) ++ diff --git a/zebra/lib/zebra/workers/job_request_factory/git_checkout.ex b/zebra/lib/zebra/workers/job_request_factory/git_checkout.ex new file mode 100644 index 000000000..cd47e555c --- /dev/null +++ b/zebra/lib/zebra/workers/job_request_factory/git_checkout.ex @@ -0,0 +1,30 @@ +defmodule Zebra.Workers.JobRequestFactory.GitCheckout do + @moduledoc """ + Handles resilient git checkout configuration for jobs. + + When the :git_clone_slow_retry feature is enabled for an organization, + this module adds the SEMAPHORE_GIT_CLONE_SLOW_RETRY environment variable, + which opts the toolbox `checkout` into slow-clone detection and resilient + retry (speed monitoring, retries, and alternative-endpoint fallback). + + The toolbox keeps sensible defaults for the tuning knobs + (threshold/timeout/grace/retries), so only the on/off switch is injected + here; the feature is a no-op in the toolbox when this var is absent. + """ + + alias Zebra.Workers.JobRequestFactory.JobRequest + + @doc """ + Returns environment variables for resilient git checkout. + + If the :git_clone_slow_retry feature is enabled for the organization, + adds SEMAPHORE_GIT_CLONE_SLOW_RETRY=true to enable it. + """ + def env_vars(org_id) do + if FeatureProvider.feature_enabled?(:git_clone_slow_retry, param: org_id) do + [JobRequest.env_var("SEMAPHORE_GIT_CLONE_SLOW_RETRY", "true")] + else + [] + end + end +end diff --git a/zebra/test/support/stubbed_provider.ex b/zebra/test/support/stubbed_provider.ex index b9d4c94b8..2fbacfba4 100644 --- a/zebra/test/support/stubbed_provider.ex +++ b/zebra/test/support/stubbed_provider.ex @@ -6,6 +6,7 @@ defmodule Support.StubbedProvider do @exclude_from_brownouts_org_id "org-exclude-from-brownouts-enabled" @test_results_no_trim_org_id "org-test-results-no-trim-enabled" + @git_clone_slow_retry_org_id "org-git-clone-slow-retry-enabled" @impl FeatureProvider.Provider def provide_features(org_id \\ nil, _opts \\ []) do @@ -18,11 +19,13 @@ defmodule Support.StubbedProvider do feature("e1_to_f1_migration", e1_to_f1_traits(org_id)), feature("e2_to_f1_migration", e2_to_f1_traits(org_id)), feature("test_results_no_trim", test_results_no_trim_traits(org_id)), + feature("git_clone_slow_retry", git_clone_slow_retry_traits(org_id)), feature("exclude_from_brownouts", exclude_from_brownouts_traits(org_id)) ]} end def test_results_no_trim_org_id, do: @test_results_no_trim_org_id + def git_clone_slow_retry_org_id, do: @git_clone_slow_retry_org_id def exclude_from_brownouts_org_id, do: @exclude_from_brownouts_org_id def e1_to_f1_org_id, do: @e1_to_f1_org_id @@ -57,6 +60,9 @@ defmodule Support.StubbedProvider do defp test_results_no_trim_traits(@test_results_no_trim_org_id), do: [:enabled] defp test_results_no_trim_traits(_org_id), do: [:hidden] + defp git_clone_slow_retry_traits(@git_clone_slow_retry_org_id), do: [:enabled] + defp git_clone_slow_retry_traits(_org_id), do: [:hidden] + defp exclude_from_brownouts_traits(@exclude_from_brownouts_org_id), do: [:enabled] defp exclude_from_brownouts_traits(_org_id), do: [:hidden] diff --git a/zebra/test/zebra/workers/job_request_factory/git_checkout_test.exs b/zebra/test/zebra/workers/job_request_factory/git_checkout_test.exs new file mode 100644 index 000000000..034701fc3 --- /dev/null +++ b/zebra/test/zebra/workers/job_request_factory/git_checkout_test.exs @@ -0,0 +1,24 @@ +defmodule Zebra.Workers.JobRequestFactory.GitCheckoutTest do + use Zebra.DataCase + + alias Zebra.Workers.JobRequestFactory.GitCheckout + + describe "env_vars/1" do + test "returns empty list when feature is disabled" do + org_id = Ecto.UUID.generate() + + assert GitCheckout.env_vars(org_id) == [] + end + + test "returns SEMAPHORE_GIT_CLONE_SLOW_RETRY env var when feature is enabled" do + org_id = Support.StubbedProvider.git_clone_slow_retry_org_id() + + env_vars = GitCheckout.env_vars(org_id) + + assert length(env_vars) == 1 + [env_var] = env_vars + assert env_var["name"] == "SEMAPHORE_GIT_CLONE_SLOW_RETRY" + assert env_var["value"] == Base.encode64("true") + end + end +end From fee4acd857af6021f5146e7101a1734df724f890 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Kutryj?= Date: Fri, 29 May 2026 10:13:11 +0200 Subject: [PATCH 2/3] test(zebra): bump stubbed feature count after adding git_clone_slow_retry feature_provider_invalidator_worker_test asserts the length of the full StubbedProvider feature list; adding :git_clone_slow_retry took it from 8 to 9. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../zebra/workers/feature_provider_invalidator_worker_test.exs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zebra/test/zebra/workers/feature_provider_invalidator_worker_test.exs b/zebra/test/zebra/workers/feature_provider_invalidator_worker_test.exs index ac7621f0e..2a527fafa 100644 --- a/zebra/test/zebra/workers/feature_provider_invalidator_worker_test.exs +++ b/zebra/test/zebra/workers/feature_provider_invalidator_worker_test.exs @@ -99,7 +99,7 @@ defmodule Zebra.Workers.FeatureProviderInvalidatorWorkerTest do Worker.features_changed(callback_message) {:ok, features} = FeatureProvider.list_features() - assert length(features) == 8 + assert length(features) == 9 end test "when the organization feature state changes, organization feature caches are invalidated" do From 2167e993319256c76473550d59c154be37bf87be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Kutryj?= Date: Fri, 29 May 2026 11:01:24 +0200 Subject: [PATCH 3/3] feat(zebra): only inject git_clone_slow_retry on cloud agents Per review: the resilient checkout (GeoDNS alternative-endpoint fallback, DoH lookups) targets GitHub.com reachability from Semaphore's cloud egress. On self-hosted agents the network is the customer's own and the DoH endpoint may be blocked, so gate the injection on cloud agents (not Job.self_hosted?/1) in addition to the feature flag. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../lib/zebra/workers/job_request_factory.ex | 2 +- .../job_request_factory/git_checkout.ex | 20 +++++++++++++++---- .../job_request_factory/git_checkout_test.exs | 18 +++++++++++++---- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/zebra/lib/zebra/workers/job_request_factory.ex b/zebra/lib/zebra/workers/job_request_factory.ex index dd81d98f5..0b28552a7 100644 --- a/zebra/lib/zebra/workers/job_request_factory.ex +++ b/zebra/lib/zebra/workers/job_request_factory.ex @@ -103,7 +103,7 @@ defmodule Zebra.Workers.JobRequestFactory do cache_env_vars ++ ToolboxInstall.env_vars(job) ++ TestResults.env_vars(org_id) ++ - GitCheckout.env_vars(org_id) ++ + GitCheckout.env_vars(job, org_id) ++ open_id_token_env_vars ++ repo_env_vars ++ Enum.flat_map(all_secrets.job_secrets, & &1.env_vars) ++ diff --git a/zebra/lib/zebra/workers/job_request_factory/git_checkout.ex b/zebra/lib/zebra/workers/job_request_factory/git_checkout.ex index cd47e555c..cf63680d8 100644 --- a/zebra/lib/zebra/workers/job_request_factory/git_checkout.ex +++ b/zebra/lib/zebra/workers/job_request_factory/git_checkout.ex @@ -7,24 +7,36 @@ defmodule Zebra.Workers.JobRequestFactory.GitCheckout do which opts the toolbox `checkout` into slow-clone detection and resilient retry (speed monitoring, retries, and alternative-endpoint fallback). + Only injected on cloud agents. The resilient behaviour (GeoDNS-based + alternative-endpoint fallback, DoH lookups) targets GitHub.com reachability + from Semaphore's cloud egress; on self-hosted agents the network is the + customer's own, so injecting it there is inappropriate (and the DoH + endpoint may well be blocked). + The toolbox keeps sensible defaults for the tuning knobs (threshold/timeout/grace/retries), so only the on/off switch is injected here; the feature is a no-op in the toolbox when this var is absent. """ + alias Zebra.Models.Job alias Zebra.Workers.JobRequestFactory.JobRequest @doc """ Returns environment variables for resilient git checkout. - If the :git_clone_slow_retry feature is enabled for the organization, - adds SEMAPHORE_GIT_CLONE_SLOW_RETRY=true to enable it. + Adds SEMAPHORE_GIT_CLONE_SLOW_RETRY=true when the job runs on a cloud agent + and the :git_clone_slow_retry feature is enabled for the organization. """ - def env_vars(org_id) do - if FeatureProvider.feature_enabled?(:git_clone_slow_retry, param: org_id) do + def env_vars(job, org_id) do + if inject?(job, org_id) do [JobRequest.env_var("SEMAPHORE_GIT_CLONE_SLOW_RETRY", "true")] else [] end end + + defp inject?(job, org_id) do + not Job.self_hosted?(job.machine_type) and + FeatureProvider.feature_enabled?(:git_clone_slow_retry, param: org_id) + end end diff --git a/zebra/test/zebra/workers/job_request_factory/git_checkout_test.exs b/zebra/test/zebra/workers/job_request_factory/git_checkout_test.exs index 034701fc3..75e48a348 100644 --- a/zebra/test/zebra/workers/job_request_factory/git_checkout_test.exs +++ b/zebra/test/zebra/workers/job_request_factory/git_checkout_test.exs @@ -1,24 +1,34 @@ defmodule Zebra.Workers.JobRequestFactory.GitCheckoutTest do use Zebra.DataCase + alias Zebra.Models.Job alias Zebra.Workers.JobRequestFactory.GitCheckout - describe "env_vars/1" do + @cloud_job %Job{machine_type: "e1-standard-2"} + @self_hosted_job %Job{machine_type: "s1-local"} + + describe "env_vars/2" do test "returns empty list when feature is disabled" do org_id = Ecto.UUID.generate() - assert GitCheckout.env_vars(org_id) == [] + assert GitCheckout.env_vars(@cloud_job, org_id) == [] end - test "returns SEMAPHORE_GIT_CLONE_SLOW_RETRY env var when feature is enabled" do + test "returns SEMAPHORE_GIT_CLONE_SLOW_RETRY env var on a cloud agent when feature is enabled" do org_id = Support.StubbedProvider.git_clone_slow_retry_org_id() - env_vars = GitCheckout.env_vars(org_id) + env_vars = GitCheckout.env_vars(@cloud_job, org_id) assert length(env_vars) == 1 [env_var] = env_vars assert env_var["name"] == "SEMAPHORE_GIT_CLONE_SLOW_RETRY" assert env_var["value"] == Base.encode64("true") end + + test "returns empty list on a self-hosted agent even when feature is enabled" do + org_id = Support.StubbedProvider.git_clone_slow_retry_org_id() + + assert GitCheckout.env_vars(@self_hosted_job, org_id) == [] + end end end