From 274c100dff92a862929666d4588237150b449725 Mon Sep 17 00:00:00 2001 From: Taleef Date: Wed, 3 Jun 2026 19:44:13 +0500 Subject: [PATCH 1/4] perf(ci): shard backend tests across 6 parallel runners The backend `./gradlew test` step is the entire CI bottleneck (~44 min), dominated by CQL-heavy integration tests (cqf-fhir-cr evaluations across the synthetic population plus historical-run seeding), previously run ~2-way parallel on a single runner. Frontend is ~50s and E2E is manual. Split the suite across a 6-way matrix, each runner executing a deterministic hash-based subset of test classes (union of all shards = full suite). Within a shard tests still fork 2-way. Only shard 0 writes the shared Gradle cache to avoid concurrent-write contention. build.gradle.kts gains overridable GRADLE_TEST_FORKS and the TEST_SHARD_TOTAL/TEST_SHARD_INDEX selection; with no shard env (local runs) the full suite runs unchanged. Also adds perf/** to CI push triggers so this branch self-verifies. Co-Authored-By: Claude Opus 4.8 --- .github/workflows/ci.yml | 21 ++++++++++++++++----- backend/build.gradle.kts | 24 ++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 16bcb47..3a1b069 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,7 +2,7 @@ name: CI on: push: - branches: [main, "claude/**", "fix/**", "feat/**"] + branches: [main, "claude/**", "fix/**", "feat/**", "perf/**"] pull_request: branches: [main] workflow_dispatch: @@ -13,14 +13,23 @@ concurrency: jobs: backend: - name: Backend — Test + name: Backend — Test (shard ${{ matrix.shard }}) runs-on: ubuntu-latest permissions: checks: write contents: read + strategy: + fail-fast: false + matrix: + shard: [0, 1, 2, 3, 4, 5] defaults: run: working-directory: backend + env: + # Split the CQL-heavy suite across parallel runners; keep this in sync with the + # length of matrix.shard above. 
See build.gradle.kts for the hash-based selection. + TEST_SHARD_TOTAL: "6" + TEST_SHARD_INDEX: ${{ matrix.shard }} steps: - uses: actions/checkout@v4 - uses: actions/setup-java@v4 @@ -29,14 +38,16 @@ jobs: java-version: 21 - uses: gradle/actions/setup-gradle@v4 with: - cache-read-only: false - - name: Run backend tests + # Only one shard writes the shared Gradle cache to avoid concurrent-write + # contention; the rest read it. + cache-read-only: ${{ matrix.shard != 0 }} + - name: Run backend tests (shard ${{ matrix.shard }}/6) run: ./gradlew test --build-cache --no-daemon - name: Publish test results uses: dorny/test-reporter@v1 if: always() with: - name: Backend Tests + name: Backend Tests (shard ${{ matrix.shard }}) path: backend/build/test-results/test/*.xml reporter: java-junit diff --git a/backend/build.gradle.kts b/backend/build.gradle.kts index e9e2730..110d3c7 100644 --- a/backend/build.gradle.kts +++ b/backend/build.gradle.kts @@ -61,8 +61,28 @@ dependencyManagement { tasks.withType<Test> { useJUnitPlatform() // CI gets two forks so long-running Spring/Testcontainers classes can overlap - // without turning the runner into a noisy stampede. - maxParallelForks = if (System.getenv("CI") == "true") 2 else 1 + // without turning the runner into a noisy stampede. Override via GRADLE_TEST_FORKS. + maxParallelForks = System.getenv("GRADLE_TEST_FORKS")?.toIntOrNull() + ?: if (System.getenv("CI") == "true") 2 else 1 + + // Optional CI matrix sharding: split the test classes across parallel runner jobs + // by a stable path hash, so each class runs in exactly one shard and the union of + // shards 0..TEST_SHARD_TOTAL-1 covers the whole suite. This is the lever that cuts + // the CQL-heavy backend suite from ~44 min on one runner to a few minutes across + // several. With no shard env set (local runs), the full suite runs as before.
+ val shardTotal = System.getenv("TEST_SHARD_TOTAL")?.toIntOrNull() + val shardIndex = System.getenv("TEST_SHARD_INDEX")?.toIntOrNull() + if (shardTotal != null && shardTotal > 1 && shardIndex != null) { + setCandidateClassFiles(candidateClassFiles.filter { classFile -> + Math.floorMod(classFile.path.replace('\\', '/').hashCode(), shardTotal) == shardIndex + }) + doFirst { + logger.lifecycle( + "Backend test shard $shardIndex/$shardTotal: ${candidateClassFiles.files.size} candidate class files" + ) + } + } + // Keep binary in-progress results outside the OneDrive tree so sync cannot // race against Gradle's rename of these short-lived files (NoSuchFileException). binaryResultsDirectory.set( From e9bdf9eaa34677812b96719055ad24bc85435cd1 Mon Sep 17 00:00:00 2001 From: Taleef Date: Wed, 3 Jun 2026 19:47:10 +0500 Subject: [PATCH 2/4] fix(ci): use Test.include(Spec) for shard selection (Gradle 9 compatible) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit setCandidateClassFiles is not a valid setter in Gradle 9.4.1. Use the Test task's PatternFilterable include(Spec) predicate — the documented mechanism for filtering candidate test classes — to assign each class to a shard by its '/'-separated relative path hash. Directories pass so the tree is traversed; the classpath is unaffected so @Nested discovery and class loading still work. 
Co-Authored-By: Claude Opus 4.8 --- backend/build.gradle.kts | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/backend/build.gradle.kts b/backend/build.gradle.kts index 110d3c7..ab25747 100644 --- a/backend/build.gradle.kts +++ b/backend/build.gradle.kts @@ -1,3 +1,6 @@ +import org.gradle.api.file.FileTreeElement +import org.gradle.api.specs.Spec + plugins { java id("org.springframework.boot") version "3.3.5" @@ -73,13 +76,15 @@ tasks.withType<Test> { val shardTotal = System.getenv("TEST_SHARD_TOTAL")?.toIntOrNull() val shardIndex = System.getenv("TEST_SHARD_INDEX")?.toIntOrNull() if (shardTotal != null && shardTotal > 1 && shardIndex != null) { - setCandidateClassFiles(candidateClassFiles.filter { classFile -> - Math.floorMod(classFile.path.replace('\\', '/').hashCode(), shardTotal) == shardIndex + // FileTreeElement.path is always '/'-separated and relative to the test + // classes root, so the hash is stable across OSes. Directories must pass so + // the tree is traversed into; only .class candidates are assigned to a shard. + include(Spec<FileTreeElement> { element -> + element.isDirectory || + Math.floorMod(element.path.hashCode(), shardTotal) == shardIndex }) doFirst { - logger.lifecycle( - "Backend test shard $shardIndex/$shardTotal: ${candidateClassFiles.files.size} candidate class files" - ) + logger.lifecycle("Backend test shard $shardIndex/$shardTotal active") } } From c7c0fc81607df0d28212720db0bd499c8a4633ec Mon Sep 17 00:00:00 2001 From: Taleef Date: Wed, 3 Jun 2026 20:21:52 +0500 Subject: [PATCH 3/4] perf(ci): 8 shards x 4 forks + per-class timing diagnostics The 6-shard run cut wall-clock 44->25 min but stayed bottlenecked by one lopsided shard (hash distribution clustered heavy CQL integration classes). Increase to 8 shards and fork 4-wide within each (ubuntu-latest = 4 vCPU) so clustered heavy classes overlap; cap per-fork heap at 1.5g so 4 JVMs + their Postgres containers fit the runner.
Add an always-on step that prints per-class suite durations, so if balance is still uneven we can move to time-weighted bin-packing with real data. Co-Authored-By: Claude Opus 4.8 --- .github/workflows/ci.yml | 15 ++++++++++++--- backend/build.gradle.kts | 11 ++++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3a1b069..2a61e21 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,14 +21,14 @@ jobs: strategy: fail-fast: false matrix: - shard: [0, 1, 2, 3, 4, 5] + shard: [0, 1, 2, 3, 4, 5, 6, 7] defaults: run: working-directory: backend env: # Split the CQL-heavy suite across parallel runners; keep this in sync with the # length of matrix.shard above. See build.gradle.kts for the hash-based selection. - TEST_SHARD_TOTAL: "6" + TEST_SHARD_TOTAL: "8" TEST_SHARD_INDEX: ${{ matrix.shard }} steps: - uses: actions/checkout@v4 @@ -41,8 +41,17 @@ jobs: # Only one shard writes the shared Gradle cache to avoid concurrent-write # contention; the rest read it. 
cache-read-only: ${{ matrix.shard != 0 }} - - name: Run backend tests (shard ${{ matrix.shard }}/6) + - name: Run backend tests (shard ${{ matrix.shard }}/8) run: ./gradlew test --build-cache --no-daemon + - name: Per-class timings (shard balancing data) + if: always() + run: | + shopt -s nullglob + for f in build/test-results/test/*.xml; do + cls=$(basename "$f" .xml); cls=${cls#TEST-} + t=$(sed -n 's/.*<testsuite[^>]*[[:space:]]time="\([0-9.]*\)".*/\1/p' "$f" | head -1) + echo "TIMING ${t:-0} ${cls}" + done | sort -t' ' -k2 -gr - name: Publish test results uses: dorny/test-reporter@v1 if: always() diff --git a/backend/build.gradle.kts b/backend/build.gradle.kts index ab25747..d1b4926 100644 --- a/backend/build.gradle.kts +++ b/backend/build.gradle.kts @@ -63,10 +63,15 @@ dependencyManagement { tasks.withType<Test> { useJUnitPlatform() - // CI gets two forks so long-running Spring/Testcontainers classes can overlap - // without turning the runner into a noisy stampede. Override via GRADLE_TEST_FORKS. + // CI forks 4-wide so heavy Spring/CQL/Testcontainers classes in a shard overlap + // (ubuntu-latest has 4 vCPUs). Override via GRADLE_TEST_FORKS. maxParallelForks = System.getenv("GRADLE_TEST_FORKS")?.toIntOrNull() - ?: if (System.getenv("CI") == "true") 2 else 1 + ?: if (System.getenv("CI") == "true") 4 else 1 + // Cap per-fork heap so 4 JVMs + their Postgres containers fit the runner's RAM; + // prod runs the app on 768m, so 1.5g per test fork is ample.
+ if (System.getenv("CI") == "true") { + maxHeapSize = "1536m" + } // Optional CI matrix sharding: split the test classes across parallel runner jobs // by a stable path hash, so each class runs in exactly one shard and the union of From 9b5e471e2e1d634898a290d64248de70595f90ec Mon Sep 17 00:00:00 2001 From: Taleef Date: Wed, 3 Jun 2026 21:09:27 +0500 Subject: [PATCH 4/4] perf(test): run population once per class for read-only heavy integration tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The slowest classes weren't a sharding problem — they re-ran a full-population CQL evaluation (~70s) in @BeforeEach, once per test method: - EvidenceAccessIntegrationTest: 14 tests x full run = 1022s. The evidence access/role tests only need a case to exist and filter audit by their own upload id, so they share one population run via @BeforeAll + @TestInstance( PER_CLASS). ~1022s -> ~90s. - CaseFlowRerunIntegrationTest: 5 tests x full run = 422s. Each test targets a distinct outcome-type case with non-overlapping mutations, so one shared run is sufficient. ~422s -> ~140s. ScopedRun/CaseUpsert/Major1 are intentionally left as-is: their reruns are the behavior under test (idempotency, rerun-to-verify, empty-table historical seed) and need per-test isolation. 
Co-Authored-By: Claude Opus 4.8 --- .../caseflow/CaseFlowRerunIntegrationTest.java | 9 +++++++-- .../workwell/web/EvidenceAccessIntegrationTest.java | 12 +++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/backend/src/test/java/com/workwell/caseflow/CaseFlowRerunIntegrationTest.java b/backend/src/test/java/com/workwell/caseflow/CaseFlowRerunIntegrationTest.java index 5d724e0..c214470 100644 --- a/backend/src/test/java/com/workwell/caseflow/CaseFlowRerunIntegrationTest.java +++ b/backend/src/test/java/com/workwell/caseflow/CaseFlowRerunIntegrationTest.java @@ -7,13 +7,15 @@ import java.util.List; import java.util.Map; import java.util.UUID; -import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.jdbc.core.JdbcTemplate; @SpringBootTest +@TestInstance(TestInstance.Lifecycle.PER_CLASS) class CaseFlowRerunIntegrationTest extends AbstractIntegrationTest { @Autowired @@ -25,7 +27,10 @@ class CaseFlowRerunIntegrationTest extends AbstractIntegrationTest { @Autowired private JdbcTemplate jdbcTemplate; - @BeforeEach + // Each test targets a case of a distinct outcome type (COMPLIANT/EXCLUDED/DUE_SOON/ + // OVERDUE/MISSING_DATA) and verifies rerun behavior on it; the targets don't overlap, + // so one shared population run is enough instead of a full run before each of 5 tests. 
+ @BeforeAll void seedData() { jdbcTemplate.execute("TRUNCATE TABLE runs, outcomes, cases, case_actions, run_logs, audit_events, outreach_records, scheduled_appointments, waivers, evidence_attachments CASCADE"); allProgramsRunService.runAllPrograms("All Programs", "admin@workwell.dev"); diff --git a/backend/src/test/java/com/workwell/web/EvidenceAccessIntegrationTest.java b/backend/src/test/java/com/workwell/web/EvidenceAccessIntegrationTest.java index 31083c3..caeedb0 100644 --- a/backend/src/test/java/com/workwell/web/EvidenceAccessIntegrationTest.java +++ b/backend/src/test/java/com/workwell/web/EvidenceAccessIntegrationTest.java @@ -19,8 +19,9 @@ import java.util.Comparator; import java.util.Map; import java.util.UUID; -import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.autoconfigure.web.servlet.AutoConfigureMockMvc; import org.springframework.boot.test.context.SpringBootTest; @@ -37,6 +38,7 @@ "workwell.auth.jwt-secret=test-secret-for-evidence-security" }) @AutoConfigureMockMvc +@TestInstance(TestInstance.Lifecycle.PER_CLASS) class EvidenceAccessIntegrationTest extends AbstractIntegrationTest { private static final Path evidenceRoot = createEvidenceRoot(); @@ -61,8 +63,12 @@ static void evidenceProperties(DynamicPropertyRegistry registry) { @Autowired private ObjectMapper objectMapper; - @BeforeEach - void resetState() throws Exception { + // Evidence access/role tests are read-only against the seeded population: each test + // uploads its own attachment (unique id) and filters audit by that id, so a single + // population run shared across the class is sufficient. This drops the class from + // ~14 full-population runs (~17 min) to one (~90s). 
+ @BeforeAll + void seedPopulationOnce() throws Exception { jdbcTemplate.execute("TRUNCATE TABLE runs, outcomes, cases, case_actions, run_logs, audit_events, evidence_attachments, outreach_records, scheduled_appointments, waivers CASCADE"); deleteEvidenceFiles(); Files.createDirectories(evidenceRoot);