From 921e3a229c34eadd98e2327545c5825f9c5d5cfb Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov
Date: Thu, 2 Jul 2026 11:24:53 -0400
Subject: [PATCH 1/2] Trigger CI baseline (no-op) for per-task sharding A/B

Empty commit so this branch runs the current per-project test-slot sharding
through CI as a baseline, before the per-task sharding change is committed on
top for comparison. No files changed.

Co-Authored-By: Claude Opus 4.8 (1M context)

From 3394c283aff5503eebe27a3c5945dbdbafd5e850 Mon Sep 17 00:00:00 2001
From: Alexey Kuznetsov
Date: Thu, 2 Jul 2026 11:28:17 -0400
Subject: [PATCH 2/2] Shard CI test slots per-task instead of per-project

Rework -Pslot=X/Y sharding so a module's test variants (e.g. jdbc's
test/forkedTest/oldH2Test/oldPostgresTest) hash to independent slots instead
of serializing in a single job. Parse the slot selection once and cache it on
the root project; keep Project.isInSelectedSlot (used by runMuzzle) at project
granularity and add a task-level gate for Test tasks. The *Check aggregate and
all coverage builds stay whole-module, project-slotted so per-module JaCoCo
sees complete execution data.

Co-Authored-By: Claude Opus 4.8 (1M context) --- .../gradle/plugin/ci/CIJobsExtensions.kt | 225 +++++++++++------- .../kotlin/dd-trace-java.ci-jobs.gradle.kts | 10 +- 2 files changed, 150 insertions(+), 85 deletions(-) diff --git a/buildSrc/src/main/kotlin/datadog/gradle/plugin/ci/CIJobsExtensions.kt b/buildSrc/src/main/kotlin/datadog/gradle/plugin/ci/CIJobsExtensions.kt index d6269176a0a..76779c7890b 100644 --- a/buildSrc/src/main/kotlin/datadog/gradle/plugin/ci/CIJobsExtensions.kt +++ b/buildSrc/src/main/kotlin/datadog/gradle/plugin/ci/CIJobsExtensions.kt @@ -2,60 +2,95 @@ package datadog.gradle.plugin.ci import org.gradle.api.Project import org.gradle.api.Task -import org.gradle.api.provider.Provider +import org.gradle.api.tasks.testing.Test import org.gradle.kotlin.dsl.extra import kotlin.math.abs +/** Parsed `-Pslot=X/Y` selection: 1-based [selected] slot out of [total] (> 0). */ +private data class SlotSelection(val selected: Int, val total: Int) { + /** Whether [key] falls into this slot. Bucket = `abs(key.hashCode() % total) + 1`, matching Java's truncating `%`. */ + fun selects(key: String): Boolean = abs(key.hashCode() % total) + 1 == selected +} + +/** Boxes a parsed selection (possibly null) so "absent" can be cached distinctly from "not computed yet". */ +private class SlotHolder(val selection: SlotSelection?) + +private const val SLOT_HOLDER_KEY = "datadog.ci.slotSelection" +private const val FORCE_COVERAGE_PREFIXES_KEY = "datadog.ci.forceCoveragePrefixes" + /** - * Determines if the current project is in the selected slot. - * - * The "slot" property should be provided in the format "X/Y", where X is the selected slot (1-based) - * and Y is the total number of slots. - * - * If the "slot" property is not provided, all projects are considered to be in the selected slot. 
+ * The `-Pslot=X/Y` selection, parsed once per build and cached on the root project, or null when the + * property is absent, empty, or malformed — meaning no slot filtering (everything is selected). */ -val Project.isInSelectedSlot: Provider - get() = rootProject.providers.gradleProperty("slot").map { slot -> - val parts = slot.split("/") - if (parts.size != 2) { - project.logger.warn("Invalid slot format '{}', expected 'X/Y'. Treating all projects as selected.", slot) - return@map true - } +private val Project.slotSelection: SlotSelection? + get() { + val root = rootProject + (root.extra.properties[SLOT_HOLDER_KEY] as? SlotHolder)?.let { return it.selection } + val parsed = parseSlotSelection(root.providers.gradleProperty("slot").orNull, root) + root.extra.set(SLOT_HOLDER_KEY, SlotHolder(parsed)) + return parsed + } - // When CI_NODE_INDEX or CI_NODE_TOTAL is unset in non-parallel jobs, one part may be empty (e.g. slot="/1") — treat as no filtering - if (parts[0].isBlank() || parts[1].isBlank()) { - project.logger.info("Incomplete slot value '{}', CI_NODE_INDEX or CI_NODE_TOTAL not set. Treating all projects as selected.", slot) - return@map true - } +private fun parseSlotSelection(raw: String?, root: Project): SlotSelection? { + if (raw == null) return null + val parts = raw.split("/") + if (parts.size != 2) { + root.logger.warn("Invalid slot format '{}', expected 'X/Y'. Treating all as selected.", raw) + return null + } + // When CI_NODE_INDEX or CI_NODE_TOTAL is unset in non-parallel jobs, one part may be empty + // (e.g. slot="/1") — treat as no filtering. + if (parts[0].isBlank() || parts[1].isBlank()) return null + val selected = parts[0].toIntOrNull() + val total = parts[1].toIntOrNull() + if (selected == null || total == null || total <= 0) { + root.logger.warn("Invalid slot values '{}', expected numeric 'X/Y' with Y > 0. 
Treating all as selected.", raw) + return null + } + return SlotSelection(selected, total) +} - val selectedSlot = parts[0].toIntOrNull() - val totalSlots = parts[1].toIntOrNull() +/** + * Module path prefixes registered (via [testAggregate] with `forceCoverage = true`) that always + * collect coverage. Their test tasks must stay whole-project-slotted even without `-PcheckCoverage`. + */ +@Suppress("UNCHECKED_CAST") +private val Project.forceCoveragePrefixes: List + get() = (rootProject.extra.properties[FORCE_COVERAGE_PREFIXES_KEY] as? List) ?: emptyList() - if (selectedSlot == null || totalSlots == null || totalSlots <= 0) { - project.logger.warn("Invalid slot values '{}', expected numeric 'X/Y' with Y > 0. Treating all projects as selected.", slot) - return@map true - } +/** + * Whether this module collects coverage — via `-PcheckCoverage` or a forceCoverage aggregate. When + * true the module must run whole (project-level slotting) so per-module JaCoCo sees complete + * execution data. [createRootTask] uses the same notion so the two stay consistent. 
+ */ +private val Project.coverageEnabled: Boolean + get() = rootProject.providers.gradleProperty("checkCoverage").isPresent || + forceCoveragePrefixes.any { path.startsWith(it) } - // Distribution numbers when running on rootProject.allprojects indicates - // bucket sizes are reasonably balanced: - // - // * size 4 distribution: {2=146, 0=143, 1=157, 3=145} - // * size 6 distribution: {4=100, 0=92, 3=97, 2=97, 1=108, 5=97} - // * size 8 distribution: {2=62, 4=72, 0=71, 5=70, 7=78, 6=84, 1=87, 3=67} - // * size 10 distribution: {8=62, 0=65, 5=70, 9=59, 3=54, 1=56, 6=63, 4=47, 2=52, 7=63} - // * size 12 distribution: {10=55, 0=47, 4=45, 9=46, 8=51, 3=51, 2=46, 1=59, 5=52, 7=49, 11=45, 6=45} - val projectSlot = abs(project.path.hashCode() % totalSlots) + 1 // Convert to 1-based - - project.logger.info( - "Project {} assigned to slot {}/{}, active slot is {}", - project.path, - projectSlot, - totalSlots, - selectedSlot, - ) - - projectSlot == selectedSlot - }.orElse(true) +/** + * Whether the current project is in the selected slot, at coarse one-task-per-project granularity. + * Used by whole-project aggregates such as `runMuzzle`. + */ +val Project.isInSelectedSlot: Boolean + get() = slotSelection?.selects(path) ?: true + +/** + * Whether this Test task is in the selected slot. + * + * Sharding at the *task* level lets a module's test variants — e.g. jdbc's + * `test`/`forkedTest`/`oldH2Test`/`oldPostgresTest` — spread across different CI slots instead of + * serializing in one job. The key is `":"`. + * + * Exception: when the module collects coverage (see [coverageEnabled]) all its test tasks must stay + * in one slot so per-module JaCoCo sees complete execution data, so we fall back to the project-level + * key. This MUST match the coverage decision in [createRootTask]. 
+ */ +val Task.isInSelectedSlot: Boolean + get() { + val slot = project.slotSelection ?: return true + val key = if (project.coverageEnabled) project.path else "${project.path}:$name" + return slot.selects(key) + } /** * Returns the task's path, given affected projects, if this task or its dependencies are affected by git changes. @@ -87,54 +122,64 @@ internal fun findAffectedTaskPath(baseTask: Task, affectedProjects: Map, excludePrefixes: List, - forceCoverage: Boolean + forceCoverage: Boolean, + testTaskFilter: ((Test) -> Boolean)? ) { val coverage = forceCoverage || rootProject.providers.gradleProperty("checkCoverage").isPresent + val perTaskShardable = !coverage && testTaskFilter != null + val slot = slotSelection tasks.register(rootTaskName) { - subprojects.forEach { subproject -> + subprojects.forEach forEachSub@{ subproject -> if ( - subproject.isInSelectedSlot.get() && - includePrefixes.any { subproject.path.startsWith(it) } && - !excludePrefixes.any { subproject.path.startsWith(it) } + !includePrefixes.any { subproject.path.startsWith(it) } || + excludePrefixes.any { subproject.path.startsWith(it) } ) { - val testTask = subproject.tasks.findByName(subProjTaskName) - var isAffected = true - - if (testTask != null) { - val useGitChanges = rootProject.extra.get("useGitChanges") as Boolean - if (useGitChanges) { - @Suppress("UNCHECKED_CAST") - val affectedProjects = rootProject.extra.get("affectedProjects") as Map> - val affectedTaskPath = findAffectedTaskPath(testTask, affectedProjects) - if (affectedTaskPath != null) { - logger.warn("Selecting ${subproject.path}:$subProjTaskName (affected by $affectedTaskPath)") - } else { - logger.warn("Skipping ${subproject.path}:$subProjTaskName (not affected by changed files)") - isAffected = false - } - } - if (isAffected) { - dependsOn(testTask) - } + return@forEachSub + } + + val subProjTask = subproject.tasks.findByName(subProjTaskName) + + // Git-change filtering, keyed off the umbrella task at module granularity 
(unchanged behavior). + if (subProjTask != null && rootProject.extra.get("useGitChanges") as Boolean) { + @Suppress("UNCHECKED_CAST") + val affectedProjects = rootProject.extra.get("affectedProjects") as Map> + val affectedTaskPath = findAffectedTaskPath(subProjTask, affectedProjects) + if (affectedTaskPath == null) { + logger.warn("Skipping ${subproject.path}:$subProjTaskName (not affected by changed files)") + return@forEachSub } + logger.warn("Selecting ${subproject.path}:$subProjTaskName (affected by $affectedTaskPath)") + } - if (isAffected && coverage) { - val coverageTask = subproject.tasks.findByName("jacocoTestReport") - if (coverageTask != null) { - dependsOn(coverageTask) - } - val verificationTask = subproject.tasks.findByName("jacocoTestCoverageVerification") - if (verificationTask != null) { - dependsOn(verificationTask) + if (perTaskShardable) { + // Depend only on the in-slot Test tasks the umbrella would run; leave the rest to other slots. + subproject.tasks.withType(Test::class.java).matching { testTaskFilter!!(it) }.forEach { testTask -> + if (slot == null || slot.selects("${subproject.path}:${testTask.name}")) { + dependsOn(testTask) } } + } else if (subProjTask != null && (slot == null || slot.selects(subproject.path))) { + dependsOn(subProjTask) + if (coverage) { + subproject.tasks.findByName("jacocoTestReport")?.let { dependsOn(it) } + subproject.tasks.findByName("jacocoTestCoverageVerification")?.let { dependsOn(it) } + } } } } @@ -154,7 +199,25 @@ fun Project.testAggregate( excludePrefixes: List = emptyList(), forceCoverage: Boolean = false ) { - createRootTask("${baseTaskName}Test", "allTests", includePrefixes, excludePrefixes, forceCoverage) - createRootTask("${baseTaskName}LatestDepTest", "allLatestDepTests", includePrefixes, excludePrefixes, forceCoverage) - createRootTask("${baseTaskName}Check", "check", includePrefixes, excludePrefixes, forceCoverage) + if (forceCoverage) { + registerForceCoveragePrefixes(includePrefixes) + } + // The 
two Test umbrellas mirror the membership filters in dd-trace-java.configure-tests.gradle.kts. + createRootTask("${baseTaskName}Test", "allTests", includePrefixes, excludePrefixes, forceCoverage) { + !it.name.contains("latest", ignoreCase = true) && it.name != "traceAgentTest" + } + createRootTask("${baseTaskName}LatestDepTest", "allLatestDepTests", includePrefixes, excludePrefixes, forceCoverage) { + it.name.contains("latest", ignoreCase = true) + } + createRootTask("${baseTaskName}Check", "check", includePrefixes, excludePrefixes, forceCoverage, testTaskFilter = null) +} + +@Suppress("UNCHECKED_CAST") +private fun Project.registerForceCoveragePrefixes(prefixes: List) { + val existing = rootProject.extra.properties[FORCE_COVERAGE_PREFIXES_KEY] as? MutableList + if (existing != null) { + existing.addAll(prefixes) + } else { + rootProject.extra.set(FORCE_COVERAGE_PREFIXES_KEY, prefixes.toMutableList()) + } } diff --git a/buildSrc/src/main/kotlin/dd-trace-java.ci-jobs.gradle.kts b/buildSrc/src/main/kotlin/dd-trace-java.ci-jobs.gradle.kts index 4868f4f769b..2849b373fe6 100644 --- a/buildSrc/src/main/kotlin/dd-trace-java.ci-jobs.gradle.kts +++ b/buildSrc/src/main/kotlin/dd-trace-java.ci-jobs.gradle.kts @@ -15,10 +15,12 @@ if (project != rootProject) { } allprojects { - // Enable tests only on the selected slot (if -Pslot=n/t is provided) + // Enable tests only on the selected slot (if -Pslot=n/t is provided). + // Sharding is per-task: a module's test variants (e.g. jdbc's test/forkedTest/oldH2Test/ + // oldPostgresTest) hash to independent slots instead of serializing within one job. 
tasks.withType().configureEach { - onlyIf("Project is in selected slot") { - project.isInSelectedSlot.get() + onlyIf("Task is in selected slot") { + isInSelectedSlot } } } @@ -124,7 +126,7 @@ if (gitBaseRefProvider.isPresent) { tasks.register("runMuzzle") { val muzzleSubprojects = subprojects.filter { p -> - p.isInSelectedSlot.get() + p.isInSelectedSlot && p.plugins.hasPlugin("java") && p.plugins.hasPlugin("dd-trace-java.muzzle") }