From b87ac2b4b8f6a404d3bc42fdf1b7dcb65fa178cb Mon Sep 17 00:00:00 2001 From: Jim Huang Date: Mon, 11 May 2026 22:48:31 +0800 Subject: [PATCH] Stabilize SMP priority_order selftest The SMP build-and-test job hung on selftest 32 (priority_order) past the 120s watcher budget and was killed. - The make watcher's CHECK_*_TIMEOUT used := so the CI workflow's CHECK_SELFTEST_TIMEOUT=240 env override never applied; the effective budget stayed at the 120s default. Switch to ?= so the override propagates. - test_priority_order waited for the three pinned worker tasks via a single sleep_ms(50). That wake path arms one callout and depends on a same-priority same-hart re-enqueue to drag the idle thread out of wfi: which is fragile under QEMU TCG SMP timing and has been observed to stall the test indefinitely. Replace the blind sleep with an atomic done-counter polled via sleep_ms(0) yields and a 2s wall-clock deadline, so the test makes progress on its own and fails cleanly if the workers never run. - dl_replenish_cb only re-enqueued the task when it found it in TD_STATE_DL_THROTTLED, missing the case where the task is already sitting in pcpu_dl_runq[cpu] in TD_STATE_READY (sched_dl_pick_next was skipping it because dl_throttled was set). Without that poke the owning hart can stay parked in wfi after the throttle flag clears. Set need_resched on the task's CPU (and cross-hart IPI) when we observe READY at replenish time. --- Makefile | 4 ++-- kernel/sched/deadline.c | 21 ++++++++++++++++++++- tests/tests-sched.c | 32 +++++++++++++++++++------------- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 5fcabc5..74d7a3f 100644 --- a/Makefile +++ b/Makefile @@ -286,8 +286,8 @@ run: $(CHECK_DEPS) CHECK_PID := $(BUILD_DIR)/check_qemu.pid CHECK_LOG := $(BUILD_DIR)/check_serial.log -CHECK_TIMEOUT := 30 -CHECK_SELFTEST_TIMEOUT := 120 +CHECK_TIMEOUT ?= 30 +CHECK_SELFTEST_TIMEOUT ?= 120 CHECK_SELFTEST_PID := $(BUILD_DIR)/check_selftest_qemu.pid CHECK_SELFTEST_LOG := $(BUILD_DIR)/check_selftest_serial.log diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 1d6878c..ad783d6 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -124,9 +124,28 @@ static void dl_replenish_cb(void *arg) * the original wake path handles re-enqueueing; the throttle flag * is already cleared above so the task won't be re-throttled on * the next enqueue. + * + * READY is a third case: the task is already sitting in + * pcpu_dl_runq[cpu] but sched_dl_pick_next was skipping it because + * dl_throttled was set. Now that we cleared the flag the task is + * pickable, but the owning hart has not been notified -- if it is + * the per-hart idle thread in wfi (pcpu_runq_bitmap == 0, only DL + * entries on the runq), it will stay there until something else + * pokes it. Force a reschedule on its CPU. */ - if (task->state == TD_STATE_DL_THROTTLED) + if (task->state == TD_STATE_DL_THROTTLED) { sched_wake_ready(task); + } else if (task->state == TD_STATE_READY) { + u32 cpu = task->td_last_cpu; + if (cpu < MAX_CPUS) { + struct pcpu *tpc = &pcpu_array[cpu]; + __atomic_store_n(&tpc->need_resched, 1, __ATOMIC_RELEASE); +#if CONFIG_SMP + if (cpu != get_cpuid()) + ipi_send(cpu, IPI_SCHED); +#endif + } + } } /* Admission check: can (runtime_ticks, period_ticks) be added without diff --git a/tests/tests-sched.c b/tests/tests-sched.c index b0a9dff..2c16b77 100644 --- a/tests/tests-sched.c +++ b/tests/tests-sched.c @@ -87,21 +87,24 @@ DEFINE_SELFTEST(cpu_time, test_cpu_time); /* Priority ordering: create tasks at different priorities, verify execution * order via a shared counter array. Higher priority runs first. */ -static volatile int prio_order[3]; +#define PRIO_ORDER_N 3 +static volatile int prio_order[PRIO_ORDER_N]; static volatile int prio_idx; +static volatile int prio_done_count; static void selftest_prio_cb(void *ctx) { int slot = (int) (uptr) ctx; - int idx = prio_idx; + int idx = __atomic_fetch_add(&prio_idx, 1, __ATOMIC_ACQ_REL); if (idx < (int) countof(prio_order)) prio_order[idx] = slot; - prio_idx = idx + 1; + __atomic_fetch_add(&prio_done_count, 1, __ATOMIC_RELEASE); } static i32 test_priority_order(void) { - prio_idx = 0; + __atomic_store_n(&prio_idx, 0, __ATOMIC_RELAXED); + __atomic_store_n(&prio_done_count, 0, __ATOMIC_RELAXED); prio_order[0] = prio_order[1] = prio_order[2] = -1; /* Create tasks: LOW=0, NORMAL=1, HIGH=2. Create in ascending order @@ -136,16 +139,19 @@ static i32 test_priority_order(void) } enable_interrupts(); - /* Yield to let them all run. The main task is IDLE priority, so all - * created tasks will run before resuming. Any pending IPI that - * arrived during the disabled window fires now, triggering the - * context switch to the highest-priority test task. - * - * Use a brief sleep (not yield) to give all three tasks time to - * complete on SMP, where timer ticks and IPI processing may delay - * the pinned tasks beyond a single yield quantum. + /* Poll for all three tasks to complete, yielding between checks. + * The main task is IDLE priority, so each yield cycles through the BSP idle + * thread and any pending IDLE-priority test task before resuming. Yielding + * (sleep_ms(0)) avoids the long-sleep wake path that has shown intermittent + * stalls under QEMU TCG SMP timing. */ - sleep_ms(time_ms_new(50)); + u64 start = time_rdtime(); + u64 timeout = time_ms_to_ticks(2000); + while (__atomic_load_n(&prio_done_count, __ATOMIC_ACQUIRE) < PRIO_ORDER_N) { + if (time_rdtime() - start > timeout) + return 1; + sleep_ms(time_ms_new(0)); + } /* Expected order: HIGH(2), NORMAL(1), IDLE(0). */ if (prio_order[0] != 2 || prio_order[1] != 1 || prio_order[2] != 0)