Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .github/workflows/Linux-libretro-common-samples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ jobs:
task_queue_title_error_test
tpool_wait_test
retro_atomic_test
retro_spsc_test
)

# Per-binary run command (overrides ./<binary> if present).
Expand Down Expand Up @@ -289,6 +290,31 @@ jobs:

TSAN_OPTIONS=halt_on_error=1 ./retro_atomic_test

- name: Run retro_spsc_test under Clang + ThreadSanitizer
shell: bash
working-directory: libretro-common/samples/queues/retro_spsc_test
run: |
# retro_spsc.c is a lock-free SPSC byte queue built on
# retro_atomic.h. Its correctness contract is acquire-load
# / release-store on the head and tail cursors, with the
# buffer reads/writes between them ordered by those barriers.
# Missing or weakened barriers produce torn data on the
# consumer side, observable as content mismatches in the
# stress harness AND as TSan-reported races. The default
# ASan/UBSan pass above catches the content mismatches but
# not the races; this lane catches both.
#
# halt_on_error=1 makes TSan exit non-zero on the first race
# rather than continuing -- which is what we want for CI:
# any race here means the SPSC contract is broken.
set -u
set -o pipefail

make clean
CC=clang make all SANITIZER=thread

TSAN_OPTIONS=halt_on_error=1 ./retro_spsc_test

# Cross-architecture validation lane for retro_atomic_test.
#
# The samples job above runs on x86_64, which is a strongly-ordered
Expand Down
83 changes: 83 additions & 0 deletions .github/workflows/Linux-samples-gfx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -205,3 +205,86 @@ jobs:
test -x vulkan_ctx_double_free_test
timeout 60 ./vulkan_ctx_double_free_test
echo "[pass] vulkan_ctx_double_free_test"

- name: Build and run gfx_thumbnail_status_atomic_test (TSan)
shell: bash
working-directory: samples/gfx/gfx_thumbnail_status_atomic
run: |
set -eu
# Regression test for the cross-thread thumbnail status
# synchronisation in gfx/gfx_thumbnail.c. The .status
# field is read by the video thread (gfx_thumbnail_draw)
# and written by the upload-callback thread
# (gfx_thumbnail_handle_upload), with a release-store /
# acquire-load pairing that guards the texture / width /
# height fields published alongside the AVAILABLE
# status. Pre-port the file used a hand-rolled
# __atomic_* / _Interlocked / volatile shim. Post-port
# the .status field is retro_atomic_int_t and the
# accesses route through retro_atomic_*_int macros.
#
# ThreadSanitizer is the right validator for this test:
# it instruments every atomic load and store and would
# flag a missing-barrier regression as a data race, even
# on x86 TSO where the hardware otherwise hides the bug.
# ASan/UBSan would not catch barrier removal at all
# (no out-of-bounds access, no UB). The test also
# carries static-assertion checks that validate the
# struct layout invariant (.status size == sizeof(int))
# so the field's atomic type does not change the
# gfx_thumbnail_t ABI. If gfx_thumbnail.c amends
# GFX_THUMB_STATUS_STORE / GFX_THUMB_STATUS_LOAD or the
# .status field type, the verbatim copies in
# gfx_thumbnail_status_atomic_test.c must follow.
make clean all SANITIZER=thread
test -x gfx_thumbnail_status_atomic_test
TSAN_OPTIONS=halt_on_error=1 timeout 60 \
./gfx_thumbnail_status_atomic_test
echo "[pass] gfx_thumbnail_status_atomic_test"

- name: Build and run gfx_widgets_msg_queue_race_test (TSan)
shell: bash
working-directory: samples/gfx/gfx_widgets_msg_queue_race
run: |
set -eu
# Regression test for the producer-producer race fix on
# dispgfx_widget_t::msg_queue in gfx/gfx_widgets.c.
# Pre-fix the FIFO had no dedicated lock: producers could
# be invoked from any thread (the threaded task system at
# libretro-common/queues/task_queue.c runs a worker
# thread; gfx/video_driver.c::video_driver_frame released
# RUNLOOP_MSG_QUEUE_LOCK before calling the producer, so
# main-thread producers ran unlocked) yet there was no
# lock dedicated to msg_queue itself. Post-fix a
# msg_queue_lock is acquired around every fifo_write (in
# gfx_widgets_msg_queue_push), every fifo_read (in
# gfx_widgets_iterate), and the avail-check that gates
# them. The outer fast-path FIFO_*_AVAIL checks that
# used to wrap the function bodies were dropped: reading
# the FIFO cursors outside the lock is itself a data
# race against the locked writes (TSan-detectable;
# benign on x86 TSO but real on weak-memory hardware).
#
# ThreadSanitizer is the right validator: it instruments
# every memory access on fifo_buffer_t::end and
# ::first and would flag a missing-lock regression as a
# data race on the producer side, the consumer side, or
# the avail check. ASan/UBSan would not catch the lock
# removal at all. halt_on_error=1 makes TSan exit
# non-zero on the first observed race, which is what we
# want for CI: any race here means the production
# locking discipline is broken.
#
# The test mirrors the post-fix locking protocol of
# gfx_widgets_msg_queue_push and gfx_widgets_iterate's
# FIFO interaction; the widget allocation, font
# measurement, and animation push that wrap the actual
# FIFO calls in production are not part of the race and
# are stripped from the test. If gfx_widgets.c amends
# the locking around msg_queue, the verbatim copies in
# gfx_widgets_msg_queue_race_test.c must follow.
make clean all SANITIZER=thread
test -x gfx_widgets_msg_queue_race_test
TSAN_OPTIONS=halt_on_error=1 timeout 60 \
./gfx_widgets_msg_queue_race_test
echo "[pass] gfx_widgets_msg_queue_race_test"
1 change: 1 addition & 0 deletions Makefile.common
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ OBJ += \
input/input_autodetect_builtin.o \
input/input_keymaps.o \
$(LIBRETRO_COMM_DIR)/queues/fifo_queue.o \
$(LIBRETRO_COMM_DIR)/queues/retro_spsc.o \
$(LIBRETRO_COMM_DIR)/compat/compat_fnmatch.o \
$(LIBRETRO_COMM_DIR)/compat/compat_posix_string.o

Expand Down
101 changes: 74 additions & 27 deletions audio/drivers/asio.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
#include <compat/strl.h>
#include <string/stdstring.h>
#include <lists/string_list.h>
#include <queues/fifo_queue.h>
#include <retro_spsc.h>

#ifdef HAVE_THREADS
#include <rthreads/rthreads.h>
Expand Down Expand Up @@ -644,7 +644,26 @@ static void *asio_load_driver(const CLSID *clsid)
typedef struct ra_asio
{
void *iasio; /* COM interface pointer */
fifo_buffer_t *ring; /* Ring buffer between write() and callback */
/* Lock-free SPSC ring buffer between asio_write (producer, main
* thread) and asio_cb_buffer_switch (consumer, ASIO callback
* thread). Pre-port this used fifo_buffer_t with no surrounding
* lock, which was a real cross-thread race on first/end -- see
* commit message. retro_spsc_t is the SPSC primitive designed
* for this exact pattern: lock-free (avoiding the priority-
* inversion concern of locking from a real-time audio callback),
* acquire/release ordering on the cursors, single-producer /
* single-consumer enforced by the type contract.
*
* Embedded by value (not via pointer) so the lifetime exactly
* tracks ra_asio_t. Initialised with retro_spsc_init in
* ra_asio_init / ra_asio_init_via_persistent and freed with
* retro_spsc_free in the corresponding teardown paths. The
* `ring_initialized` flag below distinguishes "never-initialised"
* from "init succeeded" so cleanup paths know whether to call
* retro_spsc_free. retro_spsc_t doesn't carry that bit
* internally because most callers know their own lifecycle. */
retro_spsc_t ring;
bool ring_initialized;
#ifdef HAVE_THREADS
scond_t *cond;
slock_t *cond_lock;
Expand Down Expand Up @@ -712,7 +731,11 @@ static void asio_deinterleave_to_buffers(ra_asio_t *ad,
long i;
void *buf_l = ad->buf_info[0].buffers[index];
void *buf_r = ad->buf_info[1].buffers[index];
size_t avail = FIFO_READ_AVAIL(ad->ring);
/* Acquire-load on the producer's head cursor. Pairs with the
* release-store inside retro_spsc_write that asio_write
* issues on the main thread, so the bytes we're about to read
* out via retro_spsc_read are guaranteed visible. */
size_t avail = retro_spsc_read_avail(&ad->ring);
long have = (long)(avail / (2 * sizeof(float)));

if (have > frames)
Expand All @@ -727,7 +750,7 @@ static void asio_deinterleave_to_buffers(ra_asio_t *ad,
float tmp[2];
for (i = 0; i < have; i++)
{
fifo_read(ad->ring, tmp, sizeof(tmp));
retro_spsc_read(&ad->ring, tmp, sizeof(tmp));
dl[i] = tmp[0];
dr[i] = tmp[1];
}
Expand All @@ -742,7 +765,7 @@ static void asio_deinterleave_to_buffers(ra_asio_t *ad,
float tmp[2];
for (i = 0; i < have; i++)
{
fifo_read(ad->ring, tmp, sizeof(tmp));
retro_spsc_read(&ad->ring, tmp, sizeof(tmp));
dl[i] = (double)tmp[0];
dr[i] = (double)tmp[1];
}
Expand All @@ -757,7 +780,7 @@ static void asio_deinterleave_to_buffers(ra_asio_t *ad,
float tmp[2];
for (i = 0; i < have; i++)
{
fifo_read(ad->ring, tmp, sizeof(tmp));
retro_spsc_read(&ad->ring, tmp, sizeof(tmp));
dl[i] = (int32_t)((double)tmp[0] * 2147483647.0);
dr[i] = (int32_t)((double)tmp[1] * 2147483647.0);
}
Expand All @@ -773,7 +796,7 @@ static void asio_deinterleave_to_buffers(ra_asio_t *ad,
for (i = 0; i < have; i++)
{
int32_t l, r;
fifo_read(ad->ring, tmp, sizeof(tmp));
retro_spsc_read(&ad->ring, tmp, sizeof(tmp));
l = (int32_t)(tmp[0] * 8388607.0f);
r = (int32_t)(tmp[1] * 8388607.0f);
l = l > 8388607 ? 8388607 : (l < -8388608 ? -8388608 : l);
Expand All @@ -797,7 +820,7 @@ static void asio_deinterleave_to_buffers(ra_asio_t *ad,
for (i = 0; i < have; i++)
{
int32_t l, r;
fifo_read(ad->ring, tmp, sizeof(tmp));
retro_spsc_read(&ad->ring, tmp, sizeof(tmp));
l = (int32_t)(tmp[0] * 32767.0f);
r = (int32_t)(tmp[1] * 32767.0f);
dl[i] = (int16_t)(l > 32767 ? 32767 : (l < -32768 ? -32768 : l));
Expand Down Expand Up @@ -826,7 +849,7 @@ static void asio_cb_buffer_switch(long index,
{
ra_asio_t *ad = g_asio;

if (!ad || !ad->ring || ad->is_paused || ad->shutdown)
if (!ad || !ad->ring_initialized || ad->is_paused || ad->shutdown)
{
if (ad && ad->buf_info[0].buffers[index])
{
Expand Down Expand Up @@ -924,8 +947,11 @@ static void asio_atexit_cleanup(void)
ASIO_CALL_RELEASE(ad->iasio);
}

if (ad->ring)
fifo_free(ad->ring);
if (ad->ring_initialized)
{
retro_spsc_free(&ad->ring);
ad->ring_initialized = false;
}

#ifdef HAVE_THREADS
if (ad->cond_lock)
Expand Down Expand Up @@ -991,7 +1017,11 @@ static void *ra_asio_init(const char *device, unsigned rate,
if (new_rate)
*new_rate = ad->sample_rate;

fifo_clear(ad->ring);
/* Discard any stale audio left over from the previous
* session. Safe here because the ASIO callback isn't
* running yet (g_asio is still NULL until the next line),
* so the SPSC is single-threaded at this point. */
retro_spsc_clear(&ad->ring);

g_asio = ad;
ad->running = true;
Expand Down Expand Up @@ -1158,14 +1188,18 @@ static void *ra_asio_init(const char *device, unsigned rate,

/* Create ring buffer BEFORE ASIO buffers — the driver may issue
* a bufferSwitch callback during ASIOCreateBuffers, and the
* callback needs the ring buffer to exist (even if empty). */
* callback needs the ring buffer to exist (even if empty).
* retro_spsc_init rounds capacity up to a power of 2; the
* over-allocation is small (factor of < 2) and irrelevant to
* the ASIO latency calculation, which uses ad->buffer_frames
* not the ring's actual byte capacity. */
ad->ring_size = pref_sz * 2 * sizeof(float) * ASIO_RING_MULT;
ad->ring = fifo_new(ad->ring_size);
if (!ad->ring)
if (!retro_spsc_init(&ad->ring, ad->ring_size))
{
RARCH_ERR("[ASIO] Failed to create ring buffer.\n");
goto error;
}
ad->ring_initialized = true;

#ifdef HAVE_THREADS
ad->cond = scond_new();
Expand Down Expand Up @@ -1229,8 +1263,11 @@ static void *ra_asio_init(const char *device, unsigned rate,
ASIO_CALL_DISPOSE_BUFFERS(ad->iasio);
ASIO_CALL_RELEASE(ad->iasio);
}
if (ad->ring)
fifo_free(ad->ring);
if (ad->ring_initialized)
{
retro_spsc_free(&ad->ring);
ad->ring_initialized = false;
}
#ifdef HAVE_THREADS
if (ad->cond_lock)
slock_free(ad->cond_lock);
Expand Down Expand Up @@ -1259,17 +1296,22 @@ static ssize_t ra_asio_write(void *data, const void *buf, size_t len)
if (ad->shutdown)
return -1;

avail = FIFO_WRITE_AVAIL(ad->ring);
avail = retro_spsc_write_avail(&ad->ring);
to_write = (len < avail) ? len : avail;
/* Align to frame boundary (stereo float = 8 bytes) */
to_write = (to_write / 8) * 8;

if (to_write > 0)
{
fifo_write(ad->ring, src, to_write);
src += to_write;
len -= to_write;
written += to_write;
/* retro_spsc_write returns bytes actually written. We've
* already capped to_write by retro_spsc_write_avail above,
* so the return value will equal to_write -- but use it
* defensively in case the contract ever changes. */
size_t actually_written =
retro_spsc_write(&ad->ring, src, to_write);
src += actually_written;
len -= actually_written;
written += actually_written;
}
else if (!ad->nonblock)
{
Expand Down Expand Up @@ -1346,9 +1388,14 @@ static void ra_asio_free(void *data)
/* Detach from the callback — silence output while parked */
g_asio = NULL;

/* Flush stale audio */
if (ad->ring)
fifo_clear(ad->ring);
/* No retro_spsc_clear here. The pre-port fifo_clear at this
* site was racy with stray ASIO callbacks that may still be
* running after ASIO_CALL_STOP returns (some drivers, notably
* ASIO4ALL, don't synchronously join their audio thread).
* The restart path in ra_asio_init_via_persistent calls
* retro_spsc_clear anyway, so any stale data will be flushed
* before the next run -- after the callback is provably
* stopped (g_asio == NULL gates it). */

/* Store for reuse */
g_asio_persistent = ad;
Expand All @@ -1361,9 +1408,9 @@ static bool ra_asio_use_float(void *data) { return true; }
static size_t ra_asio_write_avail(void *data)
{
ra_asio_t *ad = (ra_asio_t *)data;
if (!ad || !ad->ring)
if (!ad || !ad->ring_initialized)
return 0;
return FIFO_WRITE_AVAIL(ad->ring);
return retro_spsc_write_avail(&ad->ring);
}

static size_t ra_asio_buffer_size(void *data)
Expand Down
Loading
Loading