From b3618d6188be18ed83e2b6f8574cc412708bfe6c Mon Sep 17 00:00:00 2001 From: "U-DESKTOP-SPFP6AQ\\twistedtechre" Date: Wed, 29 Apr 2026 00:12:45 +0200 Subject: [PATCH 1/8] menu/ozone: silence UBSan float-to-unsigned conversion in entries draw UBSan-instrumented run reported: menu/drivers/ozone.c:6401:13: runtime error: -10.0781 is outside the range of representable values of type 'unsigned int' Origin: the value-text y argument to ozone_draw_entry_value() is declared as unsigned, but the y expression at the call site is y /* size_t */ + ozone->dimensions.entry_height / 2 /* int */ + ozone->fonts.entries_label.line_centre_offset /* int */ + scroll_y /* float */ The trailing float promotes the whole sum to float. scroll_y is clamped to (-inf, 0] (lines 10435-10436), so when the topmost partially-visible row's vertical centre lands above the header boundary -- reachable during pointer/wheel scrolling -- the sum goes slightly negative (UBSan saw -10.0781). The implicit float-to-unsigned conversion of a negative is undefined per C11 6.3.1.4 p1. Runtime impact: low. Every modern compiler wraps the conversion to ~UINT_MAX in practice; ozone_draw_entry_value then promotes that back to float as it forwards into gfx_display_draw_text(), producing a coordinate around 4.29e9 that the rasteriser clips off-screen. The value text just doesn't render that frame, visibly matching the off-screen-row case it would have hit on the next frame anyway. Fix: cast the sum through int (defined conversion for the range these screen coordinates occupy: small positive y plus a bounded negative scroll_y) so the negative case wraps in a defined manner rather than triggering UB. Matches the (int)((float)y + scroll_y) idiom already used at lines 3581, 3595, 5970 and 5985 in the same file for sibling y arguments that are signed-typed at the callee. 
Also folds entry_height / 2 -> entry_height / 2.0f for consistency with the surrounding label and sublabel y expressions at lines 6319 and 6346, which already use float division -- relevant only on the rare odd entry_height where integer division would round half a pixel below the float result. --- menu/drivers/ozone.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/menu/drivers/ozone.c b/menu/drivers/ozone.c index 246e759f875..a9fa2f27c7b 100644 --- a/menu/drivers/ozone.c +++ b/menu/drivers/ozone.c @@ -6398,10 +6398,28 @@ static void ozone_draw_entries( + x_offset + entry_width - ozone->dimensions.entry_icon_padding, - y - + ozone->dimensions.entry_height / 2 + /* The y arg is unsigned but the sum below evaluates to + * a float because of scroll_y, which is clamped to <= 0 + * (see lines 10435-10436). When the row's vertical + * centre lands above the visible top -- reachable + * during pointer/wheel scrolling whenever the topmost + * partially-visible row's value text would draw above + * the header -- the float value can be slightly + * negative (UBSan reported -10.0781). Implicit + * float-to-unsigned conversion of a negative is UB + * (C11 6.3.1.4 p1); silently wraps to ~UINT_MAX in + * practice, which the renderer then implicit-converts + * back to float as a huge off-screen coordinate, so + * the text just fails to draw rather than corrupting + * anything. Cast through int -- well-defined for the + * range these screen coords occupy -- so the negative + * case wraps in a defined manner instead. Matches the + * (int)((float)y + scroll_y) idiom used at lines 5970, + * 5985, 3581, 3595. 
*/ + (int)((float)y + + ozone->dimensions.entry_height / 2.0f + ozone->fonts.entries_label.line_centre_offset - + scroll_y, + + scroll_y), alpha_uint32, &entry, mymat); From 8ea17a0372b14f4f1da2efc835376b7d7877b3ac Mon Sep 17 00:00:00 2001 From: "U-DESKTOP-SPFP6AQ\\twistedtechre" Date: Wed, 29 Apr 2026 00:19:56 +0200 Subject: [PATCH 2/8] menu/ozone: clamp sidebar ticker field_width, root-cause UBSan trips UBSan-instrumented run reported, on the same arithmetic at two sites in ozone_draw_sidebar(): menu/drivers/ozone.c:3658:41: runtime error: -20.4167 is outside the range of representable values of type 'unsigned int' menu/drivers/ozone.c:3806:41: runtime error: -20.4167 is outside the range of representable values of type 'unsigned int' Two pre-existing TODO/FIXME comments in this file already noted the same trip (with a -12.549 sample) but punted on the fix. Same situation also lurks in the sibling non-smooth-ticker branches, which UBSan didn't happen to catch only because ticker.len is size_t rather than unsigned and the smooth path is the default. Where the float comes from -------------------------- ozone->dimensions_sidebar_width is a *float* "animated field" (declared so at line 578) that tweens between 0 / collapsed / normal sidebar widths via the gfx_animation system. It's initialised to 0.0f at startup (line 9324), so on the very first frame after init -- and during every collapse/expand animation -- it holds a small or fractional value. Through line 3568, entry_width = (unsigned)dimensions_sidebar_width - sidebar_padding_horizontal * 2; so entry_width is integer but tracks the float. Then both ticker computations evaluate entry_width - sidebar_entry_icon_size - 40 * scale_factor scale_factor is float (last_scale_factor at line 580), so 40 * scale_factor is float, so the whole sum is float. 
When entry_width is small (mid-animation, or sidebar collapsed) the sum lands below zero -- UBSan saw -20.4167 (40 * 1.5 - the ~icon_size offset against an entry_width near zero). The implicit float-to-unsigned (or float-to-size_t) conversion of a negative is UB per C11 6.3.1.4 p1. Modern compilers wrap to ~UINT_MAX/SIZE_MAX, which the ticker then treats as "plenty of room", drawing too much text for one or two animation frames until dimensions_sidebar_width settles. Visually mostly invisible; UBSan-fatally noisy. Fix --- Compute the available width in signed int, with the float intruder explicitly cast through int avail_width = entry_width - icon_size - (int)(40.0f * scale_factor); and clamp to zero before assignment to the unsigned/size_t field: ticker_field_width = avail_width > 0 ? (unsigned)avail_width : 0; This produces a defined zero-width when there's no room (the correct semantic -- the ticker has nothing to draw and skips rather than scribbling at a wrap-around offset). Apply identically at both sites (system tab loop and horizontal-list loop) and use the same local in both the smooth and non-smooth branches, which removes the duplicated arithmetic and applies the same clamp to ticker.len's previously-buggy non-smooth branch. The two TODO/FIXME comments are removed since they're now fixed rather than known-broken. Also adds a glyph_width != 0 guard around the non-smooth divide. glyph_width is populated during font init and is normally non- zero by the time draw_sidebar runs, but a font that failed to load would leave it at 0 and the existing code would divide by zero -- belt-and-suspenders since we're touching the line anyway. Address-of-root-cause vs whack-a-mole ------------------------------------- The deeper root cause is dimensions_sidebar_width being a float that flows into integer pixel math, rather than these specific sites. 
Normalising the animated width to an int field would require restructuring how the gfx_animation tween writes back (its subject is a float* by API), so it's not a drive-by fix. For now, every site that mixes scale_factor into integer pixel arithmetic is a candidate UBSan trip; this commit handles the two reported plus their immediate non-smooth counterparts. Future trips will need similar local clamps. --- menu/drivers/ozone.c | 86 ++++++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 34 deletions(-) diff --git a/menu/drivers/ozone.c b/menu/drivers/ozone.c index a9fa2f27c7b..bdcb1b443c1 100644 --- a/menu/drivers/ozone.c +++ b/menu/drivers/ozone.c @@ -3643,6 +3643,24 @@ static void ozone_draw_sidebar( enum msg_hash_enums value_idx = ozone_system_tabs_value[ozone->tabs[i]]; const char *title = msg_hash_to_str(value_idx); uint32_t text_color = 0; + /* Available pixel width for the ticker. scale_factor + * promotes the right operand to float, and entry_width + * may legitimately be small or zero (sidebar collapsed + * or animating in -- dimensions_sidebar_width is a float + * tween initialised to 0.0f), so the float sum can land + * below zero. Implicit float-to-unsigned conversion of + * a negative is UB (C11 6.3.1.4 p1) -- UBSan flagged + * the previous integer-typed expression here as e.g. + * '-20.4167 outside the range of unsigned int'. Compute + * in signed int (no float intruders) and clamp at zero; + * a non-positive width means there's no room to draw + * anything, which the ticker treats as 'skip' rather + * than scribbling at the wrap-around offset. */ + int avail_width = entry_width + - ozone->dimensions.sidebar_entry_icon_size + - (int)(40.0f * scale_factor); + unsigned ticker_field_width = avail_width > 0 + ? (unsigned)avail_width : 0; if (ozone->theme) text_color = selected ? 
COLOR_TEXT_ALPHA(ozone->theme->text_selected_rgba, text_alpha) @@ -3651,13 +3669,7 @@ static void ozone_draw_sidebar( if (use_smooth_ticker) { ticker_smooth.selected = selected; - /* TODO/FIXME - undefined behavior reported by ASAN - - *-12.549 is outside the range of representable values - of type 'unsigned int' - * */ - ticker_smooth.field_width = (entry_width - - ozone->dimensions.sidebar_entry_icon_size - - 40 * scale_factor); + ticker_smooth.field_width = ticker_field_width; ticker_smooth.src_str = title; ticker_smooth.dst_str = console_title; ticker_smooth.dst_str_len = sizeof(console_title); @@ -3666,10 +3678,9 @@ static void ozone_draw_sidebar( } else { - ticker.len = (entry_width - - ozone->dimensions.sidebar_entry_icon_size - - 40 * scale_factor) - / ozone->fonts.sidebar.glyph_width; + ticker.len = ozone->fonts.sidebar.glyph_width + ? ticker_field_width / ozone->fonts.sidebar.glyph_width + : 0; ticker.s = console_title; ticker.selected = selected; ticker.str = title; @@ -3796,33 +3807,40 @@ static void ozone_draw_sidebar( if (ozone->sidebar_collapsed) goto console_iterate; - if (use_smooth_ticker) + /* See matching computation earlier in this function for + * the rationale; entry_width can be small or zero + * (sidebar_width is a float tween, init 0.0f) and the + * float `40 * scale_factor` term promotes the sum to + * float, so the result can land below zero and the + * implicit conversion to unsigned/size_t is UB. 
*/ { - ticker_smooth.selected = selected; - /* TODO/FIXME - undefined behavior reported by ASAN - - *-12.549 is outside the range of representable values - of type 'unsigned int' - * */ - ticker_smooth.field_width = (entry_width + int avail_width = entry_width - ozone->dimensions.sidebar_entry_icon_size - - 40 * scale_factor); - ticker_smooth.src_str = node->console_name; - ticker_smooth.dst_str = console_title; - ticker_smooth.dst_str_len = sizeof(console_title); + - (int)(40.0f * scale_factor); + unsigned ticker_field_width = avail_width > 0 + ? (unsigned)avail_width : 0; - gfx_animation_ticker_smooth(&ticker_smooth); - } - else - { - ticker.len = (entry_width - - ozone->dimensions.sidebar_entry_icon_size - - 40 * scale_factor) - / ozone->fonts.sidebar.glyph_width; - ticker.s = console_title; - ticker.selected = selected; - ticker.str = node->console_name; + if (use_smooth_ticker) + { + ticker_smooth.selected = selected; + ticker_smooth.field_width = ticker_field_width; + ticker_smooth.src_str = node->console_name; + ticker_smooth.dst_str = console_title; + ticker_smooth.dst_str_len = sizeof(console_title); - gfx_animation_ticker(&ticker); + gfx_animation_ticker_smooth(&ticker_smooth); + } + else + { + ticker.len = ozone->fonts.sidebar.glyph_width + ? ticker_field_width / ozone->fonts.sidebar.glyph_width + : 0; + ticker.s = console_title; + ticker.selected = selected; + ticker.str = node->console_name; + + gfx_animation_ticker(&ticker); + } } gfx_display_draw_text( From 71a1398e7c028d6184bb852c52ea26bd419222a1 Mon Sep 17 00:00:00 2001 From: "U-DESKTOP-SPFP6AQ\\twistedtechre" Date: Wed, 29 Apr 2026 00:36:29 +0200 Subject: [PATCH 3/8] (X11) Probe for xdg-screensaver/xset before invoking suspend xdg-screensaver's X11 backend shells out to xset internally. 
On systems without x11-xserver-utils installed (minimal installs, containers, some WMs) the resulting failures surface as confusing stderr spam: /usr/bin/xdg-screensaver: 885: xset: not found /usr/bin/xdg-screensaver: 893: [: Illegal number: /usr/bin/xdg-screensaver: 632: xset: not found with no indication that the missing dependency is xset rather than something in RetroArch itself. Probe once on first use via 'command -v' (POSIX shell builtin, no extra deps) for both xdg-screensaver and xset, with output silenced. If either is missing, flip xdg_screensaver_available to false and log a single explanatory line; subsequent calls short-circuit out cleanly. Probe is gated behind a static bool so it runs at most once per process, and only when we'd actually invoke xdg-screensaver -- the DBus inhibit path still returns early before we ever get here. --- gfx/common/x11_common.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/gfx/common/x11_common.c b/gfx/common/x11_common.c index d3726ee4739..ea811453adc 100644 --- a/gfx/common/x11_common.c +++ b/gfx/common/x11_common.c @@ -252,13 +252,31 @@ static bool xss_screensaver_inhibit(Display *dpy, bool enable) return true; } #else -static bool xss_screensaver_inhibit(Display *dpy, bool enable) -{ - (void) dpy; - return false; -} +static bool xss_screensaver_inhibit(Display *dpy, bool enable) { return false; } #endif +/* Probe once for xdg-screensaver and its xset backend dependency. + * xdg-screensaver's "X11" backend shells out to xset; if xset is missing + * (common on minimal installs / containers / some WMs without + * x11-xserver-utils), invoking xdg-screensaver spams stderr with + * "xset: not found" and "Illegal number" without us ever knowing why. + * Check up front so we can silently no-op instead. */ +static bool xdg_screensaver_probe(void) +{ + /* Both are needed: xdg-screensaver itself, and xset which it execs. 
+ * `command -v` is a POSIX shell builtin so this works under /bin/sh + * on every platform that has system(). Redirecting both streams + * keeps the probe silent. */ + int ret = system("command -v xdg-screensaver >/dev/null 2>&1 && " + "command -v xset >/dev/null 2>&1"); + if (ret == -1 || WEXITSTATUS(ret) != 0) + { + RARCH_LOG("[X11] xdg-screensaver or xset not available; screensaver suspension disabled.\n"); + return false; + } + return true; +} + static void xdg_screensaver_inhibit(Window wnd) { int ret; @@ -312,6 +330,14 @@ bool x11_suspend_screensaver(void *data, bool enable) { if (xdg_screensaver_available) { + static bool probed = false; + if (!probed) + { + xdg_screensaver_available = xdg_screensaver_probe(); + probed = true; + } + if (!xdg_screensaver_available) + return true; xdg_screensaver_inhibit(wnd); return xdg_screensaver_available; } From 0c0eb03ab7b8d8a0be7d04a1e20a70a85adbbdea Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 29 Apr 2026 00:37:05 +0000 Subject: [PATCH 4/8] Fetch translations from Crowdin --- intl/msg_hash_ga.h | 4 ++++ intl/msg_hash_ko.h | 4 ++++ intl/msg_hash_sk.h | 12 ++++++++++++ intl/msg_hash_sv.h | 20 ++++++++++++++++++++ intl/progress.h | 4 ++-- 5 files changed, 42 insertions(+), 2 deletions(-) diff --git a/intl/msg_hash_ga.h b/intl/msg_hash_ga.h index 078cbc107ef..cb801da8b42 100644 --- a/intl/msg_hash_ga.h +++ b/intl/msg_hash_ga.h @@ -2530,6 +2530,10 @@ MSG_HASH( MENU_ENUM_LABEL_VALUE_VIDEO_SCANLINE_SYNC, "Sioncrónú Scanline" ) +MSG_HASH( + MENU_ENUM_SUBLABEL_VIDEO_SCANLINE_SYNC, + "Sioncrónaigh cur i láthair físe le suíomh na líne scanadh. Laghdaíonn sé moill ar chostas riosca níos airde stróiceadh. Ní mór VSync a dhíchumasú." 
+ ) MSG_HASH( MENU_ENUM_LABEL_VALUE_VIDEO_FRAME_DELAY, "Moill Fráma" diff --git a/intl/msg_hash_ko.h b/intl/msg_hash_ko.h index deb8dd7cb70..e9b9e217071 100644 --- a/intl/msg_hash_ko.h +++ b/intl/msg_hash_ko.h @@ -2554,6 +2554,10 @@ MSG_HASH( MENU_ENUM_LABEL_VALUE_VIDEO_SCANLINE_SYNC, "스캔라인 동기화" ) +MSG_HASH( + MENU_ENUM_SUBLABEL_VIDEO_SCANLINE_SYNC, + "동영상 출력을 스캔라인 위치에 맞춰 동기화합니다. 지연 시간이 줄어들지만 티어링 증상이 발생할 확률이 높아집니다. 수직 동기화가 비활성화되어야 합니다." + ) MSG_HASH( MENU_ENUM_LABEL_VALUE_VIDEO_FRAME_DELAY, "프레임 지연" diff --git a/intl/msg_hash_sk.h b/intl/msg_hash_sk.h index 9605ad3a3ea..a34492e94c8 100644 --- a/intl/msg_hash_sk.h +++ b/intl/msg_hash_sk.h @@ -970,6 +970,14 @@ MSG_HASH( MENU_ENUM_LABEL_VALUE_CLOUD_SYNC_PASSWORD, "Heslo" ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_CLOUD_SYNC_ACCESS_KEY_ID, + "ID prístupového kľúča" + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_CLOUD_SYNC_SECRET_ACCESS_KEY, + "Tajný prístupový kľúč" + ) MSG_HASH( MENU_ENUM_LABEL_VALUE_LOGGING_SETTINGS, "Záznam" @@ -2253,6 +2261,10 @@ MSG_HASH( MENU_ENUM_LABEL_VALUE_TURBO_MODE_SINGLEBUTTON_HOLD, "Jedno tlačidlo (držať)" ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_INPUT_TURBO_BUTTON, + "Tlačidlo Turbo" + ) MSG_HASH( MENU_ENUM_LABEL_VALUE_INPUT_TURBO_FIRE_SETTINGS, "Rýchlo streľba" diff --git a/intl/msg_hash_sv.h b/intl/msg_hash_sv.h index e082d2c7282..2f390ac950e 100644 --- a/intl/msg_hash_sv.h +++ b/intl/msg_hash_sv.h @@ -1094,6 +1094,26 @@ MSG_HASH( MENU_ENUM_SUBLABEL_CLOUD_SYNC_PASSWORD, "Ditt lösenord för ditt molnlagringskonto." ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_CLOUD_SYNC_ACCESS_KEY_ID, + "Åtkomstnyckel-ID" + ) +MSG_HASH( + MENU_ENUM_SUBLABEL_CLOUD_SYNC_ACCESS_KEY_ID, + "Ditt åtkomstnyckel-ID för ditt molnlagringskonto." + ) +MSG_HASH( + MENU_ENUM_LABEL_VALUE_CLOUD_SYNC_SECRET_ACCESS_KEY, + "Hemlig åtkomstnyckel" + ) +MSG_HASH( + MENU_ENUM_SUBLABEL_CLOUD_SYNC_SECRET_ACCESS_KEY, + "Din hemliga åtkomstnyckel för ditt molnlagringskonto." 
+ ) +MSG_HASH( + MENU_ENUM_SUBLABEL_CLOUD_SYNC_S3_URL, + "Din S3-slutpunkts-URL för molnlagring." + ) MSG_HASH( MENU_ENUM_LABEL_VALUE_LOGGING_SETTINGS, "Loggning" diff --git a/intl/progress.h b/intl/progress.h index 1b94609b8a7..1e52be6c50e 100644 --- a/intl/progress.h +++ b/intl/progress.h @@ -63,7 +63,7 @@ #define LANGUAGE_PROGRESS_FRENCH_APPROVED 100 /* Irish */ -#define LANGUAGE_PROGRESS_IRISH_TRANSLATED 99 +#define LANGUAGE_PROGRESS_IRISH_TRANSLATED 100 #define LANGUAGE_PROGRESS_IRISH_APPROVED 0 /* Galician */ @@ -95,7 +95,7 @@ #define LANGUAGE_PROGRESS_JAPANESE_APPROVED 0 /* Korean */ -#define LANGUAGE_PROGRESS_KOREAN_TRANSLATED 99 +#define LANGUAGE_PROGRESS_KOREAN_TRANSLATED 100 #define LANGUAGE_PROGRESS_KOREAN_APPROVED 0 /* Dutch */ From ea87b489846a050d107fb2b1e4123203ac9bd59f Mon Sep 17 00:00:00 2001 From: "U-DESKTOP-SPFP6AQ\\twistedtechre" Date: Wed, 29 Apr 2026 03:52:35 +0200 Subject: [PATCH 5/8] gfx/common/vulkan: force MVK_CONFIG_USE_MTLHEAP=0 on Apple to avoid placement-heap crash MoltenVK 1.3.0 flipped MVK_CONFIG_USE_MTLHEAP to active by default for non-AMD GPUs, routing VkDeviceMemory through a placement MTLHeap. Placement heaps need macOS 13+ AND supportsPlacementHeaps; older Intel Macs satisfy neither and abort at swapchain-image allocation: -[MTLHeapDescriptorInternal validateWithDevice:]:335: failed assertion `Placement heap type is not supported.' Force the path off in vulkan_context_init() before the loader is opened, while still respecting an explicit user override. "0" is boolean-false on 1.2.x and MVK_CONFIG_USE_MTLHEAP_NEVER on 1.3.x, so the fix is version-agnostic. Fixes #18985. 
--- gfx/common/vulkan_common.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gfx/common/vulkan_common.c b/gfx/common/vulkan_common.c index ed79716a22b..26018966835 100644 --- a/gfx/common/vulkan_common.c +++ b/gfx/common/vulkan_common.c @@ -2498,6 +2498,20 @@ bool vulkan_context_init(gfx_ctx_vulkan_data_t *vk, int use_mab = settings->bools.video_use_metal_arg_buffers; setenv("MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS", use_mab ? "1" : "0", 1); } + /* MoltenVK 1.3.0 flipped MVK_CONFIG_USE_MTLHEAP to active by default + * for all non-AMD GPUs, so VkDeviceMemory now allocates from a + * placement MTLHeap. Placement heaps require macOS 13+ and a GPU + * that reports supportsPlacementHeaps; older Intel Macs (Mac GPU + * family 2 on macOS 12 and earlier) abort at swapchain-image + * allocation time with: + * -[MTLHeapDescriptorInternal validateWithDevice:]: + * failed assertion `Placement heap type is not supported.' + * Force the path off here so the assertion can never fire. The + * value "0" is interpreted as boolean-false by 1.2.x and as the + * MVK_CONFIG_USE_MTLHEAP_NEVER enum value by 1.3.x, so it is + * forward- and backward-compatible. See libretro/RetroArch#18985. */ + if (!getenv("MVK_CONFIG_USE_MTLHEAP")) + setenv("MVK_CONFIG_USE_MTLHEAP", "0", 1); /* Try Vulkan loader first (enables validation layers if installed). * Falls back to MoltenVK directly if loader not available. */ vulkan_library = dylib_load("libvulkan.dylib"); From b35625026337ac334896220d11ae2835c093fb00 Mon Sep 17 00:00:00 2001 From: "U-DESKTOP-SPFP6AQ\\twistedtechre" Date: Wed, 29 Apr 2026 04:02:30 +0200 Subject: [PATCH 6/8] Revert "gfx/common/vulkan: force MVK_CONFIG_USE_MTLHEAP=0 on Apple to avoid placement-heap crash" This reverts commit ea87b489846a050d107fb2b1e4123203ac9bd59f. 
--- gfx/common/vulkan_common.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/gfx/common/vulkan_common.c b/gfx/common/vulkan_common.c index 26018966835..ed79716a22b 100644 --- a/gfx/common/vulkan_common.c +++ b/gfx/common/vulkan_common.c @@ -2498,20 +2498,6 @@ bool vulkan_context_init(gfx_ctx_vulkan_data_t *vk, int use_mab = settings->bools.video_use_metal_arg_buffers; setenv("MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS", use_mab ? "1" : "0", 1); } - /* MoltenVK 1.3.0 flipped MVK_CONFIG_USE_MTLHEAP to active by default - * for all non-AMD GPUs, so VkDeviceMemory now allocates from a - * placement MTLHeap. Placement heaps require macOS 13+ and a GPU - * that reports supportsPlacementHeaps; older Intel Macs (Mac GPU - * family 2 on macOS 12 and earlier) abort at swapchain-image - * allocation time with: - * -[MTLHeapDescriptorInternal validateWithDevice:]: - * failed assertion `Placement heap type is not supported.' - * Force the path off here so the assertion can never fire. The - * value "0" is interpreted as boolean-false by 1.2.x and as the - * MVK_CONFIG_USE_MTLHEAP_NEVER enum value by 1.3.x, so it is - * forward- and backward-compatible. See libretro/RetroArch#18985. */ - if (!getenv("MVK_CONFIG_USE_MTLHEAP")) - setenv("MVK_CONFIG_USE_MTLHEAP", "0", 1); /* Try Vulkan loader first (enables validation layers if installed). * Falls back to MoltenVK directly if loader not available. */ vulkan_library = dylib_load("libvulkan.dylib"); From 32815722799bd206f26eca1dec07ba6bae78505c Mon Sep 17 00:00:00 2001 From: "U-DESKTOP-SPFP6AQ\\twistedtechre" Date: Wed, 29 Apr 2026 04:05:09 +0200 Subject: [PATCH 7/8] tpool: fix tpool_wait returning before queued work has started The non-stopping branch of tpool_wait's predicate checked only working_cnt, which is the number of jobs currently being executed by a worker. 
working_cnt is incremented when a worker dequeues a job, not when a producer enqueues one, so: tp = tpool_create(N); for (i = 0; i < M; i++) tpool_add_work(tp, job, ctx); tpool_wait(tp); /* could return immediately */ /* read ctx state here, expecting all M jobs to have run */ was free to return before any worker had picked up the first job. Producers that outpaced the workers' first dequeue would observe zero or partial completion. The worker's completion-side signal already covers the correct condition (tpool.c:139 signals working_cond only when both working_cnt == 0 and work_first == NULL), so the fix is just to widen the wait predicate to match: also wait while work_first is non-NULL. The only in-tree user is cores/libretro-ffmpeg/ffmpeg_core.c, which calls tpool_wait to drain the pool before clearing the video buffer; all three of its call sites benefit from the corrected semantics, and none rely on the previous early-return behaviour. Adds a regression test under libretro-common/samples/rthreads/tpool_wait_test/ covering: - tpool_add_work then tpool_wait drains all jobs - tpool_create(0) defaults to a working pool - tpool_create / tpool_destroy roundtrip is heap-clean - tpool_destroy with queued-but-unrun work is heap-clean (tpool_destroy is documented to discard such work) Hooks the new sample into the Linux libretro-common samples workflow. Built and run under SANITIZER=address,undefined like the other samples; completes in under one second on a github-hosted runner. Verified the test catches the bug: against unpatched tpool.c, test_work_executes_once and test_zero_threads_default fail with counter=0 under ASan + UBSan.
--- .../Linux-libretro-common-samples.yml | 1 + libretro-common/rthreads/tpool.c | 11 +- .../samples/rthreads/tpool_wait_test/Makefile | 39 +++ .../tpool_wait_test/tpool_wait_test.c | 311 ++++++++++++++++++ 4 files changed, 360 insertions(+), 2 deletions(-) create mode 100644 libretro-common/samples/rthreads/tpool_wait_test/Makefile create mode 100644 libretro-common/samples/rthreads/tpool_wait_test/tpool_wait_test.c diff --git a/.github/workflows/Linux-libretro-common-samples.yml b/.github/workflows/Linux-libretro-common-samples.yml index e75f4c841e0..07e4fcddd06 100644 --- a/.github/workflows/Linux-libretro-common-samples.yml +++ b/.github/workflows/Linux-libretro-common-samples.yml @@ -78,6 +78,7 @@ jobs: rpng_roundtrip_test word_wrap_overflow_test task_queue_title_error_test + tpool_wait_test ) # Per-binary run command (overrides ./ if present). diff --git a/libretro-common/rthreads/tpool.c b/libretro-common/rthreads/tpool.c index 184934b2504..53407e38567 100644 --- a/libretro-common/rthreads/tpool.c +++ b/libretro-common/rthreads/tpool.c @@ -281,8 +281,15 @@ void tpool_wait(tpool_t *tp) { /* working_cond is dual use. It signals when we're not stopping but the * working_cnt is 0 indicating there isn't any work processing. If we - * are stopping it will trigger when there aren't any threads running. */ - if ((!tp->stop && tp->working_cnt != 0) || (tp->stop && tp->thread_cnt != 0)) + * are stopping it will trigger when there aren't any threads running. + * + * The non-stopping branch must also wait while work_first is non-NULL: + * a tpool_wait racing tpool_add_work would otherwise return before any + * worker has dequeued the just-added work (working_cnt still 0). The + * worker's completion path signals working_cond only when both + * working_cnt == 0 and work_first == NULL, so this predicate matches + * the signal exactly. 
*/ + if ((!tp->stop && (tp->work_first || tp->working_cnt != 0)) || (tp->stop && tp->thread_cnt != 0)) scond_wait(tp->working_cond, tp->work_mutex); else break; diff --git a/libretro-common/samples/rthreads/tpool_wait_test/Makefile b/libretro-common/samples/rthreads/tpool_wait_test/Makefile new file mode 100644 index 00000000000..6f1081e0e2b --- /dev/null +++ b/libretro-common/samples/rthreads/tpool_wait_test/Makefile @@ -0,0 +1,39 @@ +TARGET := tpool_wait_test + +LIBRETRO_COMM_DIR := ../../.. + +# tpool.c depends on rthreads.c. Both are self-contained at the +# libretro-common layer (rthreads.c is the platform abstraction over +# pthreads / Win32 / etc.) so we just compile them in directly -- +# no other libretro-common sources are needed. +SOURCES := \ + tpool_wait_test.c \ + $(LIBRETRO_COMM_DIR)/rthreads/tpool.c \ + $(LIBRETRO_COMM_DIR)/rthreads/rthreads.c + +OBJS := $(SOURCES:.c=.o) + +CFLAGS += -Wall -pedantic -std=gnu99 -g -O0 -I$(LIBRETRO_COMM_DIR)/include +LDFLAGS += -lpthread + +# rthreads.c uses clock_gettime + CLOCK_REALTIME on Linux glibc; on +# older glibc those live in -lrt. Harmless on newer glibc. +LDFLAGS += -lrt + +ifneq ($(SANITIZER),) + CFLAGS := -fsanitize=$(SANITIZER) -fno-omit-frame-pointer $(CFLAGS) + LDFLAGS := -fsanitize=$(SANITIZER) $(LDFLAGS) +endif + +all: $(TARGET) + +%.o: %.c + $(CC) -c -o $@ $< $(CFLAGS) + +$(TARGET): $(OBJS) + $(CC) -o $@ $^ $(LDFLAGS) + +clean: + rm -f $(TARGET) $(OBJS) + +.PHONY: clean diff --git a/libretro-common/samples/rthreads/tpool_wait_test/tpool_wait_test.c b/libretro-common/samples/rthreads/tpool_wait_test/tpool_wait_test.c new file mode 100644 index 00000000000..245d9f4bb40 --- /dev/null +++ b/libretro-common/samples/rthreads/tpool_wait_test/tpool_wait_test.c @@ -0,0 +1,311 @@ +/* Regression test for the tpool_wait predicate in + * libretro-common/rthreads/tpool.c. + * + * Background + * ---------- + * tpool_wait is the public "wait until all queued work has been + * processed" entry point. 
Pre-fix, its predicate was: + * + * (!tp->stop && tp->working_cnt != 0) || + * (tp->stop && tp->thread_cnt != 0) + * + * That is, in the non-stopping case it waited only on working_cnt. + * working_cnt is the number of jobs currently being executed by a + * worker -- it is incremented when a worker pops a job off the + * queue, not when a producer pushes one on. + * + * So the following sequence: + * + * tp = tpool_create(N); + * for (i = 0; i < M; i++) tpool_add_work(tp, job, ctx); + * tpool_wait(tp); // <-- can return immediately + * // ctx state read here, expecting all M jobs to have run + * + * could return from tpool_wait before any worker had picked up the + * first job: working_cnt was 0 because no worker had yet dequeued. + * Callers that read shared state after tpool_wait would observe + * zero or partial completion. + * + * Worker code already signals working_cond only when both + * working_cnt == 0 AND work_first == NULL (tpool.c:139), so the + * fix is to widen the wait predicate to match: also wait while + * work_first is non-NULL. + * + * What this test asserts + * ---------------------- + * 1. After a sequence of tpool_add_work followed by tpool_wait, + * every queued job has run. Verified by an under-lock counter. + * 2. tpool_create(0) defaults to a working pool (documented in + * tpool.h) that completes a posted job before tpool_wait + * returns. + * 3. tpool_create / tpool_destroy round-trips cleanly across many + * iterations (heap consistency checked by ASan/UBSan/LSan in + * the workflow). + * 4. tpool_destroy on a pool with queued-but-not-yet-run work does + * not crash and does not corrupt the heap. tpool_destroy is + * documented to discard outstanding queued work, so we do NOT + * assert on the counter -- we only verify clean teardown. + * + * What this test does NOT assert + * ------------------------------ + * It does not exercise tpool_wait followed by further + * tpool_add_work, since that is not a documented use pattern. 
It + * does not exercise the single-threaded fallback (HAVE_THREADS + * off); tpool.c requires threads. + * + * How the regression is caught + * ---------------------------- + * Without the fix, test_work_executes_once and + * test_zero_threads_default both fail observably: the counter is + * less than the expected job count (typically zero, since the + * producer outpaces the workers' first dequeue). The test prints + * a clear FAIL line and exits non-zero so the CI workflow flags + * it. Built under ASan + UBSan (the workflow default), any + * collateral heap or UB issue is also caught. + * + * The test is bounded: tight loops are sized to run inside the + * workflow's per-binary 60-second timeout on a github-hosted + * runner. Wall-clock under ASan + UBSan is under one second. + */ + +#include +#include +#include + +#include +#include + +#define POOL_THREADS 4 +#define WORK_JOBS 1000 +#define ROUNDTRIP_ITERS 2000 +#define STRESS_CYCLES 200 +#define STRESS_JOBS 32 + +struct work_ctx +{ + slock_t *lock; + int counter; +}; + +static void inc_job(void *arg) +{ + struct work_ctx *ctx = (struct work_ctx *)arg; + slock_lock(ctx->lock); + ctx->counter++; + slock_unlock(ctx->lock); +} + +/* ----------------------------------------------------------------- + * Test 1: tpool_wait correctly drains the queue. + * + * This is the regression case. Before the predicate fix, the + * counter would commonly read 0 here: working_cnt was still 0 at + * the moment tpool_wait was entered (no worker had yet dequeued + * any of the just-pushed work) and the old predicate returned + * immediately. 
+ * ----------------------------------------------------------------- */ +static int test_work_executes_once(void) +{ + tpool_t *tp; + int i; + struct work_ctx ctx; + int rc = 0; + + ctx.counter = 0; + ctx.lock = slock_new(); + if (!ctx.lock) + { + printf("[FAIL] test_work_executes_once: slock_new failed\n"); + return 1; + } + + tp = tpool_create(POOL_THREADS); + if (!tp) + { + printf("[FAIL] test_work_executes_once: tpool_create returned NULL\n"); + slock_free(ctx.lock); + return 1; + } + + for (i = 0; i < WORK_JOBS; i++) + { + if (!tpool_add_work(tp, inc_job, &ctx)) + { + printf("[FAIL] test_work_executes_once: tpool_add_work failed at i=%d\n", i); + rc = 1; + break; + } + } + + tpool_wait(tp); + tpool_destroy(tp); + + if (!rc) + { + slock_lock(ctx.lock); + if (ctx.counter != WORK_JOBS) + { + printf("[FAIL] test_work_executes_once: counter=%d expected=%d\n", + ctx.counter, WORK_JOBS); + rc = 1; + } + else + printf("[PASS] test_work_executes_once (%d jobs across %d threads)\n", + WORK_JOBS, POOL_THREADS); + slock_unlock(ctx.lock); + } + + slock_free(ctx.lock); + return rc; +} + +/* ----------------------------------------------------------------- + * Test 2: tpool_create(0) gives a working default pool. + * + * tpool.h documents num=0 as defaulting to 2 threads. Smoke-test + * that this path produces a usable pool that runs a single job to + * completion before tpool_wait returns. 
+ * ----------------------------------------------------------------- */ +static int test_zero_threads_default(void) +{ + tpool_t *tp; + struct work_ctx ctx; + int rc = 0; + + ctx.counter = 0; + ctx.lock = slock_new(); + if (!ctx.lock) + { + printf("[FAIL] test_zero_threads_default: slock_new failed\n"); + return 1; + } + + tp = tpool_create(0); + if (!tp) + { + printf("[FAIL] test_zero_threads_default: tpool_create(0) returned NULL\n"); + slock_free(ctx.lock); + return 1; + } + + if (!tpool_add_work(tp, inc_job, &ctx)) + { + printf("[FAIL] test_zero_threads_default: tpool_add_work failed\n"); + rc = 1; + } + + tpool_wait(tp); + tpool_destroy(tp); + + if (!rc) + { + slock_lock(ctx.lock); + if (ctx.counter != 1) + { + printf("[FAIL] test_zero_threads_default: counter=%d expected=1\n", + ctx.counter); + rc = 1; + } + else + printf("[PASS] test_zero_threads_default\n"); + slock_unlock(ctx.lock); + } + + slock_free(ctx.lock); + return rc; +} + +/* ----------------------------------------------------------------- + * Test 3: create/destroy round-trip with no work. + * + * Heap consistency check. Workers transition straight from their + * initial scond_wait to the stop branch; on the workflow runner + * with ASan+UBSan, any heap-buffer-overflow / use-after-free / + * undefined behaviour during teardown surfaces here. + * ----------------------------------------------------------------- */ +static int test_create_destroy_no_work(void) +{ + int i; + for (i = 0; i < ROUNDTRIP_ITERS; i++) + { + tpool_t *tp = tpool_create(POOL_THREADS); + if (!tp) + { + printf("[FAIL] test_create_destroy_no_work: tpool_create returned NULL at i=%d\n", i); + return 1; + } + tpool_destroy(tp); + } + printf("[PASS] test_create_destroy_no_work (%d iterations x %d threads)\n", + ROUNDTRIP_ITERS, POOL_THREADS); + return 0; +} + +/* ----------------------------------------------------------------- + * Test 4: stress -- create / push some work / destroy without + * waiting. 
+ * + * tpool_destroy is documented to discard outstanding queued work, + * so the counter is non-deterministic and we don't check it. + * What we do check is that the destroyer terminates and the heap + * stays consistent across many fast cycles -- ASan/UBSan/LSan + * carry the verification. This case was the one I originally + * (incorrectly) flagged as a UAF in the audit; the real situation + * is that scond_wait re-acquires the mutex before the destroyer + * can free it, so the original code is heap-safe here. Keeping + * the test in place as a guard against any future regression that + * would actually break that invariant. + * ----------------------------------------------------------------- */ +static int test_stress_destroy_with_pending(void) +{ + int i; + int j; + struct work_ctx ctx; + + ctx.counter = 0; + ctx.lock = slock_new(); + if (!ctx.lock) + { + printf("[FAIL] test_stress_destroy_with_pending: slock_new failed\n"); + return 1; + } + + for (i = 0; i < STRESS_CYCLES; i++) + { + tpool_t *tp = tpool_create(POOL_THREADS); + if (!tp) + { + printf("[FAIL] test_stress_destroy_with_pending: tpool_create returned NULL at i=%d\n", i); + slock_free(ctx.lock); + return 1; + } + for (j = 0; j < STRESS_JOBS; j++) + tpool_add_work(tp, inc_job, &ctx); + /* Deliberately no tpool_wait here. 
*/ + tpool_destroy(tp); + } + + printf("[PASS] test_stress_destroy_with_pending (%d cycles x %d jobs, ran=%d)\n", + STRESS_CYCLES, STRESS_JOBS, ctx.counter); + + slock_free(ctx.lock); + return 0; +} + +int main(void) +{ + int failures = 0; + + failures += test_work_executes_once(); + failures += test_zero_threads_default(); + failures += test_create_destroy_no_work(); + failures += test_stress_destroy_with_pending(); + + if (failures) + { + printf("\n%d tpool_wait regression test(s) failed\n", failures); + return 1; + } + printf("\nAll tpool_wait regression tests passed.\n"); + return 0; +} From 1a1396546e8595886f5afb01f66f57cde7a5f454 Mon Sep 17 00:00:00 2001 From: "U-DESKTOP-SPFP6AQ\\twistedtechre" Date: Wed, 29 Apr 2026 04:52:07 +0200 Subject: [PATCH 8/8] libretro-common/queues: bound fifo_write/read + reject SIZE_MAX init Audit-pass on libretro-common/queues/fifo_queue.c found three defensive gaps. Each is currently latent (audio + gfx widget callers all gate on FIFO_WRITE_AVAIL / FIFO_READ_AVAIL before calling fifo_write / fifo_read) but the function trusts @len unconditionally, so any future caller that forgets the gate silently corrupts memory rather than dropping the request. * fifo_write: out-of-bounds write when @len exceeds available space (libretro-common/queues/fifo_queue.c). The wrap-around branch computes rest_write = len - first_write and memcpy()s rest_write bytes starting at buffer->buffer; for any @len exceeding @buffer->size, that second memcpy walks off the end of the backing allocation. Worse, the gating test `buffer->end + len > buffer->size` itself wraps in size_t for huge @len, mis-routing the call into a single-memcpy branch with @len bytes of unbounded write. Fix by capping @len at FIFO_WRITE_AVAIL(buffer) on entry. No-op for current callers (they already cap); turns the latent overrun into a defined silent truncation for any future caller that forgets to gate. 
* fifo_read: same out-of-bounds shape on the read side -- rest_read bytes copied past the end of @buffer->buffer when @len exceeds the readable range. Same fix: cap @len at FIFO_READ_AVAIL(buffer) on entry. * fifo_initialize_internal: SIZE_MAX wrap (libretro-common/ queues/fifo_queue.c). The function reserves one ring slot for the empty/full distinction, so the actual allocation is (len + 1) bytes. Passing SIZE_MAX wraps that addition to 0; calloc(1, 0) is allowed to satisfy with a non-NULL pointer to a zero-byte allocation, so initialisation can succeed and leave buf->size == 0. The next fifo_write would then divide by zero on the `% buffer->size` end-pointer update. No current caller asks for SIZE_MAX, so the rejection is purely defensive. Also documents the new caps in the public header (fifo_write / fifo_read) so callers know that exceeding available space is a silent drop, not a silent overrun. Adds a regression test under libretro-common/samples/queues/ fifo_queue_bounds_test/. The test exercises: - SIZE_MAX initialisation (must be rejected), - over-write of 2048 bytes into a 100-byte ring (capped at 100, no OOB write), - over-read into a 64-byte buffer (capped at FIFO_READ_AVAIL, trailing bytes untouched), - SIZE_MAX @len passed to fifo_write with end != 0 (the integer-overflow case in the original gating test), - wrap-around correctness when the cap isn't engaged, - zero-length write/read no-ops. Wired into the existing libretro-common-samples CI workflow with SANITIZER=address,undefined. Pre-patch the test fails: the SIZE_MAX init succeeds (no rejection), and the over-write trips ASan with a 1947-byte heap-buffer-overflow WRITE. Post-patch all seven cases pass under ASan + UBSan + LSan. Note on residual scope ---------------------- The cap protects @buffer->buffer (the destination ring) but not @in_buf (the caller-supplied source). 
fifo_write reads exactly @len bytes from @in_buf -- a buggy caller that passes @len > sizeof(*in_buf) gets a source over-read that we cannot detect from inside the function (no @in_buf size parameter). That residual is unchanged by this commit; the surface area for it is much smaller than the destination overrun (audio drivers all pass full-size frame buffers). Any future hardening would require an API break to add @in_buf_len, which is out of scope. --- .../Linux-libretro-common-samples.yml | 1 + libretro-common/include/queues/fifo_queue.h | 12 + libretro-common/queues/fifo_queue.c | 57 +++- .../queues/fifo_queue_bounds_test/Makefile | 29 ++ .../fifo_queue_bounds_test.c | 278 ++++++++++++++++++ 5 files changed, 374 insertions(+), 3 deletions(-) create mode 100644 libretro-common/samples/queues/fifo_queue_bounds_test/Makefile create mode 100644 libretro-common/samples/queues/fifo_queue_bounds_test/fifo_queue_bounds_test.c diff --git a/.github/workflows/Linux-libretro-common-samples.yml b/.github/workflows/Linux-libretro-common-samples.yml index 07e4fcddd06..9046f1438e4 100644 --- a/.github/workflows/Linux-libretro-common-samples.yml +++ b/.github/workflows/Linux-libretro-common-samples.yml @@ -79,6 +79,7 @@ jobs: word_wrap_overflow_test task_queue_title_error_test tpool_wait_test + fifo_queue_bounds_test ) # Per-binary run command (overrides ./ if present). diff --git a/libretro-common/include/queues/fifo_queue.h b/libretro-common/include/queues/fifo_queue.h index 4879c3e96e1..bdd397420aa 100644 --- a/libretro-common/include/queues/fifo_queue.h +++ b/libretro-common/include/queues/fifo_queue.h @@ -126,6 +126,12 @@ static INLINE void fifo_clear(fifo_buffer_t *buffer) /** * Writes \c size bytes to the given queue. * + * \c size is silently capped at \c FIFO_WRITE_AVAIL(buffer) -- + * the call writes at most that many bytes and discards any + * excess. Callers that need to be sure all bytes are queued + * must gate on \c FIFO_WRITE_AVAIL beforehand. 
Behaviour is + * undefined if \c buffer is \c NULL. + * * @param buffer The FIFO queue to write to. * @param in_buf The buffer to read bytes from. * @param size The length of \c in_buf, in bytes. @@ -135,6 +141,12 @@ void fifo_write(fifo_buffer_t *buffer, const void *in_buf, size_t len); /** * Reads \c size bytes from the given queue. * + * \c size is silently capped at \c FIFO_READ_AVAIL(buffer) -- + * the call returns at most that many bytes and leaves the + * trailing portion of \c in_buf untouched. Callers that need + * exactly \c size bytes must gate on \c FIFO_READ_AVAIL + * beforehand. Behaviour is undefined if \c buffer is \c NULL. + * * @param buffer The FIFO queue to read from. * @param in_buf The buffer to store the read bytes in. * @param size The length of \c in_buf, in bytes. diff --git a/libretro-common/queues/fifo_queue.c b/libretro-common/queues/fifo_queue.c index b05435addd8..0810c422265 100644 --- a/libretro-common/queues/fifo_queue.c +++ b/libretro-common/queues/fifo_queue.c @@ -31,7 +31,21 @@ static bool fifo_initialize_internal(fifo_buffer_t *buf, size_t len) { - uint8_t *buffer = (uint8_t*)calloc(1, len + 1); + uint8_t *buffer; + + /* The ring reserves one slot to distinguish empty from full, + * so the actual allocation is (len + 1) bytes. Reject @len + * values that would wrap that addition: SIZE_MAX would + * compute (size_t)0, which calloc(1, 0) is allowed to satisfy + * with a non-NULL pointer to a zero-byte allocation. Letting + * that succeed would leave buf->size == 0 and the next + * fifo_write would divide by zero at the `% buffer->size` + * step. No current caller asks for SIZE_MAX, so the rejection + * is purely defensive. 
*/ + if (len >= SIZE_MAX) + return false; + + buffer = (uint8_t*)calloc(1, len + 1); if (!buffer) return false; @@ -91,8 +105,31 @@ fifo_buffer_t *fifo_new(size_t len) void fifo_write(fifo_buffer_t *buffer, const void *in_buf, size_t len) { - size_t first_write = len; + size_t first_write; size_t rest_write = 0; + size_t avail; + + /* Cap @len at the available space. Existing callers all + * gate on FIFO_WRITE_AVAIL before invoking us, so this is + * a no-op for them; for any caller that doesn't, the + * unbounded branch below would walk off the end of + * @buffer->buffer (the wrap-around copy at line `memcpy( + * buffer->buffer, ..., rest_write)` would write up to + * len - first_write bytes into a buffer of @buffer->size + * total, overrunning by len - size). Worse, the original + * `buffer->end + len > buffer->size` test wraps in size_t + * for huge @len and silently misclassifies the request as + * "fits in one chunk", taking the corrupting first memcpy + * down a path with no wrap-around bound at all. Capping + * here closes both windows. */ + avail = FIFO_WRITE_AVAIL(buffer); + if (len > avail) + len = avail; + + if (!len) + return; + + first_write = len; if (buffer->end + len > buffer->size) { @@ -109,8 +146,22 @@ void fifo_write(fifo_buffer_t *buffer, const void *in_buf, size_t len) void fifo_read(fifo_buffer_t *buffer, void *in_buf, size_t len) { - size_t first_read = len; + size_t first_read; size_t rest_read = 0; + size_t avail; + + /* Same rationale as fifo_write: cap @len at what's actually + * available to avoid out-of-buffer copies on a caller that + * forgot to gate on FIFO_READ_AVAIL. Existing callers all + * gate first; this is defensive. 
*/ + avail = FIFO_READ_AVAIL(buffer); + if (len > avail) + len = avail; + + if (!len) + return; + + first_read = len; if (buffer->first + len > buffer->size) { diff --git a/libretro-common/samples/queues/fifo_queue_bounds_test/Makefile b/libretro-common/samples/queues/fifo_queue_bounds_test/Makefile new file mode 100644 index 00000000000..ee8146745f6 --- /dev/null +++ b/libretro-common/samples/queues/fifo_queue_bounds_test/Makefile @@ -0,0 +1,29 @@ +TARGET := fifo_queue_bounds_test + +LIBRETRO_COMM_DIR := ../../.. + +SOURCES := \ + fifo_queue_bounds_test.c \ + $(LIBRETRO_COMM_DIR)/queues/fifo_queue.c + +OBJS := $(SOURCES:.c=.o) + +CFLAGS += -Wall -pedantic -std=gnu99 -g -O0 -I$(LIBRETRO_COMM_DIR)/include + +ifneq ($(SANITIZER),) + CFLAGS := -fsanitize=$(SANITIZER) -fno-omit-frame-pointer $(CFLAGS) + LDFLAGS := -fsanitize=$(SANITIZER) $(LDFLAGS) +endif + +all: $(TARGET) + +%.o: %.c + $(CC) -c -o $@ $< $(CFLAGS) + +$(TARGET): $(OBJS) + $(CC) -o $@ $^ $(LDFLAGS) + +clean: + rm -f $(TARGET) $(OBJS) + +.PHONY: clean diff --git a/libretro-common/samples/queues/fifo_queue_bounds_test/fifo_queue_bounds_test.c b/libretro-common/samples/queues/fifo_queue_bounds_test/fifo_queue_bounds_test.c new file mode 100644 index 00000000000..37c76b959b8 --- /dev/null +++ b/libretro-common/samples/queues/fifo_queue_bounds_test/fifo_queue_bounds_test.c @@ -0,0 +1,278 @@ +/* Regression test for the fifo_queue bounds checks added in + * libretro-common/queues/fifo_queue.c. + * + * Background + * ---------- + * fifo_write / fifo_read previously trusted @len blindly: passing + * len > FIFO_WRITE_AVAIL would walk off the end of the ring's + * backing buffer (the wrap-around copy `memcpy(buffer->buffer, + * src + first_write, rest_write)` writes rest_write bytes into a + * size-byte buffer, overrunning by len - size). 
Worse, the + * `buffer->end + len > buffer->size` check itself wraps in size_t + * for huge @len, mis-routing the caller down a single-memcpy + * branch with no wrap-around bound at all. fifo_initialize + * accepted len == SIZE_MAX, which made `len + 1` wrap to 0, so + * calloc(1, 0) might return a non-NULL zero-byte buffer and + * subsequent fifo_write would `% 0` (division by zero) on the + * end-pointer update. + * + * What this test asserts + * ---------------------- + * 1. fifo_initialize rejects SIZE_MAX (no wrap to zero-byte buf). + * 2. fifo_write caps @len at FIFO_WRITE_AVAIL: writing more than + * available drops the excess silently rather than overrunning + * the backing buffer. ASan/LSan-clean. + * 3. fifo_read caps @len at FIFO_READ_AVAIL: reading more than + * available leaves the trailing portion of @in_buf untouched. + * 4. The cap survives integer-overflow attempts on @len (very + * large @len that would wrap (end + len) to a small value + * in size_t arithmetic, which the original code mis-routed). + * 5. Wrap-around writes/reads still work correctly when the cap + * isn't engaged. + * + * Build under -fsanitize=address,undefined to catch any future + * regression that re-introduces the OOB write or the SIZE_MAX + * wrap. + */ + +#include +#include +#include +#include + +#include + +static int failures = 0; + +#define EXPECT(cond, fmt, ...) do { \ + if (!(cond)) { \ + fprintf(stderr, "[FAIL] %s:%d: " fmt "\n", \ + __func__, __LINE__, ##__VA_ARGS__); \ + failures++; \ + } \ +} while (0) + +/* Test 1: SIZE_MAX is rejected. Without the guard, len + 1 + * would wrap to 0 and the buffer would be unusable. */ +static void test_initialize_size_max(void) +{ + fifo_buffer_t buf; + + EXPECT(!fifo_initialize(&buf, SIZE_MAX), + "SIZE_MAX should be rejected (would wrap len + 1)"); + /* If it incorrectly succeeded we'd leak; we asserted failure + * so no buffer was allocated. */ + printf("[PASS] initialize_size_max\n"); +} + +/* Test 2: Normal init still works. 
*/ +static void test_initialize_normal(void) +{ + fifo_buffer_t buf; + + EXPECT(fifo_initialize(&buf, 256), "normal init should succeed"); + /* size is len + 1 (one slot reserved for empty/full) */ + /* Available bytes for writing == len */ + EXPECT(FIFO_WRITE_AVAIL(&buf) == 256, + "fresh buffer should have 256 bytes available, got %zu", + FIFO_WRITE_AVAIL(&buf)); + EXPECT(FIFO_READ_AVAIL(&buf) == 0, + "fresh buffer should have nothing to read, got %zu", + FIFO_READ_AVAIL(&buf)); + fifo_deinitialize(&buf); + printf("[PASS] initialize_normal\n"); +} + +/* Test 3: write cap. Pass more than available; the overrun + * should be silently truncated rather than corrupting memory. + * If ASan is enabled, an OOB write would trip it. */ +static void test_write_capped(void) +{ + fifo_buffer_t buf; + uint8_t payload[2048]; + size_t i; + + for (i = 0; i < sizeof(payload); i++) + payload[i] = (uint8_t)(i & 0xff); + + EXPECT(fifo_initialize(&buf, 100), "init"); + EXPECT(FIFO_WRITE_AVAIL(&buf) == 100, "100 avail"); + + /* Try to write 2048 into a 100-byte ring. */ + fifo_write(&buf, payload, sizeof(payload)); + + EXPECT(FIFO_READ_AVAIL(&buf) == 100, + "after over-write, read avail should be 100, got %zu", + FIFO_READ_AVAIL(&buf)); + EXPECT(FIFO_WRITE_AVAIL(&buf) == 0, + "after over-write, write avail should be 0, got %zu", + FIFO_WRITE_AVAIL(&buf)); + + fifo_deinitialize(&buf); + printf("[PASS] write_capped\n"); +} + +/* Test 4: read cap. Try to read more than available; the + * over-read should be capped at FIFO_READ_AVAIL and the trailing + * portion of the destination buffer should remain untouched. */ +static void test_read_capped(void) +{ + fifo_buffer_t buf; + const char *msg = "hello"; + uint8_t out[64]; + + EXPECT(fifo_initialize(&buf, 256), "init"); + fifo_write(&buf, msg, 5); + EXPECT(FIFO_READ_AVAIL(&buf) == 5, "5 readable"); + + memset(out, 0xaa, sizeof(out)); + /* Ask for more than available. 
*/ + fifo_read(&buf, out, 20); + + EXPECT(memcmp(out, msg, 5) == 0, + "first 5 bytes should be 'hello'"); + /* The cap means only 5 were actually written into out; the + * rest stays at the 0xaa sentinel. This is the documented + * post-cap behaviour. */ + EXPECT(out[5] == 0xaa, + "byte after capped read should be untouched (was 0x%02x)", + out[5]); + EXPECT(out[19] == 0xaa, + "trailing bytes should be untouched (was 0x%02x)", + out[19]); + + EXPECT(FIFO_READ_AVAIL(&buf) == 0, + "after capped read, read avail should be 0, got %zu", + FIFO_READ_AVAIL(&buf)); + + fifo_deinitialize(&buf); + printf("[PASS] read_capped\n"); +} + +/* Test 5: huge @len that would have wrapped (end + len) in + * size_t. The original code's `buffer->end + len > buffer->size` + * misclassifies this as fitting in one chunk, taking the + * single-memcpy path with len bytes of OOB write into the ring. + * The cap reduces len to FIFO_WRITE_AVAIL before any memcpy. + * + * Note: fifo_write reads exactly @len bytes from @in_buf -- the + * cap only protects the destination ring, not the source buffer. + * Callers must always supply a source buffer of at least @len + * bytes (or now, after the cap, at least FIFO_WRITE_AVAIL bytes). + * For this test we therefore use a source buffer big enough to + * cover the post-cap copy (which will be 99 bytes here). */ +static void test_write_size_max_len(void) +{ + fifo_buffer_t buf; + uint8_t byte = 0x42; + uint8_t big_src[256]; + + memset(big_src, 0xcd, sizeof(big_src)); + + EXPECT(fifo_initialize(&buf, 100), "init"); + /* Set end != 0 so end + SIZE_MAX would wrap to a small value: */ + fifo_write(&buf, &byte, 1); + /* Now end == 1. Pass SIZE_MAX as len; without the cap, the + * (end + len) addition wraps to 0 (for end=1, len=SIZE_MAX), + * the comparison "> size" is false, and the function would + * memcpy SIZE_MAX bytes from big_src into buffer->buffer + 1 + * -- a destination overrun of essentially the entire address + * space. 
With the cap, len becomes FIFO_WRITE_AVAIL = 99 + * and the write completes safely. */ + fifo_write(&buf, big_src, SIZE_MAX); + + /* Buffer should be full now. */ + EXPECT(FIFO_WRITE_AVAIL(&buf) == 0, + "after SIZE_MAX write, write avail should be 0, got %zu", + FIFO_WRITE_AVAIL(&buf)); + EXPECT(FIFO_READ_AVAIL(&buf) == 100, + "after SIZE_MAX write, read avail should be 100, got %zu", + FIFO_READ_AVAIL(&buf)); + + fifo_deinitialize(&buf); + printf("[PASS] write_size_max_len\n"); +} + +/* Test 6: wrap-around writes still work when not engaging the + * cap. Write to fill, read half, write half: the wrap-around + * branch in fifo_write should produce the right contents. */ +static void test_wrap_around(void) +{ + fifo_buffer_t buf; + uint8_t in[10] = {0,1,2,3,4,5,6,7,8,9}; + uint8_t out[10]; + + EXPECT(fifo_initialize(&buf, 10), "init"); /* 10 usable */ + + /* Fill it. */ + fifo_write(&buf, in, 10); + EXPECT(FIFO_WRITE_AVAIL(&buf) == 0, "full"); + + /* Drain half. */ + fifo_read(&buf, out, 5); + EXPECT(memcmp(out, in, 5) == 0, "first 5 bytes"); + EXPECT(FIFO_WRITE_AVAIL(&buf) == 5, "5 free"); + EXPECT(FIFO_READ_AVAIL(&buf) == 5, "5 used"); + + /* Write 5 more — engages the wrap-around branch. */ + { + uint8_t more[5] = {10,11,12,13,14}; + fifo_write(&buf, more, 5); + } + EXPECT(FIFO_WRITE_AVAIL(&buf) == 0, "full again"); + + /* Read all 10 — also engages wrap-around branch. */ + memset(out, 0, sizeof(out)); + fifo_read(&buf, out, 10); + EXPECT(out[0] == 5, "out[0]=5, got %u", out[0]); + EXPECT(out[4] == 9, "out[4]=9, got %u", out[4]); + EXPECT(out[5] == 10, "out[5]=10, got %u", out[5]); + EXPECT(out[9] == 14, "out[9]=14, got %u", out[9]); + + fifo_deinitialize(&buf); + printf("[PASS] wrap_around\n"); +} + +/* Test 7: zero-len write/read should be a defined no-op. */ +static void test_zero_len(void) +{ + fifo_buffer_t buf; + uint8_t byte; + + EXPECT(fifo_initialize(&buf, 32), "init"); + + /* Zero-len write on empty buffer. 
*/ + fifo_write(&buf, &byte, 0); + EXPECT(FIFO_READ_AVAIL(&buf) == 0, "still empty"); + + /* Zero-len read on empty buffer. */ + fifo_read(&buf, &byte, 0); + EXPECT(FIFO_READ_AVAIL(&buf) == 0, "still empty"); + + /* Zero-len read on non-empty buffer. */ + fifo_write(&buf, "x", 1); + fifo_read(&buf, &byte, 0); + EXPECT(FIFO_READ_AVAIL(&buf) == 1, "still 1 byte"); + + fifo_deinitialize(&buf); + printf("[PASS] zero_len\n"); +} + +int main(void) +{ + test_initialize_size_max(); + test_initialize_normal(); + test_write_capped(); + test_read_capped(); + test_write_size_max_len(); + test_wrap_around(); + test_zero_len(); + + if (failures) + { + fprintf(stderr, "\n%d fifo_queue test(s) failed\n", failures); + return 1; + } + printf("\nAll fifo_queue bounds tests passed.\n"); + return 0; +}