Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions .github/workflows/ci-arm64.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Native arm64 coverage: the only job that exercises the SPSC ring's
# acquire/release protocol on real weakly-ordered hardware (x86 TSan runs
# can't reorder like Arm does, and QEMU does not model weak memory).
# TSan on native arm64: ci.yml's macos-latest leg already runs the suite
# per push on Apple Silicon (arm64), but without TSan — this workflow's
# unique value is the ring stress under TSan on real weakly-ordered
# hardware (x86 TSan runs can't reorder like Arm does, and QEMU does not
# model weak memory).
#
# Kept out of the per-push workflow on purpose: GitHub's arm64 hosted
# runners ("ubuntu-24.04-arm") are not available on every plan for private
Expand All @@ -13,6 +15,10 @@ on:
schedule:
- cron: "17 6 * * 1" # Mondays 06:17 UTC

# Workflow token scopes: read-only repository contents, plus issue write
# access, which the "File or update failure issue" step needs to open or
# comment on the tracking issue through github.token.
permissions:
contents: read
issues: write

jobs:
arm64-native:
name: Linux arm64 (native weak-memory)
Expand Down Expand Up @@ -52,3 +58,22 @@ jobs:
for i in 1 2 3 4 5; do
ctest --test-dir build-tsan -R 'SpscRing' --output-on-failure
done

# Scheduled runs have no PR or push audience; without this a weekly
# failure goes unseen until someone checks the Actions tab.
- name: File or update failure issue
  # Runs only when an earlier step of this job has failed.
  if: failure()
  env:
    GH_TOKEN: ${{ github.token }}
    # gh takes its target repository from GH_REPO, so the script needs no
    # --repo flags and no workflow expressions expanded into shell text.
    GH_REPO: ${{ github.repository }}
    RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
  run: |
    title="ci-arm64 weekly run failing"
    body="Run failed: $RUN_URL"
    # The title search can return issues that merely contain the phrase, so
    # keep only an exact title match and take the first hit. An empty result
    # means no tracking issue is currently open.
    number=$(gh issue list \
      --state open --search "in:title \"$title\"" \
      --json number,title \
      --jq ".[] | select(.title == \"$title\") | .number" | head -1)
    if [ -n "$number" ]; then
      # Append this run to the existing issue instead of opening a duplicate.
      gh issue comment "$number" --body "$body"
    else
      gh issue create --title "$title" --body "$body"
    fi
70 changes: 61 additions & 9 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ jobs:
- name: macOS AppleClang
os: macos-latest
werror: ON
# Warnings stay non-fatal on MSVC until /W4 output has been triaged.
# Warnings stay non-fatal on MSVC until /W4 output has been
# triaged (docs/PERFORMANCE.md "Known debt").
- name: Windows MSVC
os: windows-latest
werror: OFF
Expand Down Expand Up @@ -107,10 +108,10 @@ jobs:
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
with:
path: ~/hexagon
# -verified suffix: caches predating checksum verification are not
# reused (the download step, and thus verification, only runs on a
# cache miss).
key: ${{ env.HEXAGON_TOOLCHAIN_URL }}-verified-1
# Keyed on the pinned digest: every job that can write this key
# verifies its download against the same pin, so no unverified
# writer can poison the trusted entry.
key: hexagon-toolchain-${{ env.HEXAGON_TOOLCHAIN_SHA256 }}-1

- name: Download toolchain
if: steps.cache.outputs.cache-hit != 'true'
Expand Down Expand Up @@ -268,11 +269,17 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 45
env:
QEMU_PLUGIN_HEADER_URL: https://raw.githubusercontent.com/qemu/qemu/v8.2.2/include/qemu/qemu-plugin.h
# Commit the v8.2.2 tag pointed at when pinned (tags are movable;
# commit SHAs are not), with the header's digest verified on download.
QEMU_PLUGIN_HEADER_URL: https://raw.githubusercontent.com/qemu/qemu/11aa0b1ff115b86160c4d37e7c37e6a6b13b77ea/include/qemu/qemu-plugin.h
QEMU_PLUGIN_HEADER_SHA256: "c53a2af163e80e3f4bc6c60dbdfc84003db329d757e37cd8a16a77e1d82606ff"
QEMU_SRC_URL: https://download.qemu.org/qemu-8.2.2.tar.xz
# Hard pin from the "qemu source sha256:" line of run #24.
QEMU_SRC_SHA256: "847346c1b82c1a54b2c38f6edbd85549edeb17430b7d4d3da12620e2962bc4f3"
HEXAGON_TOOLCHAIN_URL: https://artifacts.codelinaro.org/artifactory/codelinaro-toolchain-for-hexagon/19.1.5/clang+llvm-19.1.5-cross-hexagon-unknown-linux-musl.tar.zst
# Same hard pin as the hexagon-qemu job: this job also writes the
# shared toolchain cache, so it must verify against the same digest.
HEXAGON_TOOLCHAIN_SHA256: "55b41922318f6331590ab7baa7f5dbdd99c109327a9c44a52c5e9878fab148c1"
steps:
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6

Expand All @@ -285,6 +292,10 @@ jobs:
- name: Build counting plugin
  run: |
    # Fetch the pinned plugin header. -S keeps curl's error message visible
    # under -s, so a failed download is diagnosable from the step log.
    curl -sSfLo /tmp/qemu-plugin.h "$QEMU_PLUGIN_HEADER_URL"
    # Refuse to compile against a header whose digest differs from the pin.
    actual=$(sha256sum /tmp/qemu-plugin.h | cut -d' ' -f1)
    if [ "$actual" != "$QEMU_PLUGIN_HEADER_SHA256" ]; then
      echo "::error::qemu-plugin.h checksum mismatch"; exit 1
    fi
    # pkg-config output is deliberately unquoted so its flags word-split.
    gcc -shared -fPIC $(pkg-config --cflags glib-2.0) -I/tmp \
      -o /tmp/libinsncount.so tools/qemu_insn_plugin/insn_count.c

Expand Down Expand Up @@ -349,13 +360,20 @@ jobs:
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
with:
path: ~/hexagon
key: ${{ env.HEXAGON_TOOLCHAIN_URL }}-verified-1
# Same digest-keyed name as the hexagon-qemu job; the download
# below verifies the same pin before anything is saved under it.
key: hexagon-toolchain-${{ env.HEXAGON_TOOLCHAIN_SHA256 }}-1

- name: Ratchet Hexagon
run: |
if [ "${{ steps.cache.outputs.cache-hit }}" != "true" ]; then
mkdir -p ~/hexagon && cd ~/hexagon
curl -sfLo toolchain.tar.zst "$HEXAGON_TOOLCHAIN_URL"
actual=$(sha256sum toolchain.tar.zst | cut -d' ' -f1)
if [ "$actual" != "$HEXAGON_TOOLCHAIN_SHA256" ]; then
echo "::error::toolchain checksum mismatch against pinned value"
exit 1
fi
tar --zstd -xf toolchain.tar.zst && rm toolchain.tar.zst
cd "$GITHUB_WORKSPACE"
fi
Expand Down Expand Up @@ -416,6 +434,39 @@ jobs:
# Runs the benchmark binary with a 0.01s minimum time per benchmark.
# NOTE(review): presumably a quick does-it-still-run check rather than a
# measurement, going by the step name — confirm.
- name: Run (smoke)
run: ./build/bench/srt_bench --benchmark_min_time=0.01s

# Build-only guard for the manually-triggered comparison paths
# (compare.yml, bench/compare): compiling them on every push stops them
# from rotting between manual runs. Nothing is measured here — the numbers
# come from the manual workflow only.
compare-smoke:
  name: Comparison build smoke
  runs-on: ubuntu-latest
  timeout-minutes: 20
  steps:
    - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6

    # Comparison libraries for the host bench plus the bare-metal Arm
    # cross compiler for the M55 workload.
    - name: Install dependencies
      run: |
        sudo apt-get update -q
        sudo apt-get install -y -q \
          libsamplerate0-dev libsoxr-dev gcc-arm-none-eabi

    # Configure, then build just the comparison benchmark target.
    - name: Build host comparison bench
      run: |
        cmake -B build-host \
          -DCMAKE_BUILD_TYPE=Release \
          -DSRT_BUILD_BENCHMARKS=ON \
          -DSRT_BUILD_COMPARE_BENCH=ON
        cmake --build build-host -j 4 --target srt_bench_compare

    # Cross-configure with the Cortex-M55 toolchain file, then build the
    # single comparison instruction-count workload.
    - name: Build M55 comparison workload
      run: |
        cmake -B build-m55 \
          -DCMAKE_BUILD_TYPE=Release \
          -DCMAKE_TOOLCHAIN_FILE=cmake/arm-cortex-m55-mps3.cmake \
          -DSRT_BUILD_TESTS=OFF -DSRT_BUILD_EXAMPLES=OFF \
          -DSRT_BUILD_ICOUNT_BENCH=ON -DSRT_ICOUNT_COMPARE=ON
        cmake --build build-m55 -j 4 --target cmp_icount_lsr_medium

clang-format:
name: clang-format
runs-on: ubuntu-latest
Expand All @@ -427,5 +478,6 @@ jobs:
sudo apt-get update -q && sudo apt-get install -y -q clang-format-18
clang-format-18 --dry-run --Werror \
include/srt/*.hpp include/srt/detail/*.hpp \
bench/*.cpp bench/icount/*.cpp tools/capi/*.cpp \
tests/*.cpp tests/support/*.hpp examples/*.cpp
bench/*.cpp bench/icount/*.cpp bench/compare/*.cpp \
tools/capi/*.cpp tools/qemu_insn_plugin/*.c \
tests/*.cpp tests/support/*.hpp examples/*.cpp platform/*.c
21 changes: 19 additions & 2 deletions .github/workflows/compare.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,16 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 60
env:
QEMU_PLUGIN_HEADER_URL: https://raw.githubusercontent.com/qemu/qemu/v8.2.2/include/qemu/qemu-plugin.h
# Commit the v8.2.2 tag pointed at when pinned (tags are movable;
# commit SHAs are not), with the header's digest verified on download.
QEMU_PLUGIN_HEADER_URL: https://raw.githubusercontent.com/qemu/qemu/11aa0b1ff115b86160c4d37e7c37e6a6b13b77ea/include/qemu/qemu-plugin.h
QEMU_PLUGIN_HEADER_SHA256: "c53a2af163e80e3f4bc6c60dbdfc84003db329d757e37cd8a16a77e1d82606ff"
QEMU_SRC_URL: https://download.qemu.org/qemu-8.2.2.tar.xz
QEMU_SRC_SHA256: "847346c1b82c1a54b2c38f6edbd85549edeb17430b7d4d3da12620e2962bc4f3"
HEXAGON_TOOLCHAIN_URL: https://artifacts.codelinaro.org/artifactory/codelinaro-toolchain-for-hexagon/19.1.5/clang+llvm-19.1.5-cross-hexagon-unknown-linux-musl.tar.zst
# Same hard pin as ci.yml's hexagon jobs: this job also writes the
# shared toolchain cache, so it must verify against the same digest.
HEXAGON_TOOLCHAIN_SHA256: "55b41922318f6331590ab7baa7f5dbdd99c109327a9c44a52c5e9878fab148c1"
steps:
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6

Expand All @@ -30,6 +36,10 @@ jobs:
- name: Build counting plugin
  run: |
    # Fetch the pinned plugin header. -S keeps curl's error message visible
    # under -s, so a failed download is diagnosable from the step log.
    curl -sSfLo /tmp/qemu-plugin.h "$QEMU_PLUGIN_HEADER_URL"
    # Refuse to compile against a header whose digest differs from the pin.
    actual=$(sha256sum /tmp/qemu-plugin.h | cut -d' ' -f1)
    if [ "$actual" != "$QEMU_PLUGIN_HEADER_SHA256" ]; then
      echo "::error::qemu-plugin.h checksum mismatch"; exit 1
    fi
    # pkg-config output is deliberately unquoted so its flags word-split.
    gcc -shared -fPIC $(pkg-config --cflags glib-2.0) -I/tmp \
      -o /tmp/libinsncount.so tools/qemu_insn_plugin/insn_count.c

Expand Down Expand Up @@ -85,13 +95,20 @@ jobs:
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5
with:
path: ~/hexagon
key: ${{ env.HEXAGON_TOOLCHAIN_URL }}-verified-1
# Same digest-keyed name as ci.yml's hexagon jobs; the download
# below verifies the same pin before anything is saved under it.
key: hexagon-toolchain-${{ env.HEXAGON_TOOLCHAIN_SHA256 }}-1

- name: Measure Hexagon
run: |
if [ "${{ steps.cache.outputs.cache-hit }}" != "true" ]; then
mkdir -p ~/hexagon && cd ~/hexagon
curl -sfLo toolchain.tar.zst "$HEXAGON_TOOLCHAIN_URL"
actual=$(sha256sum toolchain.tar.zst | cut -d' ' -f1)
if [ "$actual" != "$HEXAGON_TOOLCHAIN_SHA256" ]; then
echo "::error::toolchain checksum mismatch against pinned value"
exit 1
fi
tar --zstd -xf toolchain.tar.zst && rm toolchain.tar.zst
cd "$GITHUB_WORKSPACE"
fi
Expand Down
3 changes: 2 additions & 1 deletion bench/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ set(BENCHMARK_ENABLE_INSTALL OFF CACHE BOOL "" FORCE)
# Google Benchmark, pinned to the commit behind v1.9.1 rather than the tag
# itself: a tag can be re-pointed upstream, a commit hash cannot.
FetchContent_Declare(
  googlebenchmark
  GIT_REPOSITORY https://github.com/google/benchmark.git
  GIT_TAG c58e6d0710581e3a08d65c349664128a8d9a2461) # v1.9.1
FetchContent_MakeAvailable(googlebenchmark)

add_executable(srt_bench bench_asrc.cpp)
Expand Down
19 changes: 15 additions & 4 deletions docs/PERFORMANCE.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,13 @@ baseline lands and revised deliberately. Stop when any of:

## Regression prevention

- **Deterministic ratchet (CI-gated)**: the QEMU instruction-count benches
compare against a checked-in `bench/baselines.json`; a PR fails if any
metric regresses > 3%. Improvements update the file *in the diff* —
reviewable, with history in git.
- **Deterministic ratchet (CI-gated, two-sided)**: the QEMU
instruction-count benches compare against a checked-in
`bench/baselines.json`; a PR fails if any metric moves more than 3% in
*either* direction. Regressions are rejected; improvements beyond
tolerance also fail until the baseline is re-recorded (`icount.py
--update`) *in the diff* — otherwise the stale slack would let later
regressions hide. Reviewable, with history in git.

Mechanics: `bench/icount/` builds one fixed-workload binary per scenario
(no argv on bare metal); `tools/qemu_insn_plugin/` is the counting
Expand All @@ -95,6 +98,14 @@ from `bench/baselines.json`, and the icount-ratchet CI job regenerates it
and fails on any diff — those published numbers cannot go stale. The SNR
table is already enforced by test thresholds.

## Known debt

- **MSVC /W4 triage outstanding**: the Windows CI leg builds with
`SRT_WERROR=OFF` until the /W4 output has been triaged (ci.yml carries
the matching comment).
- **Tail-latency benchmark not implemented**: the Metrics table promises
p99/max per-call `pull(128)` timing; no benchmark measures it yet.

## Sequencing & status

- [x] **PR A** — this document, Google Benchmark infrastructure
Expand Down
26 changes: 20 additions & 6 deletions platform/armv8m_startup.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ extern "C" {

extern uint32_t __bss_start__, __bss_end__;
extern uint32_t __stack_top;
extern uint32_t __stack_limit;
extern char __heap_start__, __heap_end__;

extern void __libc_init_array(void);
Expand Down Expand Up @@ -101,6 +102,11 @@ uint64_t __atomic_exchange_8(volatile void* ptr, uint64_t value, int memorder) {
}

void Reset_Handler(void) {
/* MSPLIM exists on Armv8-M Mainline only (both targets are M33/M55
* class): a main-stack overflow past __stack_limit raises a fault
* instead of silently corrupting whatever sits below the stack. */
__asm volatile("msr msplim, %0" ::"r"(&__stack_limit));

/* Grant full access to CP10/CP11 (scalar FPU + MVE) first: code below
* may legitimately use FP registers once newlib is involved. */
volatile uint32_t* const cpacr = (volatile uint32_t*)0xE000ED88u;
Expand All @@ -120,15 +126,23 @@ void Default_Handler(void) {
}
}

void HardFault_Handler(void) {
  /* Issue a breakpoint, then park in this handler's own loop: a HardFault
   * (e.g. an escalated MSPLIM violation) then shows up under a debugger as
   * distinct from the vectors that share Default_Handler. */
  __asm volatile("bkpt #0");
  while (1) {
  }
}

__attribute__((section(".vectors"), used)) static const uintptr_t vectors[16] = {
(uintptr_t)&__stack_top,
(uintptr_t)&Reset_Handler,
(uintptr_t)&Default_Handler, /* NMI */
(uintptr_t)&Default_Handler, /* HardFault */
(uintptr_t)&Default_Handler, /* MemManage */
(uintptr_t)&Default_Handler, /* BusFault */
(uintptr_t)&Default_Handler, /* UsageFault */
(uintptr_t)&Default_Handler, /* SecureFault */
(uintptr_t)&Default_Handler, /* NMI */
(uintptr_t)&HardFault_Handler, /* HardFault */
(uintptr_t)&Default_Handler, /* MemManage */
(uintptr_t)&Default_Handler, /* BusFault */
(uintptr_t)&Default_Handler, /* UsageFault */
(uintptr_t)&Default_Handler, /* SecureFault */
0,
0,
0,
Expand Down
4 changes: 4 additions & 0 deletions platform/mps2_an505/mps2_an505.ld
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ SECTIONS
__heap_end__ = .;
} > DATA

/* MSPLIM (set in Reset_Handler): the stack may descend to the heap cap
* but no further — overflow into the heap faults instead of corrupting. */
__stack_limit = __heap_end__;

/* librdimon's (unused, weak) _sbrk references `end`; satisfy it. */
PROVIDE(end = __heap_start__);
}
3 changes: 3 additions & 0 deletions platform/mps3_an547/mps3_an547.ld
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ MEMORY
}

__stack_top = ORIGIN(DTCM) + LENGTH(DTCM);
/* MSPLIM (set in Reset_Handler): the stack owns all of DTCM, so the lowest
* address it may legally reach is the region base. */
__stack_limit = ORIGIN(DTCM);

ENTRY(Reset_Handler)

Expand Down
Loading
Loading