From f8e915e1293f175b16ddd92f1ceaa8086e233fa4 Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Wed, 27 May 2026 13:17:46 +0200 Subject: [PATCH 1/2] spirv: add LLVM SPIR-V backend crash fuzzer --- README.md | 1 + spirv/.gitignore | 11 + spirv/README.md | 110 +++++++ spirv/fuzzer/CMakeLists.txt | 55 ++++ spirv/fuzzer/llvm_spirv_crash_fuzzer.cpp | 399 +++++++++++++++++++++++ spirv/scripts/build_directed_fuzzer.sh | 39 +++ spirv/scripts/build_instrumented_llvm.sh | 90 +++++ spirv/scripts/run_directed_fuzzer.sh | 51 +++ spirv/scripts/seed_ir_corpus.sh | 74 +++++ 9 files changed, 830 insertions(+) create mode 100644 spirv/.gitignore create mode 100644 spirv/README.md create mode 100644 spirv/fuzzer/CMakeLists.txt create mode 100644 spirv/fuzzer/llvm_spirv_crash_fuzzer.cpp create mode 100755 spirv/scripts/build_directed_fuzzer.sh create mode 100755 spirv/scripts/build_instrumented_llvm.sh create mode 100755 spirv/scripts/run_directed_fuzzer.sh create mode 100755 spirv/scripts/seed_ir_corpus.sh diff --git a/README.md b/README.md index dbf8c11..8ce5ae6 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ FuzzX is a collection of compiler fuzzers. | [`ptx/`](ptx/) | NVIDIA `ptxas` fuzzer | | [`amdgpu/`](amdgpu/) | AMDGPU fuzzer | | [`x86/`](x86/) | x86 fuzzer | +| [`spirv/`](spirv/) | LLVM SPIR-V backend crash fuzzer | See each subdirectory for build and run instructions. diff --git a/spirv/.gitignore b/spirv/.gitignore new file mode 100644 index 0000000..55c632f --- /dev/null +++ b/spirv/.gitignore @@ -0,0 +1,11 @@ +/build/ +/runtime/ +fuzz-*.log +__pycache__/ +*.swp + +/third_party/* +!/third_party/llvm-project + +/findings/ +/corpus/ diff --git a/spirv/README.md b/spirv/README.md new file mode 100644 index 0000000..2e8adc9 --- /dev/null +++ b/spirv/README.md @@ -0,0 +1,110 @@ +# FuzzX SPIR-V + +This directory mirrors the [`amdgpu/`](../amdgpu/) layout for the LLVM +SPIR-V backend. + +## How this differs from AMDGPU / PTX (read this first) + +The AMDGPU and PTX fuzzers are **differential-execution** fuzzers: they +generate UB-free programs, compile them, run them on a real GPU through +HIP / the CUDA driver, and compare `-O0` vs `-O2` vs an interpreter +oracle. That's only possible because each backend has a vendor-provided +host runtime that loads and executes its output, already installed on +the box. + +SPIR-V has no equivalent. Executing what the LLVM SPIR-V backend emits +requires either: + +- a Vulkan compute pipeline (Vulkan ICD + descriptor/dispatch boilerplate), +- an OpenCL ICD that accepts LLVM's SPIR-V flavor (e.g. PoCL), or +- SPIRV-Cross translation back to GLSL/HLSL/MSL plus a downstream compiler. + +None is a drop-in equivalent of `libamdhip64` / `libcuda`, and the LLVM +SPIR-V backend itself has no `-O0` vs `-O2` story to diff against (both +go through the same backend pipeline). + +So this fuzzer is **crash-only**: it runs the SPIR-V codegen pipeline +in-process and reports any assertion failure, `report_fatal_error`, +`UNREACHABLE`, segfault, or `CrashRecoveryContext`-trapped abort as a +libFuzzer crash. Everything else — libFuzzer entry, coverage-guided +input mutation, in-process `TargetMachine`, fatal-error handler routed +to `std::abort`, seed-corpus bitcode, the `build_instrumented_llvm.sh` +/ `build_directed_fuzzer.sh` / `run_directed_fuzzer.sh` script trio — +is mirrored straight from `amdgpu/`. + +## Layout + +| Path | Purpose | +| --- | --- | +| `fuzzer/llvm_spirv_crash_fuzzer.cpp` | libFuzzer target. Parses input as bitcode, runs the SPIR-V codegen pipeline under `CrashRecoveryContext`, aborts on any backend ICE. | +| `fuzzer/CMakeLists.txt` | Same shape as `amdgpu/fuzzer/CMakeLists.txt` minus LLD / HIP. | +| `scripts/build_instrumented_llvm.sh` | Builds LLVM with assertions, `SPIRV;X86` targets, and sancov for coverage feedback. | +| `scripts/build_directed_fuzzer.sh` | Builds the libFuzzer target against the instrumented LLVM. | +| `scripts/run_directed_fuzzer.sh` | Seeds the corpus and runs the fuzzer; identical flow to the AMDGPU script. | +| `scripts/seed_ir_corpus.sh` | Emits a single `spirv64` bitcode seed. | +| `third_party/llvm-project/` | Place an llvm-project checkout here, or override `LLVM_PROJECT_DIR=`. | +| `patches/` | For local patches against the LLVM checkout (analogous to `amdgpu/patches/`). | +| `known-crashes/` | Hand-curated reproducers (empty for now). | + +## Build + +``` +# 0. drop or symlink an llvm-project checkout +ln -s /path/to/llvm-project third_party/llvm-project + +# 1. instrumented LLVM (slow; sancov + assertions + SPIRV;X86 targets) +./scripts/build_instrumented_llvm.sh + +# 2. libFuzzer target +./scripts/build_directed_fuzzer.sh +``` + +To reuse an existing LLVM build instead of step 1, set +`LLVM_DIR=/path/to/llvm-build/lib/cmake/llvm` for step 2. Coverage +feedback will be limited to the fuzzer TU (the harness), not the +backend, but the crash-detection path still works — that's how the +smoke run below was driven. + +## Run + +``` +./scripts/run_directed_fuzzer.sh -runs=10000 +``` + +Useful env vars (mirroring AMDGPU): + +| Var | Default | Purpose | +| --- | --- | --- | +| `FUZZER_BIN` | `build/fuzzer/llvm_spirv_crash_fuzzer` | binary to run | +| `CORPUS_DIR` | `$FUZZX_RUNTIME_ROOT/corpus/directed` | libFuzzer corpus | +| `ARTIFACT_DIR` | `$FUZZX_RUNTIME_ROOT/artifacts/directed` | libFuzzer crash dumps | +| `FUZZX_FINDINGS_DIR` | `$FUZZX_RUNTIME_ROOT/findings` | `.bc` / `.ll` for each finding the harness catches | +| `FUZZX_RUNTIME_ROOT` | `${TMPDIR:-/tmp}/fuzzx-spirv-$USER` | parent for the above | + +## Smoke run + +A ~30-second run (`-runs=4000 -max_total_time=120`) against an +`llvm-project` build (LLVM 23.0.0git, assertions on, no sancov) +reproducibly hit: + +``` +Assertion `reservedRegsFrozen() && "Reserved registers haven't been frozen yet. " + "Use TRI::getReservedRegs()."' failed. + at llvm/include/llvm/CodeGen/MachineRegisterInfo.h:964 +``` + +Caveat: the saved `.bc` does **not** reproduce under a standalone +`llc -mtriple=spirv64` invocation. The crash only fires inside the +in-process harness, where `TargetMachine` instances (and any backend +global state) are shared across many compilations — same caching +pattern as AMDGPU's `getTargetMachine`. That means it's either: + +1. a real SPIR-V backend bug latent on repeated in-process codegen + (something fails to re-freeze reserved regs between functions / + modules when the TM is reused), or +2. a harness artifact from how this crash fuzzer drives the SPIR-V + backend. + +Distinguishing the two requires reducing further and trying to repro +with a small in-process driver that just loops `llc`-equivalent +codegen on the saved `.bc`. Not done. diff --git a/spirv/fuzzer/CMakeLists.txt b/spirv/fuzzer/CMakeLists.txt new file mode 100644 index 0000000..c91b35e --- /dev/null +++ b/spirv/fuzzer/CMakeLists.txt @@ -0,0 +1,55 @@ +cmake_minimum_required(VERSION 3.20) + +project(FuzzXLLVMSPIRVCrashFuzzer LANGUAGES C CXX) + +find_package(LLVM REQUIRED CONFIG) + +message(STATUS "Using LLVM ${LLVM_PACKAGE_VERSION} from ${LLVM_DIR}") + +list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") +include(HandleLLVMOptions) + +set(FUZZER_SANITIZERS "fuzzer" CACHE STRING + "Comma-separated sanitizer list for the fuzzer binary") + +add_executable(llvm_spirv_crash_fuzzer llvm_spirv_crash_fuzzer.cpp) +target_compile_features(llvm_spirv_crash_fuzzer PRIVATE cxx_std_17) +target_include_directories(llvm_spirv_crash_fuzzer PRIVATE + ${LLVM_INCLUDE_DIRS} +) +target_compile_definitions(llvm_spirv_crash_fuzzer PRIVATE + ${LLVM_DEFINITIONS} +) + +if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options(llvm_spirv_crash_fuzzer PRIVATE + -fno-omit-frame-pointer + -fsanitize=${FUZZER_SANITIZERS} + ) + target_link_options(llvm_spirv_crash_fuzzer PRIVATE + -fsanitize=${FUZZER_SANITIZERS} + ) +endif() + +if(TARGET LLVM) + target_link_libraries(llvm_spirv_crash_fuzzer PRIVATE LLVM) +else() + llvm_map_components_to_libnames(LLVM_LIBS + Analysis + BitReader + BitWriter + CodeGen + Core + IRReader + MC + Passes + SPIRVCodeGen + SPIRVDesc + SPIRVInfo + Support + Target + TargetParser + TransformUtils + ) + target_link_libraries(llvm_spirv_crash_fuzzer PRIVATE ${LLVM_LIBS}) +endif() diff --git a/spirv/fuzzer/llvm_spirv_crash_fuzzer.cpp b/spirv/fuzzer/llvm_spirv_crash_fuzzer.cpp new file mode 100644 index 0000000..f96d5f0 --- /dev/null +++ b/spirv/fuzzer/llvm_spirv_crash_fuzzer.cpp @@ -0,0 +1,399 @@ +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Support/CrashRecoveryContext.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/TargetParser/Triple.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +namespace { + +constexpr StringRef DefaultTriple = "spirv64-unknown-unknown"; + +bool envFlag(const char *Name, bool Default) { + const char *Value = std::getenv(Name); + if (!Value || !*Value) + return Default; + return std::strcmp(Value, "0") != 0 && std::strcmp(Value, "false") != 0 && + std::strcmp(Value, "False") != 0 && std::strcmp(Value, "no") != 0 && + std::strcmp(Value, "off") != 0; +} + +const Target *getSPIRVTarget() { + static const Target *T = [] { + LLVMInitializeSPIRVTargetInfo(); + LLVMInitializeSPIRVTarget(); + LLVMInitializeSPIRVTargetMC(); + LLVMInitializeSPIRVAsmPrinter(); + + std::string Error; + Triple TT(DefaultTriple); + const Target *Target = TargetRegistry::lookupTarget(TT, Error); + if (!Target) + std::abort(); + return Target; + }(); + return T; +} + +CodeGenOptLevel codeGenOptLevel(OptimizationLevel Level) { + if (Level == OptimizationLevel::O0) + return CodeGenOptLevel::None; + if (Level == OptimizationLevel::O1) + return CodeGenOptLevel::Less; + if (Level == OptimizationLevel::O2) + return CodeGenOptLevel::Default; + return CodeGenOptLevel::Aggressive; +} + +TargetMachine *getTargetMachine(StringRef CPU, OptimizationLevel Level) { + static std::unique_ptr O0TM; + static std::unique_ptr O2TM; + std::unique_ptr &TM = + Level == OptimizationLevel::O0 ? O0TM : O2TM; + if (TM) + return TM.get(); + + Triple TT(DefaultTriple); + TargetOptions Options; + TM.reset(getSPIRVTarget()->createTargetMachine( + TT, CPU, "", Options, std::nullopt, std::nullopt, + codeGenOptLevel(Level))); + if (!TM) + std::abort(); + return TM.get(); +} + +bool runOptimizationPipeline(Module &M, TargetMachine &TM, + OptimizationLevel Level) { + LoopAnalysisManager LAM; + FunctionAnalysisManager FAM; + CGSCCAnalysisManager CGAM; + ModuleAnalysisManager MAM; + + PassBuilder PB(&TM); + PB.registerModuleAnalyses(MAM); + PB.registerCGSCCAnalyses(CGAM); + PB.registerFunctionAnalyses(FAM); + PB.registerLoopAnalyses(LAM); + PB.crossRegisterProxies(LAM, FAM, CGAM, MAM); + + ModulePassManager MPM = + Level == OptimizationLevel::O0 ? PB.buildO0DefaultPipeline(Level) + : PB.buildPerModuleDefaultPipeline(Level); + MPM.run(M, MAM); + return !verifyModule(M, &errs()); +} + +std::string tempPath(StringRef Suffix) { + static std::atomic Counter{0}; + auto Dir = std::filesystem::temp_directory_path(); + auto Now = std::chrono::steady_clock::now().time_since_epoch().count(); + return (Dir / ("fuzzx-spirv-crash-" + std::to_string(getpid()) + "-" + + std::to_string(Now) + "-" + std::to_string(Counter++) + + Suffix.str())) + .string(); +} + +bool writeBytes(StringRef Path, ArrayRef Bytes) { + std::ofstream Out(Path.str(), std::ios::binary); + if (!Out) + return false; + Out.write(Bytes.data(), static_cast(Bytes.size())); + return static_cast(Out); +} + +std::optional> emitObject(Module &M, TargetMachine &TM) { + M.setDataLayout(TM.createDataLayout()); + if (verifyModule(M, &errs())) + return std::nullopt; + + SmallVector Obj; + raw_svector_ostream OS(Obj); + legacy::PassManager PM; + if (TM.addPassesToEmitFile(PM, OS, nullptr, CodeGenFileType::ObjectFile)) + return std::nullopt; + PM.run(M); + return Obj; +} + +struct CompileResult { + SmallVector Object; + std::string FailureStage; + int CrashRetCode = 0; + bool Success = false; + bool Crashed = false; +}; + +std::string moduleToString(Module &M) { + std::string Text; + raw_string_ostream OS(Text); + M.print(OS, nullptr); + return Text; +} + +StringRef getCPU() { + const char *Env = std::getenv("FUZZX_SPIRV_CPU"); + if (Env && *Env) + return Env; + return ""; +} + +std::unique_ptr createIRSkeletonModule(LLVMContext &Ctx, + StringRef /*CPU*/) { + auto M = std::make_unique("fuzzx_spirv_crash", Ctx); + M->setTargetTriple(Triple(DefaultTriple)); + + Type *VoidTy = Type::getVoidTy(Ctx); + Type *I32 = Type::getInt32Ty(Ctx); + Type *GlobalPtr = PointerType::get(Ctx, 1); + FunctionType *FT = + FunctionType::get(VoidTy, {GlobalPtr, GlobalPtr, I32}, false); + Function *F = Function::Create(FT, GlobalValue::ExternalLinkage, + "fuzz_kernel", *M); + F->setCallingConv(CallingConv::SPIR_KERNEL); + + BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); + BasicBlock *Body = BasicBlock::Create(Ctx, "body", F); + BasicBlock *Exit = BasicBlock::Create(Ctx, "exit", F); + + IRBuilder<> B(Entry); + Argument *In = F->getArg(0); + Argument *Out = F->getArg(1); + Argument *N = F->getArg(2); + Value *Ok = B.CreateICmpSGT(N, ConstantInt::get(I32, 0)); + B.CreateCondBr(Ok, Body, Exit); + + B.SetInsertPoint(Body); + Value *V = B.CreateAlignedLoad(I32, In, Align(4)); + Value *Salt = B.CreateMul(N, ConstantInt::getSigned(I32, -1640531527)); + Value *Mix = B.CreateXor(V, Salt); + B.CreateAlignedStore(Mix, Out, Align(4)); + B.CreateBr(Exit); + + B.SetInsertPoint(Exit); + B.CreateRetVoid(); + return M; +} + +bool validateIRCorpusModule(const Module &M) { + if (M.empty()) + return false; + for (const Function &F : M) { + if (F.isDeclaration()) + continue; + if (F.getCallingConv() != CallingConv::SPIR_KERNEL && + F.getCallingConv() != CallingConv::SPIR_FUNC && + F.getCallingConv() != CallingConv::C) + return false; + } + return true; +} + +std::unique_ptr parseIRCorpusModule(const uint8_t *Data, size_t Size, + LLVMContext &Ctx, StringRef CPU, + bool *Valid = nullptr) { + if (Valid) + *Valid = false; + if (Size == 0) + return createIRSkeletonModule(Ctx, CPU); + StringRef Buffer(reinterpret_cast(Data), Size); + MemoryBufferRef MemBuf(Buffer, "fuzzx-spirv-ir-bitcode"); + std::unique_ptr Parsed; + // BitcodeReader is not hardened against arbitrary mutated bytes; trap its + // assertions so we report only SPIR-V backend findings. + CrashRecoveryContext CRC; + CRC.RunSafely([&]() { + Expected> P = parseBitcodeFile(MemBuf, Ctx); + if (!P) { + consumeError(P.takeError()); + return; + } + Parsed = std::move(*P); + }); + if (!Parsed) + return createIRSkeletonModule(Ctx, CPU); + // Force the triple so corpus mutation of target metadata cannot send us to + // a different backend. + Parsed->setTargetTriple(Triple(DefaultTriple)); + if (!validateIRCorpusModule(*Parsed)) + return createIRSkeletonModule(Ctx, CPU); + if (Valid) + *Valid = true; + return Parsed; +} + +void saveFailureFinding(const uint8_t *Data, size_t Size, StringRef IRText, + StringRef Kind, StringRef Stage, + std::optional CrashRetCode = std::nullopt) { + const char *FindingsDir = std::getenv("FUZZX_FINDINGS_DIR"); + if (!FindingsDir || !*FindingsDir) + return; + std::error_code EC; + std::filesystem::create_directories(FindingsDir, EC); + auto Now = std::chrono::steady_clock::now().time_since_epoch().count(); + std::string Base = std::string(FindingsDir) + "/" + Kind.str() + "-" + + Stage.str() + "-" + std::to_string(getpid()) + "-" + + std::to_string(Now); + if (CrashRetCode) + Base += "-rc" + std::to_string(*CrashRetCode); + std::ofstream BC(Base + ".bc", std::ios::binary); + if (BC) + BC.write(reinterpret_cast(Data), + static_cast(Size)); + if (!IRText.empty()) { + std::ofstream LL(Base + ".ll"); + if (LL) + LL.write(IRText.data(), static_cast(IRText.size())); + } +} + +CompileResult compileIRModuleToObject(Module &M, StringRef CPU, + OptimizationLevel Level, + std::string *IRText = nullptr) { + CompileResult R; + TargetMachine *TM = getTargetMachine(CPU, Level); + if (!TM) { + R.FailureStage = "target-machine"; + return R; + } + CrashRecoveryContext CRC; + CRC.DumpStackAndCleanupOnFailure = true; + bool Ran = CRC.RunSafely([&]() { + if (!runOptimizationPipeline(M, *TM, Level)) { + R.FailureStage = "opt"; + return; + } + if (IRText) + *IRText = moduleToString(M); + auto Obj = emitObject(M, *TM); + if (!Obj) { + R.FailureStage = "codegen"; + return; + } + R.Object = std::move(*Obj); + R.Success = true; + }); + if (!Ran) { + R.Crashed = true; + R.CrashRetCode = -1; + if (R.FailureStage.empty()) + R.FailureStage = "codegen"; + } + return R; +} + +} // namespace + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size > 1 << 20) + return 0; + + StringRef CPU = getCPU(); + LLVMContext Ctx; + bool ValidInput = false; + std::unique_ptr M = + parseIRCorpusModule(Data, Size, Ctx, CPU, &ValidInput); + if (!ValidInput) + return 0; + if (!validateIRCorpusModule(*M)) + return 0; + + std::string O0IR; + auto O0Obj = compileIRModuleToObject(*M, CPU, OptimizationLevel::O0, &O0IR); + if (!O0Obj.Success) { + if (O0Obj.FailureStage != "validate") { + std::string Stage = "o0-" + O0Obj.FailureStage; + saveFailureFinding(Data, Size, O0IR, + O0Obj.Crashed ? "compiler-crash" + : "compiler-failure", + Stage, + O0Obj.Crashed + ? std::optional(O0Obj.CrashRetCode) + : std::nullopt); + if (O0Obj.Crashed) + std::abort(); + } + return 0; + } + + // Re-parse a fresh copy for O2 since the O0 pipeline mutated the module. + ValidInput = false; + std::unique_ptr M2 = + parseIRCorpusModule(Data, Size, Ctx, CPU, &ValidInput); + if (!ValidInput) + return 0; + + std::string O2IR; + auto O2Obj = compileIRModuleToObject(*M2, CPU, OptimizationLevel::O2, &O2IR); + if (!O2Obj.Success) { + if (O2Obj.FailureStage != "validate") { + std::string Stage = "o2-" + O2Obj.FailureStage; + saveFailureFinding(Data, Size, O2IR, + O2Obj.Crashed ? "compiler-crash" + : "compiler-failure", + Stage, + O2Obj.Crashed + ? std::optional(O2Obj.CrashRetCode) + : std::nullopt); + if (O2Obj.Crashed) + std::abort(); + } + return 0; + } + + // SPIR-V has no host runtime, so we stop after codegen. See ../README.md + // for why a faithful differential port (the AMDGPU/PTX pattern) requires + // setting up a Vulkan or OpenCL ICD. + (void)envFlag; + (void)writeBytes; + (void)tempPath; + return 0; +} + +extern "C" int LLVMFuzzerInitialize(int *, char ***) { + CrashRecoveryContext::Enable(); + install_fatal_error_handler( + [](void *, const char *Reason, bool) { + errs() << "FuzzX SPIR-V fatal: " << Reason << "\n"; + std::abort(); + }, + nullptr); + install_bad_alloc_error_handler( + [](void *, const char *Reason, bool) { + errs() << "FuzzX SPIR-V bad alloc: " << Reason << "\n"; + std::abort(); + }, + nullptr); + StringRef CPU = getCPU(); + (void)getTargetMachine(CPU, OptimizationLevel::O0); + (void)getTargetMachine(CPU, OptimizationLevel::O2); + return 0; +} diff --git a/spirv/scripts/build_directed_fuzzer.sh b/spirv/scripts/build_directed_fuzzer.sh new file mode 100755 index 0000000..7297942 --- /dev/null +++ b/spirv/scripts/build_directed_fuzzer.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Build the in-process SPIR-V backend crash libFuzzer target. +# Mirrors amdgpu/scripts/build_directed_fuzzer.sh, with HIP/LLD removed. +# +# Required unless using the default instrumented build path: +# LLVM_DIR=/path/to/llvm-build/lib/cmake/llvm +# +# Optional: +# FUZZER_SANITIZERS=fuzzer,address +# CMAKE_BUILD_TYPE=Release + +set -euo pipefail + +cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/.." +ROOT="$(pwd)" + +LLVM_BUILD_DIR="${LLVM_BUILD_DIR:-$ROOT/build/llvm-fuzzer}" +LLVM_DIR="${LLVM_DIR:-$LLVM_BUILD_DIR/lib/cmake/llvm}" +FUZZER_BUILD_DIR="${FUZZER_BUILD_DIR:-$ROOT/build/fuzzer}" +FUZZER_SANITIZERS="${FUZZER_SANITIZERS:-fuzzer}" +CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE:-Release}" + +if [[ ! -f "$LLVM_DIR/LLVMConfig.cmake" ]]; then + echo "LLVMConfig.cmake not found under LLVM_DIR=$LLVM_DIR" >&2 + echo "Build LLVM first with scripts/build_instrumented_llvm.sh or set LLVM_DIR." >&2 + exit 2 +fi + +cmake -S "$ROOT/fuzzer" -B "$FUZZER_BUILD_DIR" -G Ninja \ + -DLLVM_DIR="$LLVM_DIR" \ + -DFUZZER_SANITIZERS="$FUZZER_SANITIZERS" \ + -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \ + -DCMAKE_C_COMPILER="${CC:-clang}" \ + -DCMAKE_CXX_COMPILER="${CXX:-clang++}" + +cmake --build "$FUZZER_BUILD_DIR" --target llvm_spirv_crash_fuzzer \ + --parallel "${NINJAJOBS:-$(nproc)}" + +echo "$FUZZER_BUILD_DIR/llvm_spirv_crash_fuzzer" diff --git a/spirv/scripts/build_instrumented_llvm.sh b/spirv/scripts/build_instrumented_llvm.sh new file mode 100755 index 0000000..5c8de73 --- /dev/null +++ b/spirv/scripts/build_instrumented_llvm.sh @@ -0,0 +1,90 @@ +#!/usr/bin/env bash +# Configure and build an LLVM tree suitable for coverage-guided SPIR-V +# backend crash fuzzing. Mirrors amdgpu/scripts/build_instrumented_llvm.sh, +# with HIP/AMDGPU specifics removed. +# +# Optional: +# LLVM_PROJECT_DIR=/path/to/llvm-project +# LLVM_BUILD_DIR=$PWD/build/llvm-fuzzer +# LLVM_INSTALL_DIR=$PWD/build/llvm-fuzzer-install +# LLVM_TARGETS_TO_BUILD='SPIRV;X86' +# LLVM_ENABLE_ASSERTIONS=ON +# LLVM_USE_SANITIZER=OFF +# LLVM_USE_SANITIZE_COVERAGE=ON +# LLVM_FUZZX_SANCOV=ON +# CMAKE_BUILD_TYPE=Release +# HOST_CLANG / HOST_CLANGXX + +set -euo pipefail + +cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/.." +ROOT="$(pwd)" + +LLVM_PROJECT_DIR="${LLVM_PROJECT_DIR:-$ROOT/third_party/llvm-project}" + +if [[ ! -d "$LLVM_PROJECT_DIR/llvm" ]]; then + echo "LLVM source checkout not found under LLVM_PROJECT_DIR=$LLVM_PROJECT_DIR" >&2 + echo "Run: git clone --depth 1 https://github.com/llvm/llvm-project $LLVM_PROJECT_DIR" >&2 + exit 2 +fi + +LLVM_PROJECT_DIR="$(cd "$LLVM_PROJECT_DIR" && pwd)" +LLVM_BUILD_DIR="${LLVM_BUILD_DIR:-$ROOT/build/llvm-fuzzer}" +LLVM_INSTALL_DIR="${LLVM_INSTALL_DIR:-$ROOT/build/llvm-fuzzer-install}" +LLVM_TARGETS_TO_BUILD="${LLVM_TARGETS_TO_BUILD:-SPIRV;X86}" +# Assertions on so we catch backend ICEs, the whole point of the fuzzer. +LLVM_ENABLE_ASSERTIONS="${LLVM_ENABLE_ASSERTIONS:-ON}" +LLVM_USE_SANITIZER="${LLVM_USE_SANITIZER:-OFF}" +LLVM_USE_SANITIZE_COVERAGE="${LLVM_USE_SANITIZE_COVERAGE:-ON}" +# LLVM_FUZZX_SANCOV=ON injects -fsanitize-coverage=... into CMAKE_*_FLAGS so +# every LLVM TU gets sancov even when LLVM_USE_SANITIZER is OFF. +LLVM_FUZZX_SANCOV="${LLVM_FUZZX_SANCOV:-ON}" +CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE:-Release}" + +HOST_CLANG="${HOST_CLANG:-${CC:-clang}}" +HOST_CLANGXX="${HOST_CLANGXX:-${CXX:-clang++}}" + +cmake_args=( + -S "$LLVM_PROJECT_DIR/llvm" + -B "$LLVM_BUILD_DIR" + -G Ninja + -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" + -DCMAKE_C_COMPILER="$HOST_CLANG" + -DCMAKE_CXX_COMPILER="$HOST_CLANGXX" + -DCMAKE_INSTALL_PREFIX="$LLVM_INSTALL_DIR" + -DLLVM_ENABLE_PROJECTS="" + -DLLVM_TARGETS_TO_BUILD="$LLVM_TARGETS_TO_BUILD" + -DLLVM_ENABLE_ASSERTIONS="$LLVM_ENABLE_ASSERTIONS" + -DLLVM_USE_SANITIZE_COVERAGE="$LLVM_USE_SANITIZE_COVERAGE" + -DLLVM_LINK_LLVM_DYLIB=OFF + -DBUILD_SHARED_LIBS=OFF +) + +if [[ -n "$LLVM_USE_SANITIZER" && "$LLVM_USE_SANITIZER" != "OFF" ]]; then + cmake_args+=(-DLLVM_USE_SANITIZER="$LLVM_USE_SANITIZER") +else + cmake_args+=(-DLLVM_USE_SANITIZER=) +fi + +if [[ "$LLVM_FUZZX_SANCOV" =~ ^(1|ON|on|true|TRUE|yes|YES)$ ]]; then + SANCOV_FLAGS="-fsanitize-coverage=inline-8bit-counters,pc-table" + cmake_args+=( + -DCMAKE_C_FLAGS_INIT="$SANCOV_FLAGS" + -DCMAKE_CXX_FLAGS_INIT="$SANCOV_FLAGS" + ) +fi + +cmake "${cmake_args[@]}" + +cmake --build "$LLVM_BUILD_DIR" \ + --target llc llvm-stress opt llvm-as \ + --parallel "${NINJAJOBS:-$(nproc)}" + +cat <&2 + echo "Run scripts/build_directed_fuzzer.sh first." >&2 + exit 2 +fi + +if [[ "$FUZZX_LOCALIZE_FUZZER" != "0" && "$FUZZX_LOCALIZE_FUZZER" != "false" ]]; then + mkdir -p "$RUNTIME_ROOT/bin" + fuzzer_key="$(printf '%s' "$FUZZER_BIN" | cksum | awk '{print $1}')" + local_fuzzer_bin="$RUNTIME_ROOT/bin/$(basename "$FUZZER_BIN")-$fuzzer_key" + src_size="$(stat -c '%s' "$FUZZER_BIN")" + dst_size="$(stat -c '%s' "$local_fuzzer_bin" 2>/dev/null || echo -1)" + if [[ ! -x "$local_fuzzer_bin" || "$FUZZER_BIN" -nt "$local_fuzzer_bin" || "$src_size" != "$dst_size" ]]; then + cp -f "$FUZZER_BIN" "$local_fuzzer_bin.tmp" + chmod +x "$local_fuzzer_bin.tmp" + mv -f "$local_fuzzer_bin.tmp" "$local_fuzzer_bin" + fi + FUZZER_BIN="$local_fuzzer_bin" +fi + +mkdir -p "$CORPUS_DIR" "$ARTIFACT_DIR" "$FUZZX_FINDINGS_DIR" "$TMPDIR" +"$ROOT/scripts/seed_ir_corpus.sh" "$CORPUS_DIR" + +export TMPDIR +export FUZZX_FINDINGS_DIR +export ASAN_OPTIONS + +exec "$FUZZER_BIN" "$CORPUS_DIR" \ + -artifact_prefix="$ARTIFACT_DIR/" \ + "$@" diff --git a/spirv/scripts/seed_ir_corpus.sh b/spirv/scripts/seed_ir_corpus.sh new file mode 100755 index 0000000..85eb1c2 --- /dev/null +++ b/spirv/scripts/seed_ir_corpus.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# Seed an empty SPIR-V fuzzer corpus with a valid LLVM bitcode module +# targeting spirv64. Mirrors amdgpu/scripts/seed_ir_corpus.sh. + +set -euo pipefail + +ROOT="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")/.." && pwd)" + +if [[ "$#" -ne 1 ]]; then + echo "usage: $0 CORPUS_DIR" >&2 + exit 2 +fi + +CORPUS_DIR="$1" +mkdir -p "$CORPUS_DIR" +if compgen -G "$CORPUS_DIR/*" >/dev/null; then + exit 0 +fi + +find_opt() { + if [[ -n "${LLVM_OPT:-}" ]]; then + printf '%s\n' "$LLVM_OPT" + return 0 + fi + + local candidate + for candidate in \ + "$ROOT/build/llvm-fuzzer/bin/opt" \ + "$ROOT/build/llvm-fuzzer-install/bin/opt" \ + opt; do + if [[ "$candidate" == */* ]]; then + if [[ -x "$candidate" ]]; then + printf '%s\n' "$candidate" + return 0 + fi + elif command -v "$candidate" >/dev/null 2>&1; then + command -v "$candidate" + return 0 + fi + done + return 1 +} + +LLVM_OPT_BIN="$(find_opt)" || { + echo "could not find LLVM opt; set LLVM_OPT=/path/to/opt" >&2 + exit 2 +} + +TMP_LL="$CORPUS_DIR/.seed-$$.ll" +TMP_BC="$CORPUS_DIR/.seed-$$.bc" +trap 'rm -f "$TMP_LL" "$TMP_BC"' EXIT + +cat >"$TMP_LL" <<'EOF' +target triple = "spirv64-unknown-unknown" + +define spir_kernel void @fuzz_kernel(ptr addrspace(1) %in, ptr addrspace(1) %out, i32 %n) { +entry: + %ok = icmp sgt i32 %n, 0 + br i1 %ok, label %body, label %exit + +body: + %v = load i32, ptr addrspace(1) %in, align 4 + %salt = mul i32 %n, -1640531527 + %mix = xor i32 %v, %salt + store i32 %mix, ptr addrspace(1) %out, align 4 + br label %exit + +exit: + ret void +} +EOF + +"$LLVM_OPT_BIN" -o "$TMP_BC" "$TMP_LL" +mv "$TMP_BC" "$CORPUS_DIR/seed.bc" From 2e17d2ed144f9a29c048ee1fd4dff8fe1ead7099 Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Wed, 27 May 2026 15:25:38 +0200 Subject: [PATCH 2/2] Address bot comments --- spirv/fuzzer/llvm_spirv_crash_fuzzer.cpp | 155 +++++++++-------------- 1 file changed, 58 insertions(+), 97 deletions(-) diff --git a/spirv/fuzzer/llvm_spirv_crash_fuzzer.cpp b/spirv/fuzzer/llvm_spirv_crash_fuzzer.cpp index f96d5f0..69287a6 100644 --- a/spirv/fuzzer/llvm_spirv_crash_fuzzer.cpp +++ b/spirv/fuzzer/llvm_spirv_crash_fuzzer.cpp @@ -18,11 +18,9 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/TargetParser/Triple.h" -#include #include #include #include -#include #include #include #include @@ -37,15 +35,6 @@ namespace { constexpr StringRef DefaultTriple = "spirv64-unknown-unknown"; -bool envFlag(const char *Name, bool Default) { - const char *Value = std::getenv(Name); - if (!Value || !*Value) - return Default; - return std::strcmp(Value, "0") != 0 && std::strcmp(Value, "false") != 0 && - std::strcmp(Value, "False") != 0 && std::strcmp(Value, "no") != 0 && - std::strcmp(Value, "off") != 0; -} - const Target *getSPIRVTarget() { static const Target *T = [] { LLVMInitializeSPIRVTargetInfo(); @@ -73,22 +62,16 @@ CodeGenOptLevel codeGenOptLevel(OptimizationLevel Level) { return CodeGenOptLevel::Aggressive; } -TargetMachine *getTargetMachine(StringRef CPU, OptimizationLevel Level) { - static std::unique_ptr O0TM; - static std::unique_ptr O2TM; - std::unique_ptr &TM = - Level == OptimizationLevel::O0 ? O0TM : O2TM; - if (TM) - return TM.get(); - +std::unique_ptr createTargetMachine(StringRef CPU, + OptimizationLevel Level) { Triple TT(DefaultTriple); TargetOptions Options; - TM.reset(getSPIRVTarget()->createTargetMachine( + std::unique_ptr TM(getSPIRVTarget()->createTargetMachine( TT, CPU, "", Options, std::nullopt, std::nullopt, codeGenOptLevel(Level))); if (!TM) std::abort(); - return TM.get(); + return TM; } bool runOptimizationPipeline(Module &M, TargetMachine &TM, @@ -112,29 +95,7 @@ bool runOptimizationPipeline(Module &M, TargetMachine &TM, return !verifyModule(M, &errs()); } -std::string tempPath(StringRef Suffix) { - static std::atomic Counter{0}; - auto Dir = std::filesystem::temp_directory_path(); - auto Now = std::chrono::steady_clock::now().time_since_epoch().count(); - return (Dir / ("fuzzx-spirv-crash-" + std::to_string(getpid()) + "-" + - std::to_string(Now) + "-" + std::to_string(Counter++) + - Suffix.str())) - .string(); -} - -bool writeBytes(StringRef Path, ArrayRef Bytes) { - std::ofstream Out(Path.str(), std::ios::binary); - if (!Out) - return false; - Out.write(Bytes.data(), static_cast(Bytes.size())); - return static_cast(Out); -} - std::optional> emitObject(Module &M, TargetMachine &TM) { - M.setDataLayout(TM.createDataLayout()); - if (verifyModule(M, &errs())) - return std::nullopt; - SmallVector Obj; raw_svector_ostream OS(Obj); legacy::PassManager PM; @@ -279,34 +240,47 @@ CompileResult compileIRModuleToObject(Module &M, StringRef CPU, OptimizationLevel Level, std::string *IRText = nullptr) { CompileResult R; - TargetMachine *TM = getTargetMachine(CPU, Level); - if (!TM) { - R.FailureStage = "target-machine"; + std::unique_ptr TM = createTargetMachine(CPU, Level); + + M.setDataLayout(TM->createDataLayout()); + if (verifyModule(M, &errs())) { + R.FailureStage = "verify"; return R; } - CrashRecoveryContext CRC; - CRC.DumpStackAndCleanupOnFailure = true; - bool Ran = CRC.RunSafely([&]() { - if (!runOptimizationPipeline(M, *TM, Level)) { - R.FailureStage = "opt"; - return; - } - if (IRText) - *IRText = moduleToString(M); - auto Obj = emitObject(M, *TM); - if (!Obj) { - R.FailureStage = "codegen"; - return; + + auto runStage = [&](const char *Stage, auto &&Fn) -> bool { + CrashRecoveryContext CRC; + CRC.DumpStackAndCleanupOnFailure = true; + if (!CRC.RunSafely(std::forward(Fn))) { + R.FailureStage = Stage; + R.Crashed = true; + R.CrashRetCode = CRC.RetCode; + return false; } - R.Object = std::move(*Obj); - R.Success = true; - }); - if (!Ran) { - R.Crashed = true; - R.CrashRetCode = -1; - if (R.FailureStage.empty()) - R.FailureStage = "codegen"; + return true; + }; + + bool PipelineOk = false; + if (!runStage("opt", + [&] { PipelineOk = runOptimizationPipeline(M, *TM, Level); })) + return R; + if (!PipelineOk) { + R.FailureStage = "opt"; + return R; } + + if (IRText) + *IRText = moduleToString(M); + + std::optional> Obj; + if (!runStage("codegen", [&] { Obj = emitObject(M, *TM); })) + return R; + if (!Obj) { + R.FailureStage = "codegen"; + return R; + } + R.Object = std::move(*Obj); + R.Success = true; return R; } @@ -329,18 +303,13 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { std::string O0IR; auto O0Obj = compileIRModuleToObject(*M, CPU, OptimizationLevel::O0, &O0IR); if (!O0Obj.Success) { - if (O0Obj.FailureStage != "validate") { - std::string Stage = "o0-" + O0Obj.FailureStage; - saveFailureFinding(Data, Size, O0IR, - O0Obj.Crashed ? "compiler-crash" - : "compiler-failure", - Stage, - O0Obj.Crashed - ? std::optional(O0Obj.CrashRetCode) - : std::nullopt); - if (O0Obj.Crashed) - std::abort(); - } + saveFailureFinding(Data, Size, O0IR, + O0Obj.Crashed ? "compiler-crash" : "compiler-failure", + "o0-" + O0Obj.FailureStage, + O0Obj.Crashed ? std::optional(O0Obj.CrashRetCode) + : std::nullopt); + if (O0Obj.Crashed) + std::abort(); return 0; } @@ -354,27 +323,19 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { std::string O2IR; auto O2Obj = compileIRModuleToObject(*M2, CPU, OptimizationLevel::O2, &O2IR); if (!O2Obj.Success) { - if (O2Obj.FailureStage != "validate") { - std::string Stage = "o2-" + O2Obj.FailureStage; - saveFailureFinding(Data, Size, O2IR, - O2Obj.Crashed ? "compiler-crash" - : "compiler-failure", - Stage, - O2Obj.Crashed - ? std::optional(O2Obj.CrashRetCode) - : std::nullopt); - if (O2Obj.Crashed) - std::abort(); - } + saveFailureFinding(Data, Size, O2IR, + O2Obj.Crashed ? "compiler-crash" : "compiler-failure", + "o2-" + O2Obj.FailureStage, + O2Obj.Crashed ? std::optional(O2Obj.CrashRetCode) + : std::nullopt); + if (O2Obj.Crashed) + std::abort(); return 0; } // SPIR-V has no host runtime, so we stop after codegen. See ../README.md // for why a faithful differential port (the AMDGPU/PTX pattern) requires // setting up a Vulkan or OpenCL ICD. - (void)envFlag; - (void)writeBytes; - (void)tempPath; return 0; } @@ -392,8 +353,8 @@ extern "C" int LLVMFuzzerInitialize(int *, char ***) { std::abort(); }, nullptr); - StringRef CPU = getCPU(); - (void)getTargetMachine(CPU, OptimizationLevel::O0); - (void)getTargetMachine(CPU, OptimizationLevel::O2); + // Smoke-test target initialization once at startup so any registration + // failure surfaces before the fuzzer enters its hot loop. + (void)createTargetMachine(getCPU(), OptimizationLevel::O0); return 0; }