From 1fc2fb8367d956e9858fbc3d966f9cd2849a8417 Mon Sep 17 00:00:00 2001 From: Maxim Menshikov Date: Fri, 29 May 2026 14:48:31 +0100 Subject: [PATCH 1/3] Inline barriers Signed-off-by: Maxim Menshikov --- .../24_inline_write_barrier_riscv64.patch | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 patches/bflat-runtime/24_inline_write_barrier_riscv64.patch diff --git a/patches/bflat-runtime/24_inline_write_barrier_riscv64.patch b/patches/bflat-runtime/24_inline_write_barrier_riscv64.patch new file mode 100644 index 0000000..78fcff4 --- /dev/null +++ b/patches/bflat-runtime/24_inline_write_barrier_riscv64.patch @@ -0,0 +1,85 @@ +diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h +index 6e36a55a67a..688db223f6a 100644 +--- a/src/coreclr/jit/jitconfigvalues.h ++++ b/src/coreclr/jit/jitconfigvalues.h +@@ -35,6 +35,14 @@ CONFIG_INTEGER(AltJitLimit, "AltJitLimit", 0) + // If AltJit hits an assert, fall back to the fallback JIT. Useful in conjunction with DOTNET_ContinueOnAssert=1 + CONFIG_INTEGER(AltJitSkipOnAssert, "AltJitSkipOnAssert", 0) + ++// zkVM/RISC-V64: uGC is non-moving, non-generational and single-threaded, so a ++// managed reference store needs no card mark or checked write barrier - it is a ++// plain store. When set (default), the RISC-V64 codegen emits ref stores inline ++// instead of calling the (already barrier-free) RhpAssignRef* / ASSIGN_BYREF ++// helpers, saving the t3/t4 arg marshalling, the call/ret and the helper body. ++// Set DOTNET_JitRiscV64InlineWriteBarrier=0 to restore the out-of-line helper call. ++CONFIG_INTEGER(RiscV64InlineWriteBarrier, "JitRiscV64InlineWriteBarrier", 1) ++ + // Breaks when using internal logging on a particular token value. + CONFIG_INTEGER(BreakOnDumpToken, "BreakOnDumpToken", 0xffffffff) + +diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp +index 540d92a9060..b33ad238dc6 100644 +--- a/src/coreclr/jit/codegenriscv64.cpp ++++ b/src/coreclr/jit/codegenriscv64.cpp +@@ -2230,8 +2232,24 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) + } + else + { +- // In the case of a GC-Pointer we'll call the ByRef write barrier helper +- genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); ++ if (JitConfig.RiscV64InlineWriteBarrier() != 0) ++ { ++ // zkVM: the ByRef barrier helper is reduced to a bare ++ // *dst++ = *src++ (uGC has no card table). Inline it to drop ++ // the per-slot call/ret. Mirrors __wrap_RhpByRefAssignRef: ++ // ld t,0(t5); addi t5,t5,8; sd t,0(t3); addi t3,t3,8. ++ emit->emitIns_R_R_I(INS_ld, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); ++ emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_WRITE_BARRIER_SRC_BYREF, ++ REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); ++ emit->emitIns_R_R_I(INS_sd, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); ++ emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_WRITE_BARRIER_DST_BYREF, ++ REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); ++ } ++ else ++ { ++ // In the case of a GC-Pointer we'll call the ByRef write barrier helper ++ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); ++ } + gcPtrCount--; + } + ++i; +@@ -2908,6 +2926,16 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) + GenTree* addr = tree->Addr(); + + GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree); ++ ++ // zkVM: uGC never relocates or collects, so a GC ref store is just a plain ++ // store. Skip the helper entirely and fall through to the inline-store path ++ // below, which emits a single sd from the data/addr registers (no t3/t4 ++ // marshalling, no call/ret, no helper body). See JitRiscV64InlineWriteBarrier. ++ if ((writeBarrierForm != GCInfo::WBF_NoBarrier) && (JitConfig.RiscV64InlineWriteBarrier() != 0)) ++ { ++ writeBarrierForm = GCInfo::WBF_NoBarrier; ++ } ++ + if (writeBarrierForm != GCInfo::WBF_NoBarrier) + { + // data and addr must be in registers. +diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp +index 54d2b2f399f..cdbaf86905a 100644 +--- a/src/coreclr/jit/lsrariscv64.cpp ++++ b/src/coreclr/jit/lsrariscv64.cpp +@@ -706,7 +706,12 @@ int LinearScan::BuildNode(GenTree* tree) + { + assert(dstCount == 0); + +- if (m_compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree->AsStoreInd())) ++ // zkVM: when ref stores are inlined (see JitRiscV64InlineWriteBarrier ++ // / genCodeForStoreInd), do NOT pin addr/data to t3/t4 - treat the ++ // store as an ordinary indir so the operands stay in whatever ++ // registers LSRA picks and the t3/t4 copies disappear. ++ if ((JitConfig.RiscV64InlineWriteBarrier() == 0) && ++ m_compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree->AsStoreInd())) + { + srcCount = BuildGCWriteBarrier(tree); + break; From e82831e6fb977fcb32c7f49572f05e67115b278a Mon Sep 17 00:00:00 2001 From: Maxim Menshikov Date: Fri, 29 May 2026 15:06:06 +0100 Subject: [PATCH 2/3] Update patch to dotnet 10 Signed-off-by: Maxim Menshikov --- .../24_inline_write_barrier_riscv64.patch | 34 ++++++++----------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/patches/bflat-runtime/24_inline_write_barrier_riscv64.patch b/patches/bflat-runtime/24_inline_write_barrier_riscv64.patch index 78fcff4..30658d0 100644 --- a/patches/bflat-runtime/24_inline_write_barrier_riscv64.patch +++ b/patches/bflat-runtime/24_inline_write_barrier_riscv64.patch @@ -1,27 +1,23 @@ -diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h -index 6e36a55a67a..688db223f6a 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h -@@ -35,6 +35,14 @@ CONFIG_INTEGER(AltJitLimit, "AltJitLimit", 0) - // If AltJit hits an assert, fall back to the fallback JIT. Useful in conjunction with DOTNET_ContinueOnAssert=1 - CONFIG_INTEGER(AltJitSkipOnAssert, "AltJitSkipOnAssert", 0) - +@@ -516,6 +516,14 @@ + CONFIG_INTEGER(JitInlinePolicyProfile, W("JitInlinePolicyProfile"), 0) + CONFIG_INTEGER(JitInlinePolicyProfileThreshold, W("JitInlinePolicyProfileThreshold"), 40) + CONFIG_INTEGER(JitObjectStackAllocation, W("JitObjectStackAllocation"), 0) ++ +// zkVM/RISC-V64: uGC is non-moving, non-generational and single-threaded, so a +// managed reference store needs no card mark or checked write barrier - it is a +// plain store. When set (default), the RISC-V64 codegen emits ref stores inline +// instead of calling the (already barrier-free) RhpAssignRef* / ASSIGN_BYREF -+// helpers, saving the t3/t4 arg marshalling, the call/ret and the helper body. ++// helpers. Kept in the release-visible section so it applies in the shipped JIT. +// Set DOTNET_JitRiscV64InlineWriteBarrier=0 to restore the out-of-line helper call. -+CONFIG_INTEGER(RiscV64InlineWriteBarrier, "JitRiscV64InlineWriteBarrier", 1) -+ - // Breaks when using internal logging on a particular token value. - CONFIG_INTEGER(BreakOnDumpToken, "BreakOnDumpToken", 0xffffffff) ++CONFIG_INTEGER(RiscV64InlineWriteBarrier, W("JitRiscV64InlineWriteBarrier"), 1) + + CONFIG_INTEGER(JitEECallTimingInfo, W("JitEECallTimingInfo"), 0) -diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp -index 540d92a9060..b33ad238dc6 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp -@@ -2230,8 +2232,24 @@ void CodeGen::genCodeForCpObj(GenTreeBlk* cpObjNode) +@@ -2527,8 +2527,24 @@ } else { @@ -48,7 +44,7 @@ index 540d92a9060..b33ad238dc6 100644 gcPtrCount--; } ++i; -@@ -2908,6 +2926,16 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree) +@@ -3040,6 +3056,16 @@ GenTree* addr = tree->Addr(); GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree); @@ -65,21 +61,19 @@ index 540d92a9060..b33ad238dc6 100644 if (writeBarrierForm != GCInfo::WBF_NoBarrier) { // data and addr must be in registers. -diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp -index 54d2b2f399f..cdbaf86905a 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp -@@ -706,7 +706,12 @@ int LinearScan::BuildNode(GenTree* tree) +@@ -552,7 +552,12 @@ { assert(dstCount == 0); -- if (m_compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree->AsStoreInd())) +- if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree->AsStoreInd())) + // zkVM: when ref stores are inlined (see JitRiscV64InlineWriteBarrier + // / genCodeForStoreInd), do NOT pin addr/data to t3/t4 - treat the + // store as an ordinary indir so the operands stay in whatever + // registers LSRA picks and the t3/t4 copies disappear. + if ((JitConfig.RiscV64InlineWriteBarrier() == 0) && -+ m_compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree->AsStoreInd())) ++ compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree->AsStoreInd())) { srcCount = BuildGCWriteBarrier(tree); break; From 1af404a18621317787aaaba9953270ad3d16822d Mon Sep 17 00:00:00 2001 From: Maxim Menshikov Date: Fri, 29 May 2026 15:28:26 +0100 Subject: [PATCH 3/3] RiscV64 write barriers: drop config toggle, always inline Simplify the inline write-barrier patch by removing the JitRiscV64InlineWriteBarrier config knob and the out-of-line fallback paths. GC ref stores are now unconditionally emitted as plain inline stores on RISC-V64, since uGC never relocates and the helpers are already barrier-free. LSRA no longer pins addr/data to t3/t4. Signed-off-by: Maxim Menshikov --- .../24_inline_write_barrier_riscv64.patch | 80 ++++++------------- 1 file changed, 26 insertions(+), 54 deletions(-) diff --git a/patches/bflat-runtime/24_inline_write_barrier_riscv64.patch b/patches/bflat-runtime/24_inline_write_barrier_riscv64.patch index 30658d0..565055e 100644 --- a/patches/bflat-runtime/24_inline_write_barrier_riscv64.patch +++ b/patches/bflat-runtime/24_inline_write_barrier_riscv64.patch @@ -1,79 +1,51 @@ ---- a/src/coreclr/jit/jitconfigvalues.h -+++ b/src/coreclr/jit/jitconfigvalues.h -@@ -516,6 +516,14 @@ - CONFIG_INTEGER(JitInlinePolicyProfile, W("JitInlinePolicyProfile"), 0) - CONFIG_INTEGER(JitInlinePolicyProfileThreshold, W("JitInlinePolicyProfileThreshold"), 40) - CONFIG_INTEGER(JitObjectStackAllocation, W("JitObjectStackAllocation"), 0) -+ -+// zkVM/RISC-V64: uGC is non-moving, non-generational and single-threaded, so a -+// managed reference store needs no card mark or checked write barrier - it is a -+// plain store. When set (default), the RISC-V64 codegen emits ref stores inline -+// instead of calling the (already barrier-free) RhpAssignRef* / ASSIGN_BYREF -+// helpers. Kept in the release-visible section so it applies in the shipped JIT. -+// Set DOTNET_JitRiscV64InlineWriteBarrier=0 to restore the out-of-line helper call. -+CONFIG_INTEGER(RiscV64InlineWriteBarrier, W("JitRiscV64InlineWriteBarrier"), 1) - - CONFIG_INTEGER(JitEECallTimingInfo, W("JitEECallTimingInfo"), 0) - --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp -@@ -2527,8 +2527,24 @@ +@@ -2527,8 +2527,15 @@ } else { - // In the case of a GC-Pointer we'll call the ByRef write barrier helper - genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); -+ if (JitConfig.RiscV64InlineWriteBarrier() != 0) -+ { -+ // zkVM: the ByRef barrier helper is reduced to a bare -+ // *dst++ = *src++ (uGC has no card table). Inline it to drop -+ // the per-slot call/ret. Mirrors __wrap_RhpByRefAssignRef: -+ // ld t,0(t5); addi t5,t5,8; sd t,0(t3); addi t3,t3,8. -+ emit->emitIns_R_R_I(INS_ld, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); -+ emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_WRITE_BARRIER_SRC_BYREF, -+ REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); -+ emit->emitIns_R_R_I(INS_sd, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); -+ emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_WRITE_BARRIER_DST_BYREF, -+ REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); -+ } -+ else -+ { -+ // In the case of a GC-Pointer we'll call the ByRef write barrier helper -+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE); -+ } ++ // zkVM: the ByRef barrier helper is reduced to a bare ++ // *dst++ = *src++ (uGC has no card table). Inline it to drop the ++ // per-slot call/ret. Mirrors __wrap_RhpByRefAssignRef. ++ emit->emitIns_R_R_I(INS_ld, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, 0); ++ emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_WRITE_BARRIER_SRC_BYREF, ++ REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE); ++ emit->emitIns_R_R_I(INS_sd, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, 0); ++ emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_WRITE_BARRIER_DST_BYREF, ++ REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE); gcPtrCount--; } ++i; -@@ -3040,6 +3056,16 @@ +@@ -3040,6 +3047,12 @@ GenTree* addr = tree->Addr(); GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(tree); + -+ // zkVM: uGC never relocates or collects, so a GC ref store is just a plain -+ // store. Skip the helper entirely and fall through to the inline-store path -+ // below, which emits a single sd from the data/addr registers (no t3/t4 -+ // marshalling, no call/ret, no helper body). See JitRiscV64InlineWriteBarrier. -+ if ((writeBarrierForm != GCInfo::WBF_NoBarrier) && (JitConfig.RiscV64InlineWriteBarrier() != 0)) -+ { -+ writeBarrierForm = GCInfo::WBF_NoBarrier; -+ } ++ // zkVM: uGC never relocates or collects and the RhpAssignRef* helpers are ++ // already reduced to a bare store, so a GC ref store is just a plain store. ++ // Force the inline-store path below (no t3/t4 marshalling, no call/ret). ++ writeBarrierForm = GCInfo::WBF_NoBarrier; + if (writeBarrierForm != GCInfo::WBF_NoBarrier) { // data and addr must be in registers. --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp -@@ -552,7 +552,12 @@ +@@ -552,12 +552,9 @@ { assert(dstCount == 0); - if (compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree->AsStoreInd())) -+ // zkVM: when ref stores are inlined (see JitRiscV64InlineWriteBarrier -+ // / genCodeForStoreInd), do NOT pin addr/data to t3/t4 - treat the -+ // store as an ordinary indir so the operands stay in whatever -+ // registers LSRA picks and the t3/t4 copies disappear. -+ if ((JitConfig.RiscV64InlineWriteBarrier() == 0) && -+ compiler->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(tree->AsStoreInd())) +- { +- srcCount = BuildGCWriteBarrier(tree); +- break; +- } +- ++ // zkVM: ref stores are inlined (see genCodeForStoreInd), so never ++ // take the write-barrier path - always build an ordinary indir so ++ // the operands are not pinned to t3/t4. + srcCount = BuildIndir(tree->AsIndir()); + if (!tree->gtGetOp2()->isContained()) { - srcCount = BuildGCWriteBarrier(tree); - break;