A surprisingly small emulator that runs Linux, Doom, and a C compiler.
The entire CPU is a single C++ file. The instruction decoder is a switch statement.
There is no JIT, no MMU, and no page tables. It plays Doom at full speed
and runs TinyCC — a C compiler compiling C inside the emulator.
It also boots Linux 6.1.
This project demonstrates that a simple, readable RISC-V implementation can do remarkable things.
| 🎮 Doom | Full DOOM running in the RISC-V C++ emulation layer. Compiles from source and runs at launch. |
| 🐧 Linux 6.1 | Real nommu kernel boots to an interactive BusyBox shell. |
| ⚙️ TinyCC | A C compiler running inside the emulator, compiling C programs. |
| 🎬 Video | Software-rendered frame sequences at real-time speed. |
| 🌄 Voxel terrain | Height-map voxel renderer written entirely in guest C. |
| 🔊 Audio | PCM playback through the emulated sound peripheral. |
All of this from ~200 lines of core C++ and a thin C# peripheral layer.
// Executes one step of the emulated RV32 hart: either a single instruction,
// or an idle tick when an interrupt was taken / a WFI is pending.
//
// Template flags select optional behaviour at compile time:
//   MExt - OP instructions with funct7 == 0x01 are dispatched to exec_m()
//   FExt - FLW / FSW, the FMADD-family opcodes and OP-FP are executed
//   AExt - LR.W / SC.W / AMO* are executed
//   Priv - interrupt check, WFI idling, trap delivery and U-mode counting
// When a flag is false the corresponding opcodes are silently skipped.
//
// Every exit path clears regs[0] and increments mtime by one.
// Opcodes or funct3 values that are not listed below fall through as no-ops;
// this function does not raise an illegal-instruction trap for them.
//
// Address and ALU add/sub arithmetic is performed on the unsigned register
// views so that 32-bit wrap-around is well defined (signed overflow would be
// undefined behaviour in C++). The results are bit-identical to the signed
// form wherever that form was defined.
template<bool MExt, bool FExt, bool AExt, bool Priv>
static __forceinline void do_step() {
    if constexpr (Priv) {
        // When check_interrupts() returns true, or a WFI is pending, this step
        // only ticks mtime; no instruction is fetched.
        if (check_interrupts()) { regs[0] = 0; mtime++; return; }
        if (wfi_pending)        { regs[0] = 0; mtime++; return; }
    }

    // Fetch and pre-decode the fixed-position instruction fields.
    const uint32_t instr  = mem_read<uint32_t>(pc);
    const uint32_t opcode = instr & 0x7F;
    const int      rd     = (instr >> 7)  & 0x1F;
    const int      rs1    = (instr >> 15) & 0x1F;
    const int      rs2    = (instr >> 20) & 0x1F;
    const uint32_t f3     = (instr >> 12) & 0x7;
    const uint32_t f7     = (instr >> 25) & 0x7F;

    // Signed and unsigned views of both source registers.
    const int32_t  s1 = (int32_t)regs[rs1];
    const uint32_t u1 = regs[rs1];
    const int32_t  s2 = (int32_t)regs[rs2];
    const uint32_t u2 = regs[rs2];

    uint32_t nextpc     = pc + 4;   // overridden by jumps and taken branches
    uint32_t trap_cause = 0;        // non-zero requests a trap (Priv only)
    uint32_t trap_tval  = 0;

    switch (opcode) {
    case 0x37: regs[rd] = instr & 0xFFFFF000u;                 break;   // LUI
    case 0x17: regs[rd] = pc + (instr & 0xFFFFF000u);          break;   // AUIPC
    case 0x6F: regs[rd] = pc + 4; nextpc = pc + j_imm(instr);  break;   // JAL
    case 0x67: {                                                        // JALR
        // Compute the target before writing rd, since rd may equal rs1.
        const uint32_t target = (u1 + (uint32_t)i_imm(instr)) & ~1u;
        regs[rd] = pc + 4;
        nextpc = target;
        break;
    }
    case 0x63: {                                                        // BRANCH
        int taken;
        switch (f3) {
        case 0: taken = u1 == u2; break;    // BEQ
        case 1: taken = u1 != u2; break;    // BNE
        case 4: taken = s1 <  s2; break;    // BLT
        case 5: taken = s1 >= s2; break;    // BGE
        case 6: taken = u1 <  u2; break;    // BLTU
        case 7: taken = u1 >= u2; break;    // BGEU
        default: taken = 0;
        }
        if (taken) nextpc = pc + b_imm(instr);
        break;
    }
    case 0x03: {                                                        // LOAD
        const uint32_t addr = u1 + (uint32_t)i_imm(instr);
        switch (f3) {
        case 0: regs[rd] = (uint32_t)(int8_t) mem_read<uint8_t> (addr); break;  // LB  (sign-extend)
        case 1: regs[rd] = (uint32_t)(int16_t)mem_read<uint16_t>(addr); break;  // LH  (sign-extend)
        case 2: regs[rd] = mem_read<uint32_t>(addr);                    break;  // LW
        case 4: regs[rd] = mem_read<uint8_t> (addr);                    break;  // LBU (zero-extend)
        case 5: regs[rd] = mem_read<uint16_t>(addr);                    break;  // LHU (zero-extend)
        }
        break;
    }
    case 0x23: {                                                        // STORE
        const uint32_t addr = u1 + (uint32_t)s_imm(instr);
        switch (f3) {
        case 0: mem_write<uint8_t> (addr, (uint8_t) u2); break;         // SB
        case 1: mem_write<uint16_t>(addr, (uint16_t)u2); break;         // SH
        case 2: mem_write<uint32_t>(addr, u2);           break;         // SW
        }
        break;
    }
    case 0x13: {                                                        // OP-IMM
        const int32_t imm = i_imm(instr);
        const int     sh  = (instr >> 20) & 0x1F;   // shift amount for SLLI/SRLI/SRAI
        uint32_t r;
        switch (f3) {
        case 0: r = u1 + (uint32_t)imm;                               break;  // ADDI
        case 1: r = u1 << sh;                                         break;  // SLLI
        case 2: r = s1 < imm ? 1u : 0u;                               break;  // SLTI
        case 3: r = u1 < (uint32_t)imm ? 1u : 0u;                     break;  // SLTIU
        case 4: r = u1 ^ (uint32_t)imm;                               break;  // XORI
        case 5: r = f7 == 0x20 ? (uint32_t)(s1 >> sh) : u1 >> sh;     break;  // SRAI / SRLI
        case 6: r = u1 | (uint32_t)imm;                               break;  // ORI
        case 7: r = u1 & (uint32_t)imm;                               break;  // ANDI
        default: r = 0;
        }
        regs[rd] = r;
        break;
    }
    case 0x33: {                                                        // OP
        uint32_t r;
        if (MExt && f7 == 0x01) {
            r = exec_m(f3, s1, u1, s2, u2);
        } else {
            const int sh = s2 & 0x1F;   // only the low 5 bits of rs2 are used as shift amount
            switch (f3) {
            case 0: r = f7 == 0x20 ? u1 - u2 : u1 + u2;                   break;  // SUB / ADD
            case 1: r = u1 << sh;                                         break;  // SLL
            case 2: r = s1 < s2 ? 1u : 0u;                                break;  // SLT
            case 3: r = u1 < u2 ? 1u : 0u;                                break;  // SLTU
            case 4: r = u1 ^ u2;                                          break;  // XOR
            case 5: r = f7 == 0x20 ? (uint32_t)(s1 >> sh) : u1 >> sh;     break;  // SRA / SRL
            case 6: r = u1 | u2;                                          break;  // OR
            case 7: r = u1 & u2;                                          break;  // AND
            default: r = 0;
            }
        }
        regs[rd] = r;
        break;
    }
    case 0x2F:                                                          // AMO (RV32A)
        if constexpr (AExt) {
            const uint32_t irmid = (instr >> 27) & 0x1F;    // funct5 selects the atomic operation
            const uint32_t addr  = u1;
            if (irmid == 2) {                                           // LR.W
                regs[rd] = mem_read<uint32_t>(addr);
                rsv_addr = addr;
            } else if (irmid == 3) {                                    // SC.W
                // Succeeds (rd = 0) only when the reservation matches; the
                // reservation is dropped either way.
                if (rsv_addr == addr) { mem_write<uint32_t>(addr, u2); regs[rd] = 0; }
                else regs[rd] = 1;
                rsv_addr = ~0u;
            } else {
                // Read-modify-write: rd receives the old memory value.
                const uint32_t old = mem_read<uint32_t>(addr);
                regs[rd] = old;
                uint32_t nw;
                switch (irmid) {
                case 1:  nw = u2;                                       break;  // SWAP
                case 0:  nw = old + u2;                                 break;  // ADD
                case 4:  nw = old ^ u2;                                 break;  // XOR
                case 12: nw = old & u2;                                 break;  // AND
                case 8:  nw = old | u2;                                 break;  // OR
                case 16: nw = (int32_t)u2 < (int32_t)old ? u2 : old;    break;  // MIN
                case 20: nw = (int32_t)u2 > (int32_t)old ? u2 : old;    break;  // MAX
                case 24: nw = u2 < old ? u2 : old;                      break;  // MINU
                case 28: nw = u2 > old ? u2 : old;                      break;  // MAXU
                default: nw = old;                                      break;  // unknown op: write back unchanged
                }
                mem_write<uint32_t>(addr, nw);
            }
        }
        break;
    case 0x07:                                                          // FLW
        if constexpr (FExt) if (f3 == 2) fregs[rd] = mem_read<uint32_t>(u1 + (uint32_t)i_imm(instr));
        break;
    case 0x27:                                                          // FSW
        if constexpr (FExt) if (f3 == 2) mem_write<uint32_t>(u1 + (uint32_t)s_imm(instr), fregs[rs2]);
        break;
    case 0x43: case 0x47: case 0x4B: case 0x4F:                         // FMADD/FMSUB/FNMSUB/FNMADD
        if constexpr (FExt) {
            const int rs3 = (int)((instr >> 27) & 0x1F);
            const float fa = f_get(rs1), fb = f_get(rs2), fc = f_get(rs3);
            // NOTE(review): written as a*b (+/-) c; whether this rounds once
            // (fused) depends on the compiler's FP-contraction setting.
            float fr;
            switch (opcode) {
            case 0x43: fr =  fa*fb + fc; break;
            case 0x47: fr =  fa*fb - fc; break;
            case 0x4B: fr = -fa*fb + fc; break;
            default:   fr = -fa*fb - fc; break;
            }
            f_set(rd, fr);
        }
        break;
    case 0x53:                                                          // OP-FP
        if constexpr (FExt) exec_fp_opfp(instr, rd, rs1, rs2, f3, f7);
        break;
    case 0x0F: break;                                                   // FENCE / FENCE.I — NOP
    case 0x73:                                                          // SYSTEM
        // exec_system may update nextpc / trap_tval and returns a trap cause (0 = none).
        trap_cause = exec_system<Priv>(instr, rd, f3, nextpc, trap_tval);
        break;
    } // switch (opcode)

    if constexpr (Priv) {
        // On a trap, pc is left to do_trap(); nextpc is not committed.
        if (trap_cause) { do_trap(trap_cause, trap_tval); regs[0] = 0; mtime++; return; }
    }

    regs[0] = 0;    // discard any write to x0
    pc = nextpc;
    if constexpr (Priv) { if (priv_mode == 0) umode_count++; }
    mtime++;
}
- RV32I — all 40 base instructions
- M-extension — `MUL`/`MULH`/`DIV`/`REM` family (opt-in, big speedup for Doom)
- A-extension — atomic instructions (`LR.W`, `SC.W`, `AMO*`) needed for Linux SMP primitives
- M/S/U privilege modes — CSRs, traps, `MRET`/`SRET`, timer interrupts, `WFI`
- Memory-mapped peripherals — UART, framebuffer, keyboard, mouse, audio, RTC, CLINT
- ELF loader — loads `PT_LOAD` segments from standard ELF32 binaries
- SDL2 frontend — hardware-accelerated window at ~120 fps via Silk.NET.SDL
- Bare-metal C runtime — libc, malloc, softfloat, VFS, syscall shim for writing guest programs in C
- Integration test suite — compiles C programs to RV32I ELF and asserts on output
Core/ C# emulator engine (P/Invoke shell, memory bus, peripherals)
Native/ C++ CPU hot path (single-file, ClangCL vcxproj)
Frontend/ SDL2 window (rendering, input, audio) via Silk.NET.SDL
Examples/
Doom/ Full Doom port (PureDOOM, compiles at launch)
Linux/ Boot Linux 6.1 nommu kernel to an interactive shell
Runner/ Generic ELF runner with all peripherals wired
Video/ Software-rendered video playback demo
Voxel/ Voxel terrain renderer demo
Sound/ PCM audio playback demo
Input/ Keyboard and mouse input demo
TinyCC/ TinyCC C compiler running inside the emulator
RiscVEmulator.Tests/ Integration tests (compile C → ELF → run → assert)
Programs/ C test programs + linker.ld
Runtime/ Bare-metal C library (libc, malloc, softfloat, syscalls, VFS)
Emulator (C# P/Invoke shell)
│ pins Memory.Data[] → passes IntPtr to native
│
├── rv32i_core.dll (C++ hot path — ClangCL)
│ CPU registers, PC, CSRs, mtime — all live in native
│ step loop runs entirely in C++
│ MMIO / ECALL → callbacks into C#
│
├── MemoryBus Routes MMIO by address range → peripheral
│ ├── Memory Pinned byte-array RAM (shared with C++, zero-copy)
│ └── IPeripheral[] UART, timer, framebuffer, keyboard, mouse, audio, RTC
│
└── ElfLoader Loads PT_LOAD segments, returns entry point
The C# layer allocates RAM and pins it with GCHandle. The native DLL receives an IntPtr and reads/writes directly — no copies. MMIO accesses above the RAM ceiling call back into C#, which routes them through MemoryBus to the appropriate peripheral.
| Address | Size | Device |
|---|---|---|
| `0x00000000` | 16 MB (configurable) | RAM |
| `0x02000000` | 64 KB | CLINT Timer (standard SiFive layout) |
| `0x10000000` | 256 B | UART 16550 (console I/O) |
| `0x10001000` | 256 B | Keyboard controller (scancode FIFO) |
| `0x10002000` | 256 B | Mouse controller (relative deltas + buttons) |
| `0x10003000` | 256 B | Real-Time Clock (wall-clock µs / ms / epoch) |
| `0x20000000` | 256 KB | Framebuffer (320×200 RGBA8888) |
| `0x20100000` | 256 B | Display control (resolution, vsync, palette) |
| `0x30000000` | 1 MB | Audio PCM buffer |
| `0x30100000` | 256 B | Audio control (sample rate, channels, play/stop) |
See MEMORY_MAP.md for full register-level details.
| Tool | Purpose |
|---|---|
| .NET 10 SDK | Build and run C# projects |
| Visual Studio 2022 with C++ workload + Clang/LLVM | Build native rv32i_core.dll (ClangCL toolset) |
| LLVM/Clang in PATH | Cross-compile guest programs to RV32I ELF |
| lld linker in PATH | Link RV32I ELF binaries (`-fuse-ld=lld`) |

Windows only. The native project uses the ClangCL toolset and builds a `.dll`.
# Full solution — builds C++ DLL and all C# projects
& "C:\Program Files\Microsoft Visual Studio\2022\Community\MSBuild\Current\Bin\MSBuild.exe" RiscVEmulator.sln

The native `rv32i_core.dll` is automatically copied to every C# output directory via ProjectReference.
The classic. Compiles doom_main.c (using PureDOOM) to RV32I ELF at startup, then runs it with a real doom1.wad.
cd Examples\Doom\bin\Debug\net10.0
dotnet Examples.Doom.dll [--wad path\to\doom.wad] [--scale 3] [--no-grab]

- Mouse is grabbed by default; press Escape or Alt+F4 to exit.
- `--no-m-ext` disables the M-extension (much slower — avoid unless testing).
Boots Linux 6.1.14 (nommu, Buildroot) to an interactive shell. Downloads a ~10 MB pre-built kernel image on first run.
cd Examples\Linux\bin\Debug\net10.0
dotnet Examples.Linux.dll --download # first run: fetch kernel + DTB
dotnet Examples.Linux.dll # subsequent runs use cache

Log in as root (no password). Press Ctrl+C to exit the emulator (the signal is not forwarded to the guest).
Options:
--kernel <path> Use a custom kernel flat binary
--dtb <path> Use a custom DTB
--ram <MB> Guest RAM in MB (default: 128)
--download Download and cache the pre-built kernel image
The kernel and DTB are cached in ~/.cache/riscvemu/linux/.
Generic ELF runner. Wires up all peripherals and opens an SDL window.
dotnet Examples.Runner.dll <elf-file> [--scale 3] [--ram 16] [--m-ext] [--load <file> <hex-addr>]

| Example | Description |
|---|---|
| `Examples.Video` | Software video renderer — plays a raw frame sequence |
| `Examples.Voxel` | Voxel terrain with height-map rendering |
| `Examples.Sound` | PCM audio playback via the audio peripheral |
| `Examples.Input` | Keyboard and mouse event demo |
| `Examples.TinyCC` | TinyCC running inside the emulator — a C compiler in C |
dotnet test --no-build RiscVEmulator.Tests

Tests compile small C programs with clang → RV32I ELF, load them into the emulator, run them, and assert on console output and exit code. The bare-metal runtime (Runtime/) provides libc, malloc, softfloat, and a VFS shim.
Requirements: clang and lld for riscv32-unknown-elf must be in PATH.
Guest programs are ordinary C compiled for bare-metal RV32I:
clang --target=riscv32-unknown-elf -march=rv32i -mabi=ilp32 \
-nostdlib -O3 -fuse-ld=lld \
-T RiscVEmulator.Tests/Programs/linker.ld \
my_program.c RiscVEmulator.Tests/Runtime/runtime.c \
RiscVEmulator.Tests/Runtime/libc.c \
-o my_program.elf

The runtime provides:
- `printf` / `puts` / `scanf` / string functions (via UART MMIO)
- `malloc` / `free` (heap grows upward from BSS end)
- Soft-float and soft-double (IEEE 754 in software)
- VFS with `open` / `read` / `write` / `lseek`
- Syscall shim (`exit`, `write`) over `ECALL`
Default memory layout:
0x00001000 ELF entry point
↓ .text / .rodata / .data / .bss
↓ heap (grows up)
↑ stack (grows down)
0x009FFF00 initial stack pointer
| Extension | Status | Notes |
|---|---|---|
| RV32I | ✅ All 40 instructions | |
| M | ✅ opt-in | Emulator.EnableMExtension = true |
| A | ✅ opt-in | Emulator.EnableAExtension = true (required for Linux) |
| Zicsr | ✅ | When EnablePrivMode = true |
| M/S/U privilege | ✅ opt-in | Emulator.EnablePrivMode = true (required for Linux) |
| F / D (float) | ❌ hardware | Use softfloat.c in guest |
| Interrupts | ✅ | Timer interrupt via CLINT; requires EnablePrivMode |
| FENCE | NOP | |
| EBREAK | halts CPU | |
| a7 | Name | Behavior |
|---|---|---|
| 64 | `write` | Output bytes to `UartDevice.OutputHandler` |
| 93 | `exit` | Halt emulator, set `ExitCode = a0` |
| 94 | `exit_group` | Same as `exit` |
Other syscall numbers are silently ignored. The runtime in Runtime/syscalls.c maps additional POSIX calls (time, file I/O via VFS) onto these.