diff --git a/Contributing.md b/Contributing.md
new file mode 100644
index 000000000..f44d3c877
--- /dev/null
+++ b/Contributing.md
@@ -0,0 +1,9 @@
+# Contributing
+
+Contributions are welcome! You can help by:
+- Adding new RISC-V instructions or extensions
+- Improving the pipeline or memory modules
+- Adding more RISCoF test cases
+- Fixing bugs in existing modules
+
+Please fork the repository, create a feature branch, and submit a pull request.
diff --git a/NucleusRV_B.png b/NucleusRV_B.png
new file mode 100644
index 000000000..9b9b78b1f
Binary files /dev/null and b/NucleusRV_B.png differ
diff --git a/NucleusRV_w.png b/NucleusRV_w.png
new file mode 100644
index 000000000..655b961d5
Binary files /dev/null and b/NucleusRV_w.png differ
diff --git a/README.md b/README.md
index cff73210b..b2d91e29c 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,24 @@
-# NucleusRV
+
+
+
+  NucleusRV Logo
+
 [![Join the chat at https://gitter.im/merledu/nucleusrv](https://badges.gitter.im/merledu/nucleusrv.svg)](https://gitter.im/merledu/nucleusrv?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
 A chisel based riscv 5-stage pipelined cpu design, implementing 32-bit version of the ISA (incomplete).
+## Supported Extensions
+
+| Extension | Description |
+|-----------|-------------|
+| I | Base Integer Instructions |
+| M | Integer Multiplication & Division |
+| F | Single-Precision Floating Point |
+| C | Compressed Instructions |
+| A | Atomic Instructions |
+
+
 ## Dependencies
@@ -28,16 +43,43 @@ python3 gen_verilog.py
 ### Running RISC-V assembly
 ```sh
-python3 simulate.py --sbt_args "--imem " nucleusrv.components.NRVDriver Top
+python3 simulate.py --sbt_args "--imem " nucleusrv.components.NRVDriver Top
+
 ```
 ### Running RISC-V Architectural Tests
 * Make sure to have the RISC-V GNU Toolchain and Verilator in your `PATH`.
-* Create a python virtual environment and follow the `README.md` in `riscof/riscv-arch-test/`.
+* Create a python virtual environment.
+```sh
+python3 -m venv .venv
+source .venv/bin/activate
+```
+* Install Python Dependencies for RISCoF
+```sh
+pip install --upgrade pip
+pip install git+https://github.com/riscv/riscof.git
+```
 * Run `run_riscv_arch_tests.py` in root directory.
 ```sh
 python3 run_riscv_arch_tests.py
 ```
+* RISCoF Architecture Tests:
+```sh
+cd riscof/riscv-arch-test/riscv-ctg
+pip install -e .
+```
+```sh
+cd ../riscv-isac
+pip install -e .
+cd ../../..
+```
+* Verify Installation
+```sh
+riscof --help
+spike --help
+```
+You should see RISCoF and Spike help options displayed.
+
 ### Building C Programs
 * In `tools/tests` directory, create a folder and write c program in the `main.c` file
diff --git a/nrv_B.png b/nrv_B.png
new file mode 100644
index 000000000..2f19f7f85
Binary files /dev/null and b/nrv_B.png differ
diff --git a/src/main/scala/components/AtomicAlu.scala b/src/main/scala/components/AtomicAlu.scala
new file mode 100644
index 000000000..9de7b7930
--- /dev/null
+++ b/src/main/scala/components/AtomicAlu.scala
@@ -0,0 +1,49 @@
+package nucleusrv.components
+import chisel3._
+import chisel3.util._
+
+/** Combinational ALU for the modify step of an AMO read-modify-write.
+  *
+  * Combines the word read from memory (`memData`) with the rs2 operand (`src2`)
+  * according to `amoOp` and presents the value to store on `result`.
+  * Any `amoOp` value that is not listed below (including 0) yields 0.
+  *
+  * NOTE: the numeric `amoOp` codes are duplicated in `AtomicDecoder`; keep the two in sync.
+  */
+class AMOALU extends Module {
+  val io = IO(new Bundle {
+    val memData = Input(UInt(32.W))  // Data loaded from memory (old value at rs1 address)
+    val src2    = Input(UInt(32.W))  // Value from rs2 register
+    val amoOp   = Input(UInt(4.W))   // Operation code from decoder
+    val result  = Output(UInt(32.W)) // Computed result to write back
+  })
+  // Default when no case of the switch below matches.
+  io.result := 0.U
+
+  // AMO operation encoding
+  val AMO_ADD  = 1.U
+  val AMO_SWAP = 2.U
+  val AMO_XOR  = 3.U
+  val AMO_AND  = 4.U
+  val AMO_OR   = 5.U
+  val AMO_MIN  = 6.U
+  val AMO_MAX  = 7.U
+  val AMO_MINU = 8.U
+  val AMO_MAXU = 9.U
+
+  // Signed views of the operands for the signed min/max comparisons
+  val s_memData = io.memData.asSInt
+  val s_src2    = io.src2.asSInt
+
+  switch(io.amoOp) {
+    is(AMO_ADD)  { io.result := io.memData + io.src2 }
+    is(AMO_SWAP) { io.result := io.src2 } // Swap old with new
+    is(AMO_XOR)  { io.result := io.memData ^ io.src2 }
+    is(AMO_AND)  { io.result := io.memData & io.src2 }
+    is(AMO_OR)   { io.result := io.memData | io.src2 }
+    is(AMO_MIN)  { io.result := Mux(s_memData < s_src2, io.memData, io.src2) }
+    is(AMO_MAX)  { io.result := Mux(s_memData > s_src2, io.memData, io.src2) }
+    is(AMO_MINU) { io.result := Mux(io.memData < io.src2, io.memData, io.src2) }
+    is(AMO_MAXU) { io.result := Mux(io.memData > io.src2, io.memData, io.src2) }
+  }
+}
diff --git a/src/main/scala/components/AtomicDecoder.scala b/src/main/scala/components/AtomicDecoder.scala
new file mode 100644
index 000000000..46c68074e
--- /dev/null
+++ b/src/main/scala/components/AtomicDecoder.scala
@@ -0,0 +1,56 @@
+package nucleusrv.components
+import chisel3._
+import chisel3.util._
+
+
+/** Decoded atomic-instruction controls produced by [[AtomicDecoder]]. */
+class AtomicDecodeOut extends Bundle {
+  val isLR  = Bool()
+  val isSC  = Bool()
+  val isAMO = Bool()
+  val amoOp = UInt(4.W) // Encoded value for AMO operation type
+}
+
+/** Combinational decoder for word-sized atomic instructions.
+  *
+  * All outputs default to inactive (`false` / 0). They are only driven when the
+  * opcode is `0101111` and funct3 is `010`; funct5 (bits 31:27) then selects LR.W,
+  * SC.W or one of the AMO operations. No other instruction bits are inspected.
+  *
+  * NOTE: the `amoOp` codes emitted here are duplicated in `AMOALU`; keep the two in sync.
+  */
+class AtomicDecoder extends Module {
+  val io = IO(new Bundle {
+    val instr = Input(UInt(32.W))
+    val out   = Output(new AtomicDecodeOut)
+  })
+
+  io.out.isLR  := false.B
+  io.out.isSC  := false.B
+  io.out.isAMO := false.B
+  io.out.amoOp := 0.U
+
+  val opcode = io.instr(6,0)
+  val funct5 = io.instr(31,27)
+  val funct3 = io.instr(14,12)
+
+  // opcode for atomic instructions
+  val OPCODE_ATOMIC = "b0101111".U
+
+  when(opcode === OPCODE_ATOMIC && funct3 === "b010".U) {
+    switch(funct5) {
+      is("b00010".U) { io.out.isLR := true.B } // LR.W
+      is("b00011".U) { io.out.isSC := true.B } // SC.W
+
+      is("b00000".U) { io.out.isAMO := true.B; io.out.amoOp := 1.U } // AMOADD
+      is("b00001".U) { io.out.isAMO := true.B; io.out.amoOp := 2.U } // AMOSWAP
+      is("b00100".U) { io.out.isAMO := true.B; io.out.amoOp := 3.U } // AMOXOR
+      is("b01100".U) { io.out.isAMO := true.B; io.out.amoOp := 4.U } // AMOAND
+      is("b01000".U) { io.out.isAMO := true.B; io.out.amoOp := 5.U } // AMOOR
+      is("b10000".U) { io.out.isAMO := true.B; io.out.amoOp := 6.U } // AMOMIN
+      is("b10100".U) { io.out.isAMO := true.B; io.out.amoOp := 7.U } // AMOMAX
+      is("b11000".U) { io.out.isAMO := true.B; io.out.amoOp := 8.U } // AMOMINU
+      is("b11100".U) { io.out.isAMO := true.B; io.out.amoOp := 9.U } // AMOMAXU
+    }
+  }
+}
diff --git a/src/main/scala/components/Bus.scala b/src/main/scala/components/Bus.scala
new file mode 100644
index 000000000..1ec31dc33
--- /dev/null
+++ b/src/main/scala/components/Bus.scala
@@ -0,0 +1,59 @@
+package nucleusrv.components
+
+import chisel3._
+import chisel3.util._
+
+/** Simple memory-style port, declared from the responder's point of view:
+  * the request fields are inputs and `rdata` is the only output.
+  * Wrap it in `Flipped(...)` to obtain the requester-side view.
+  */
+class BusIO(addrWidth: Int, dataWidth: Int) extends Bundle {
+  val addr  = Input(UInt(addrWidth.W))
+  val wdata = Input(UInt(dataWidth.W))
+  val rdata = Output(UInt(dataWidth.W))
+  val wen   = Input(Bool())
+  val ren   = Input(Bool())
+}
+
+/** Combinational 1-to-2 address decoder.
+  *
+  * Requests arriving on `io.cpu` with an address below 0x1000_0000 are forwarded to
+  * `io.imem`; all others go to `io.dmem`. The port that is not selected sees an
+  * all-zero request, and `io.cpu.rdata` is taken from the selected port.
+  *
+  * `io.cpu` is a responder-side port (the bus receives the request and returns
+  * `rdata`); `io.imem` and `io.dmem` are requester-side ports (the bus drives the
+  * request and receives `rdata`), hence the `Flipped`.
+  */
+class Bus(addrWidth: Int = 32, dataWidth: Int = 32) extends Module {
+  val io = IO(new Bundle {
+    val cpu  = new BusIO(addrWidth, dataWidth)
+    val imem = Flipped(new BusIO(addrWidth, dataWidth))
+    val dmem = Flipped(new BusIO(addrWidth, dataWidth))
+  })
+
+  // Default signals: idle request on both memory ports, zero read data to the CPU.
+  // They are driven field by field because `rdata` on the memory ports is an input here.
+  for (mem <- Seq(io.imem, io.dmem)) {
+    mem.addr  := 0.U
+    mem.wdata := 0.U
+    mem.wen   := false.B
+    mem.ren   := false.B
+  }
+  io.cpu.rdata := 0.U
+
+  // Address decode: <0x1000_0000 -> imem, else -> dmem
+  when(io.cpu.addr < "h10000000".U) {
+    io.imem.addr  := io.cpu.addr
+    io.imem.wdata := io.cpu.wdata
+    io.imem.wen   := io.cpu.wen
+    io.imem.ren   := io.cpu.ren
+    io.cpu.rdata  := io.imem.rdata
+  }.otherwise {
+    io.dmem.addr  := io.cpu.addr
+    io.dmem.wdata := io.cpu.wdata
+    io.dmem.wen   := io.cpu.wen
+    io.dmem.ren   := io.cpu.ren
+    io.cpu.rdata  := io.dmem.rdata
+  }
+}
diff --git a/src/main/scala/components/Core.scala b/src/main/scala/components/Core.scala
index a548fcff2..cbd169a91 100644
--- a/src/main/scala/components/Core.scala
+++ b/src/main/scala/components/Core.scala
@@ -57,6 +57,13 @@ class Core(implicit val config:Configs) extends Module{
   val id_reg_fcsr_o_data = if (F) Some(RegInit(0.U(32.W))) else None
   val id_reg_is_f = if (F) Some(RegInit(0.B)) else None
+  // Atomic signals ID-EX
+  val id_reg_isAMO = RegInit(false.B)
+  val id_reg_isLR = RegInit(false.B) // NOTE(review): written below but never read in this patch
+  val id_reg_isSC = RegInit(false.B) // NOTE(review): written below but never read in this patch
+  val id_reg_amoOp = RegInit(0.U(4.W))
+
+
   // EX-MEM Registers
   val ex_reg_branch = RegInit(0.U(32.W))
   val ex_reg_zero = RegInit(0.U(32.W))
@@ -76,7 +83,10 @@ class Core(implicit val config:Configs) extends Module{
   val ex_reg_f_read = if (F) Some(Reg(Vec(3, Bool()))) else None
   val ex_reg_f_except = if (F) Some(RegInit(VecInit(Vector.fill(5)(0.B)))) else None
   val ex_reg_is_f = if (F) Some(RegInit(0.B)) else None
-
+  // Atomic signals EX-MEM
+  val ex_reg_isAMO = RegInit(false.B)
+  val ex_reg_amoOp = RegInit(0.U(4.W))
+
   // MEM-WB Registers
   val mem_reg_rd = RegInit(0.U(32.W))
   val mem_reg_ins = RegInit(0.U(32.W))
@@ -180,7 +190,7 @@ class Core(implicit val config:Configs) extends Module{
   when(ID.ifid_flush) {
     if_reg_ins := 0.U
   }
-
+
   /****************
    * Decode Stage *
@@ -230,6 +240,11 @@ class Core(implicit val config:Configs) extends Module{
       ID.f_read_reg.get(2)(i) := mem_reg_f_read.get(i)
     }
   }
+  // ID-EX atomic signals
+  id_reg_isAMO := ID.isAMO
+  id_reg_isLR := ID.isLR
+  id_reg_isSC := ID.isSC
+  id_reg_amoOp := ID.amoOp
   /*****************
    * Execute Stage *
@@ -283,6 +298,18 @@ class Core(implicit val config:Configs) extends Module{
     ex_reg_is_f.get := EX.is_f_o.get
     ID.f_except.get(0) <> EX.exceptions.get
   }
+  // forward atomic control from ID->EX to EX->MEM
+  ex_reg_isAMO := id_reg_isAMO
+  ex_reg_amoOp := id_reg_amoOp
+
+  // AMO ALU instance (use in MEM stage)
+  val amoALU = Module(new AMOALU)
+
+  // ----- AMO wiring in Memory stage -----
+  // Connect AMOALU inputs: data read from memory and rs2 (ex_reg_wd)
+  amoALU.io.memData := MEM.io.readData // NOTE(review): feeds MEM.io.writeData in the same stage; confirm MEM's read timing allows this
+  amoALU.io.src2 := ex_reg_wd
+  amoALU.io.amoOp := ex_reg_amoOp
   /****************
    * Memory Stage *
@@ -302,7 +329,12 @@ class Core(implicit val config:Configs) extends Module{
   //
   // } otherwise{
   mem_reg_rd := MEM.io.readData
-  mem_reg_result := ex_reg_result
+  // mem_reg_result := ex_reg_result
+
+  // If this is an AMO, WB should receive the original memory value (MEM.io.readData).
+  // For other cases keep previous behavior.
+  mem_reg_result := Mux(ex_reg_isAMO, MEM.io.readData, ex_reg_result)
+
   // mem_reg_ctl_memToReg := ex_reg_ctl_memToReg
   mem_reg_ctl_regWrite <> ex_reg_ctl_regWrite
   mem_reg_ins := ex_reg_ins
@@ -313,14 +345,28 @@ class Core(implicit val config:Configs) extends Module{
   ex_reg_result := EX.ALUresult
   // }
   mem_reg_wra := ex_reg_wra
-  mem_reg_ctl_memToReg := ex_reg_ctl_memToReg
+  // mem_reg_ctl_memToReg := ex_reg_ctl_memToReg
+
+  // Force the memToReg selector to choose memory result for AMO
+  // (assuming memToReg==1 means load -> uses MEM.io.readData in WB)
+  mem_reg_ctl_memToReg := Mux(ex_reg_isAMO, 1.U, ex_reg_ctl_memToReg)
+
   mem_reg_is_csr := ex_reg_is_csr
   mem_reg_csr_data := ex_reg_csr_data
   EX.ex_mem_regWrite <> ex_reg_ctl_regWrite
   MEM.io.aluResultIn := ex_reg_result
-  MEM.io.writeData := ex_reg_wd
-  MEM.io.readEnable := ex_reg_ctl_memRead
-  MEM.io.writeEnable := ex_reg_ctl_memWrite
+  // MEM.io.writeData := ex_reg_wd
+  // If ex_reg_isAMO: writeData should be amoALU result; otherwise normal ex_reg_wd
+  MEM.io.writeData := Mux(ex_reg_isAMO, amoALU.io.result, ex_reg_wd)
+
+  // MEM.io.readEnable := ex_reg_ctl_memRead
+  // Keep the original read control and additionally force a read for AMO (it needs the old value)
+  MEM.io.readEnable := ex_reg_ctl_memRead || ex_reg_isAMO
+
+  //MEM.io.writeEnable := ex_reg_ctl_memWrite
+  // Ensure we assert writeEnable for AMO (RMW needs write back)
+  MEM.io.writeEnable := ex_reg_ctl_memWrite || ex_reg_isAMO
+
   MEM.io.f3 := ex_reg_ins(14,12)
   EX.mem_result := ex_reg_result
   ID.csr_Mem := ex_reg_is_csr
diff --git a/src/main/scala/components/InstructionDecode.scala b/src/main/scala/components/InstructionDecode.scala
index a87e902cd..991658601 100755
--- a/src/main/scala/components/InstructionDecode.scala
+++ b/src/main/scala/components/InstructionDecode.scala
@@ -83,6 +83,12 @@ class InstructionDecode(
     // RVFI pins
     val raddr = if (TRACE) Some(Output(Vec(3, UInt(5.W)))) else None
     val rd_wdata = if (TRACE) Some(Output(UInt(32.W))) else None
+
+    // Atomic output pins (direction made explicit, like the other ports of this bundle)
+    val isAMO = Output(Bool())
+    val isLR = Output(Bool())
+    val isSC = Output(Bool())
+    val amoOp = Output(UInt(4.W))
   })
   val is_f = if (F) Some(WireInit(0.B)) else None
@@ -98,6 +104,16 @@ class InstructionDecode(
     ).map(io.id_instruction(6, 0) === _.U).reduce(_ || _)
     io.is_f.get := is_f.get
   }
+  // Atomic Decoder: decodes the current instruction and forwards its outputs unchanged
+  val atomicDecoder = Module(new AtomicDecoder)
+  atomicDecoder.io.instr := io.id_instruction
+
+  io.isAMO := atomicDecoder.io.out.isAMO
+  io.isLR := atomicDecoder.io.out.isLR
+  io.isSC := atomicDecoder.io.out.isSC
+  io.amoOp := atomicDecoder.io.out.amoOp
+
+
   // CSR
   val csr = if (Zicsr) Some(Module(new CSR())) else None
diff --git a/src/main/scala/components/top_new.scala b/src/main/scala/components/top_new.scala
new file mode 100644
index 000000000..541908c00
--- /dev/null
+++ b/src/main/scala/components/top_new.scala
@@ -0,0 +1,137 @@
+package nucleusrv.components
+
+import chisel3._
+import chisel3.stage.ChiselStage
+import nucleusrv.tracer._
+
+/** Dual-core top level.
+  *
+  * Instantiates two [[Core]]s, each with a private instruction cache, data cache
+  * and [[Bus]], in front of one instruction memory and one data memory.
+  *
+  * @param programFile optional file handed to the instruction `SRamTop`
+  * @param dataFile    optional file handed to the data `SRamTop`
+  */
+class Top(programFile: Option[String], dataFile: Option[String]) extends Module {
+
+  val io = IO(new Bundle {
+    val pin  = Output(UInt(32.W))
+    val rvfi = new TracerO
+  })
+
+  // Core configuration
+  implicit val config: Configs = Configs(
+    XLEN = 32,
+    M = true,
+    F = true,
+    C = false,
+    Zicsr = true,
+    TRACE = true
+  )
+
+  // Two NucleusRV cores
+  val core0 = Module(new Core())
+  val core1 = Module(new Core())
+  core0.io.stall := false.B
+  core1.io.stall := false.B
+
+  // caches (16KB each 64B line)
+  val icache0 = Module(new Cache(16384, 64, CacheAccessType.ReadOnly))  // icache for Core0
+  val dcache0 = Module(new Cache(16384, 64, CacheAccessType.ReadWrite)) // dcache for Core0
+
+  val icache1 = Module(new Cache(16384, 64, CacheAccessType.ReadOnly))  // icache for Core1
+  val dcache1 = Module(new Cache(16384, 64, CacheAccessType.ReadWrite)) // dcache for Core1
+
+  // Shared Bus + Memory system
+  val bus0 = Module(new Bus()) // Core0 bus to imem,dmem
+  val bus1 = Module(new Bus()) // Core1 bus to imem,dmem
+
+  val imem = Module(new SRamTop(programFile))
+  val dmem = Module(new SRamTop(dataFile))
+
+  // Connect Core0 to dcaches, icache
+  // NOTE(review): the cache `rdata` ports are read here but assigned in the bus section
+  // below, and `core*.io.*Req` is driven from the cache side; confirm port directions.
+  core0.io.imemReq.addr := icache0.io.addr
+  core0.io.imemRsp.data := icache0.io.rdata
+  core0.io.imemRsp.valid := icache0.io.valid
+
+  core0.io.dmemReq.addr := dcache0.io.addr
+  core0.io.dmemReq.wdata := dcache0.io.wdata
+  core0.io.dmemReq.wen := dcache0.io.wen
+  core0.io.dmemRsp.data := dcache0.io.rdata
+  core0.io.dmemRsp.valid := dcache0.io.valid
+
+  // Connect Core1 to dcaches, icache
+
+  core1.io.imemReq.addr := icache1.io.addr
+  core1.io.imemRsp.data := icache1.io.rdata
+  core1.io.imemRsp.valid := icache1.io.valid
+
+  core1.io.dmemReq.addr := dcache1.io.addr
+  core1.io.dmemReq.wdata := dcache1.io.wdata
+  core1.io.dmemReq.wen := dcache1.io.wen
+  core1.io.dmemRsp.data := dcache1.io.rdata
+  core1.io.dmemRsp.valid := dcache1.io.valid
+
+
+  // Connect caches to bus
+
+  // Core0 cache to Bus0
+  bus0.io.cpu.addr := Mux(icache0.io.valid, icache0.io.addr, dcache0.io.addr)
+  bus0.io.cpu.wdata := dcache0.io.wdata
+  bus0.io.cpu.wen := dcache0.io.wen
+  bus0.io.cpu.ren := !dcache0.io.wen
+  dcache0.io.rdata := bus0.io.cpu.rdata
+  icache0.io.rdata := bus0.io.cpu.rdata
+
+  // Core1 caches to Bus1
+  bus1.io.cpu.addr := Mux(icache1.io.valid, icache1.io.addr, dcache1.io.addr)
+  bus1.io.cpu.wdata := dcache1.io.wdata
+  bus1.io.cpu.wen := dcache1.io.wen
+  bus1.io.cpu.ren := !dcache1.io.wen
+  dcache1.io.rdata := bus1.io.cpu.rdata
+  icache1.io.rdata := bus1.io.cpu.rdata
+
+
+  // Shared Memory connection
+  // NOTE(review): imem.io and dmem.io are bulk-connected to both buses with no
+  // arbitration; confirm this is legal for SRamTop's port.
+  bus0.io.imem <> imem.io
+  bus0.io.dmem <> dmem.io
+
+  bus1.io.imem <> imem.io
+  bus1.io.dmem <> dmem.io
+
+  io.pin := core0.io.pin | core1.io.pin
+
+  if (config.TRACE) {
+    val tracer = Module(new Tracer)
+    tracer.rvfi_i <> core0.io.rvfi.get
+    io.rvfi <> tracer.rvfi_o
+  }
+}
+
+/** Command-line entry point that elaborates [[Top]] and emits Verilog.
+  *
+  * Recognised arguments: `--imem <file>`, `--dmem <file>` and `--target-dir <dir>`;
+  * the `--target-dir` pair is passed through to `emitVerilog` unchanged.
+  */
+object NRVDriver {
+  def main(args: Array[String]): Unit = {
+    // Value following `flag`, or None when the flag is absent or is the last
+    // argument (indexing `args` directly would throw in the latter case).
+    def optionValue(flag: String): Option[String] =
+      Some(args.indexOf(flag)).filter(_ >= 0).flatMap(i => args.lift(i + 1))
+
+    val IMem = optionValue("--imem")
+    val DMem = optionValue("--dmem")
+    new ChiselStage().emitVerilog(
+      new Top(IMem, DMem),
+      if (args.contains("--target-dir")) args.slice(
+        args.indexOf("--target-dir"),
+        args.indexOf("--target-dir") + 2
+      ) else Array()
+    )
+  }
+}