diff --git a/.gitignore b/.gitignore
index c9a4dc03a..3f67d5ca1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -369,3 +369,7 @@ riscof_work/
.ammonite/
metals.sbt
mem_data/
+
+.docs_venv/
+docs/_build/
+
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 000000000..edb57da7e
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,27 @@
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+# Set the OS, Python version, and other tools you might need
+build:
+ os: ubuntu-24.04
+ tools:
+ python: "3.11"
+
+# Build documentation in the "docs/" directory with Sphinx
+sphinx:
+ configuration: docs/conf.py
+
+python:
+ install:
+ - requirements: docs/requirements.txt
+
+# Optionally, but recommended,
+# declare the Python requirements required to build your documentation
+# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
+# python:
+# install:
+# - requirements: docs/requirements.txt
+
diff --git a/docs/01_overview/index.rst b/docs/01_overview/index.rst
deleted file mode 100644
index 15a27911a..000000000
--- a/docs/01_overview/index.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-Overview of NucleusRV
-=====================
-
-NucluesRV is an embedded 32 bit RISC-V core. It is written in Chisel and it implements base ISA I,
-compressed instructions `C`, multipy and divsion instructions `M` and floating point unit `F`.
diff --git a/docs/02_user/index.rst b/docs/02_user/index.rst
deleted file mode 100644
index 7c6131540..000000000
--- a/docs/02_user/index.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-NucleusRV User Guide
-====================
-
-This user guide provides information necessary to setup and run NucluesRV. It is aimed at
-software developers writing software for NucleusRV and hardware developers integrating
-NucleusRV into a design
diff --git a/docs/03_dev/images/if_module.svg b/docs/03_dev/images/if_module.svg
deleted file mode 100644
index 80c67ee0e..000000000
--- a/docs/03_dev/images/if_module.svg
+++ /dev/null
@@ -1,3 +0,0 @@
-
-
-
\ No newline at end of file
diff --git a/docs/03_dev/index.rst b/docs/03_dev/index.rst
deleted file mode 100644
index 531c0785e..000000000
--- a/docs/03_dev/index.rst
+++ /dev/null
@@ -1,12 +0,0 @@
-NucleusRV Developer Guide
-=========================
-
-This section gives detailed explanation of the source code and different design decision.
-It describes contribution guidelines and will be helpful for people making changes to
-NucleusRV code base.
-
-.. toctree::
- :maxdepth: 1
- :caption: In this section
-
- instruction_fetch.rst
diff --git a/docs/03_dev/instruction_fetch.rst b/docs/03_dev/instruction_fetch.rst
deleted file mode 100644
index 5d68196d6..000000000
--- a/docs/03_dev/instruction_fetch.rst
+++ /dev/null
@@ -1,22 +0,0 @@
-Instruction Fetch
-=================
-
-.. figure:: images/if_module.svg
- :name: if_stage
- :align: center
-
- Instruction Fetch (IF) stage
-
-The Instruction Fetch (IF) stage fetches one instruction from the memory, increments the PC and supplys the instruction to Instruction Decode (ID) stage. It serves one instruction per cycle.
-
-The core supports misaligned instruction address by allowing the program counter to increment by 2. This happens when a compressed instruction is encountered. The :code:`is_comp` signal denotes that whehter we have received compressed instruction.
-
-We cannot pass misaligned address that we may have ended up with after incrementing the program counter by 2 and thats where the Realigner module comes in. This module takes as input the instruction address (PC) and corresponding instruction and make sures that instruction address (PC) is word aligned. The module operates as follows: If the address is aligned, it is passed to instruction memory to fetch the instruction as is. If it is misaligned, the state machine performs the following actions:
-
-1. Store the upper half-word of current instruction, halt the PC for one cycle, and send the address to the next instruction. Meanwhile, NOP will be fed to the core.
-
-2. After one cycle, when the instruction arrives, the lower half-word of this instruction is concatenated with the previously stored upper-half word. And this new instruction will be fed to the core.
-
-The instruction is then passed to Compressed Decoder which decodes the 16 bit instruction instruction into its equivalent 32 bit instruction and also sets the :code:`is_comp` flag. The next pc address is calculated on the basis of this flag, that is, it increments by 2 if it is true and by 4 if false.
-
-The calculations for jump and branch addresses are done in Instruction Decode stage and then passed to Instruction fetch when the branch is taken.
diff --git a/docs/Makefile b/docs/Makefile
index 057e40abb..d4bb2cbb9 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -1,4 +1,3 @@
-
# Minimal makefile for Sphinx documentation
#
@@ -7,7 +6,7 @@
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
-BUILDDIR = build
+BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
diff --git a/docs/concepts/configuration.rst b/docs/concepts/configuration.rst
new file mode 100644
index 000000000..8f4203ff3
--- /dev/null
+++ b/docs/concepts/configuration.rst
@@ -0,0 +1,51 @@
+Configuring NucleusRV: Extensions and Core Options
+==================================================
+
+NucleusRV is highly configurable via the ``Configs`` case class in Scala/Chisel.
+
+The Configs Case Class
+----------------------
+
+.. code-block:: scala
+
+ case class Configs(
+ XLEN : Int = 32,
+ M : Boolean = true,
+ F : Boolean = true,
+ C : Boolean = false,
+ A : Boolean = true,
+ Zicsr : Boolean = true,
+ TRACE : Boolean = true,
+ HARTID : Int = 1,
+ ARCHID : Int = 1
+ )
+
+Parameter Reference
+-------------------
+
+- ``XLEN``: Data width (currently only 32 is supported).
+- ``M``: Enable/Disable Integer Multiply/Divide.
+- ``F``: Enable/Disable Floating-Point Unit.
+- ``C``: Enable/Disable Compressed instruction support.
+- ``A``: Enable/Disable Atomic Memory Operations.
+- ``Zicsr``: Enable/Disable CSR instructions.
+- ``TRACE``: Enable/Disable RVFI tracing (useful for verification).
+- ``HARTID``: Hardware Thread ID (drives ``mhartid`` CSR).
+- ``ARCHID``: Architecture ID (drives ``marchid`` CSR).
+
+Instantiating with Custom Configuration
+---------------------------------------
+
+In your top-level Chisel module, you can instantiate the core with a custom config:
+
+.. code-block:: scala
+
+ implicit val config: Configs = Configs(M = true, C = true, TRACE = false)
+ val core = Module(new Core())
+
+Common Configurations
+---------------------
+
+- **Minimal RV32I**: Disable all optional extensions to minimize area.
+- **Full RV32IMAFC**: Enable all supported extensions.
+- **Synthesis Config**: Disable ``TRACE`` when synthesizing for hardware to save area.
diff --git a/docs/concepts/isa-support.rst b/docs/concepts/isa-support.rst
new file mode 100644
index 000000000..e708c04a2
--- /dev/null
+++ b/docs/concepts/isa-support.rst
@@ -0,0 +1,43 @@
+RISC-V ISA Extensions Supported by NucleusRV
+============================================
+
+NucleusRV implements the RV32IMAFC instruction set.
+
+Supported Extensions
+--------------------
+
+- **I (Base Integer)**: Always enabled. Includes ALU, Load/Store, and Branch instructions.
+- **M (Integer Multiply/Divide)**: Configurable. Implements multiplication and division.
+- **A (Atomic Memory Operations)**: Configurable. Implements Load-Reserved/Store-Conditional and Atomic Memory Operations.
+- **F (Single-Precision Floating-Point)**: Configurable. Implements IEEE 754 floating-point instructions.
+- **C (Compressed Instructions)**: Configurable. Implements 16-bit compressed instructions to reduce code size.
+- **Zicsr (CSR Instructions)**: Configurable. Enables access to Control and Status Registers.
+
+Extension Details
+-----------------
+
+M Extension
+~~~~~~~~~~~
+- Multiplier: Single-cycle multiplication.
+- Divider: Multi-cycle (up to 34 cycles) division/remainder unit.
+
+A Extension
+~~~~~~~~~~~
+- LR.W / SC.W: Implemented using a reservation file.
+- AMOs: Two-cycle read-modify-write sequence.
+
+C Extension
+~~~~~~~~~~~
+- Realigner: Handles misaligned instruction boundaries.
+- CompressedDecoder: Expands 16-bit instructions to 32-bit equivalents.
+
+MISA Register
+-------------
+
+The ``misa`` register reports the supported extensions based on the core configuration. The extension bits are:
+
+- Bit 8 (I): Base Integer.
+- Bit 12 (M): Multiplication.
+- Bit 0 (A): Atomics.
+- Bit 5 (F): Floating-Point.
+- Bit 2 (C): Compressed.
diff --git a/docs/concepts/memory-interface.rst b/docs/concepts/memory-interface.rst
new file mode 100644
index 000000000..060ec3a04
--- /dev/null
+++ b/docs/concepts/memory-interface.rst
@@ -0,0 +1,36 @@
+NucleusRV Memory Interface: Instruction and Data Ports
+======================================================
+
+NucleusRV uses separate instruction and data memory ports (Harvard architecture) based on a simple Decoupled request/response protocol.
+
+Bundle Definitions
+------------------
+
+The memory interface uses ``MemRequestIO`` and ``MemResponseIO`` bundles wrapped in ``Decoupled`` interfaces.
+
+MemRequestIO Fields
+~~~~~~~~~~~~~~~~~~~
+
+- ``addrRequest``: 32-bit byte address.
+- ``dataRequest``: 32-bit write data (ignored on reads).
+- ``activeByteLane``: 4-bit byte-enable mask.
+- ``isWrite``: 1 for write, 0 for read.
+
+MemResponseIO Fields
+~~~~~~~~~~~~~~~~~~~~
+
+- ``dataResponse``: 32-bit read data.
+
+Handshake Protocol
+------------------
+
+A transaction occurs when both ``valid`` and ``ready`` are asserted on the same clock edge.
+
+- **Request**: Core drives ``valid`` and request fields; Memory drives ``ready``.
+- **Response**: Memory drives ``valid`` and ``dataResponse``; Core drives ``ready``.
+
+Timing
+------
+
+- **Instruction Port**: Fetches occur every cycle unless stalled or waiting for response.
+- **Data Port**: Stores and loads initiate in the EX/MEM stage transition and complete when ``dmemRsp.valid`` is asserted.
diff --git a/docs/concepts/pipeline.rst b/docs/concepts/pipeline.rst
new file mode 100644
index 000000000..37b8e0e0f
--- /dev/null
+++ b/docs/concepts/pipeline.rst
@@ -0,0 +1,26 @@
+NucleusRV 5-Stage Pipeline Architecture
+=======================================
+
+NucleusRV uses a classic 5-stage pipeline to achieve balanced throughput and frequency.
+
+Pipeline Stages
+---------------
+
+1. **Instruction Fetch (IF)**: Fetches instructions from instruction memory. Includes the PC unit, Realigner, and Compressed Decoder.
+2. **Instruction Decode (ID)**: Decodes the instruction, reads the register file, and generates control signals. Includes the Immediate Generator and Hazard Unit.
+3. **Execute (EX)**: Performs ALU operations, multiplication, division, and floating-point operations. Handles data forwarding.
+4. **Memory (MEM)**: Accesses data memory for load and store instructions. Manages atomic memory operations.
+5. **Write-Back (WB)**: Writes results back to the register file.
+
+Hazard Handling
+---------------
+
+- **Data Hazards**: Handled via forwarding from EX and MEM stages to the ID stage.
+- **Load-Use Hazards**: Handled by stalling the pipeline for one cycle.
+- **Control Hazards**: Branches and jumps are resolved in the ID stage, resulting in a 1-cycle penalty for taken branches.
+- **Atomic Hazards**: Special handling for read-modify-write sequences in the MEM stage.
+
+Pipeline Registers
+------------------
+
+Inter-stage registers are named following the ``<stage>_reg_<signal>`` pattern (e.g., ``if_reg_pc``, ``ex_reg_result``).
diff --git a/docs/conf.py b/docs/conf.py
index a9a5f18b8..bffc7930d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,35 +1,27 @@
# Configuration file for the Sphinx documentation builder.
-# -- Project information
-
project = 'NucleusRV'
-copyright = '2022, Usman Zain'
-author = 'Usman Zain'
+copyright = '2026, Kinza Fatima'
+author = 'Kinza Fatima'
release = '0.1'
version = '0.1.0'
-# -- General configuration
-
extensions = [
'sphinx.ext.duration',
'sphinx.ext.doctest',
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'sphinx.ext.intersphinx',
+ 'sphinx_copybutton',
]
-intersphinx_mapping = {
- 'python': ('https://docs.python.org/3/', None),
- 'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
-}
-intersphinx_disabled_domains = ['std']
-
templates_path = ['_templates']
-
-# -- Options for HTML output
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
html_theme = 'sphinx_rtd_theme'
-# -- Options for EPUB output
-epub_show_urls = 'footnote'
+intersphinx_mapping = {
+ 'python': ('https://docs.python.org/3/', None),
+ 'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
+}
diff --git a/docs/guides/arch-tests.rst b/docs/guides/arch-tests.rst
new file mode 100644
index 000000000..6c5d41668
--- /dev/null
+++ b/docs/guides/arch-tests.rst
@@ -0,0 +1,37 @@
+Run RISC-V Architecture Compliance Tests
+========================================
+
+Validate NucleusRV against official RISC-V architectural test suites using the ``riscof`` framework.
+
+Setup the Environment
+---------------------
+
+1. Create a Python virtual environment:
+
+ .. code-block:: bash
+
+ python3 -m venv .venv
+ source .venv/bin/activate
+
+2. Install ``riscof`` and required plugins:
+
+ .. code-block:: bash
+
+ pip3 install git+https://github.com/riscv/riscof.git
+ pip3 install -e riscof/riscv-arch-test/riscv-ctg riscof/riscv-arch-test/riscv-isac
+
+Run the Tests
+-------------
+
+Execute the test runner script:
+
+.. code-block:: bash
+
+ python3 run_riscv_arch_tests.py
+
+This script invokes ``riscof``, which runs tests on both NucleusRV (the DUT) and Spike (the reference), comparing their memory signatures.
+
+Review Results
+--------------
+
+Results are stored in ``riscof/riscof_work/``. View the ``report.html`` file for a detailed pass/fail summary.
diff --git a/docs/guides/building-c-programs.rst b/docs/guides/building-c-programs.rst
new file mode 100644
index 000000000..4262f9d71
--- /dev/null
+++ b/docs/guides/building-c-programs.rst
@@ -0,0 +1,32 @@
+Build and Run C Programs on NucleusRV
+=====================================
+
+NucleusRV includes a build system to compile C programs for the core.
+
+Create a Test Directory
+-----------------------
+
+Create a directory for your C project in ``tools/tests/``:
+
+.. code-block:: bash
+
+ mkdir tools/tests/my_test
+
+Add your C source files and a ``main.c``.
+
+Build with Make
+---------------
+
+Run ``make`` from the ``tools/`` directory:
+
+.. code-block:: bash
+
+ cd tools
+ make PROGRAM=my_test
+
+This will generate ``out/program.hex``, which you can use for simulation.
+
+Simulate
+--------
+
+Generate Verilog using ``NRVDriver`` and run with Verilator as described in the :doc:`../quickstart`.
diff --git a/docs/guides/fpga-integration.rst b/docs/guides/fpga-integration.rst
new file mode 100644
index 000000000..69c7b52db
--- /dev/null
+++ b/docs/guides/fpga-integration.rst
@@ -0,0 +1,31 @@
+Integrate NucleusRV into an FPGA Design
+=======================================
+
+Instantiate NucleusRV in your FPGA project and connect it to your system.
+
+Generate Synthesis-Ready Verilog
+--------------------------------
+
+Ensure you disable ``TRACE`` in your config to save area and remove debug ports:
+
+.. code-block:: scala
+
+ implicit val config: Configs = Configs(TRACE = false)
+
+Emit Verilog using ``NRVDriver``.
+
+Connect the Core
+----------------
+
+Instantiate the ``Top`` module in your HDL. Connect the instruction and data memory interfaces to your on-chip memory (e.g., BRAM).
+
+.. image:: /_static/fpga_integration_diagram.png
+ :alt: FPGA Integration Diagram
+
+Synthesis Tips
+--------------
+
+- **Clock Domain**: NucleusRV is a single-clock design.
+- **Reset Polarity**: The reset port is active-high synchronous.
+- **Area Optimization**: Disable unused extensions (M, F, A, C) in ``Configs``.
+- **Timing Closure**: The core is designed for 100MHz+ on modern FPGAs.
diff --git a/docs/guides/running-assembly.rst b/docs/guides/running-assembly.rst
new file mode 100644
index 000000000..292bb0ce6
--- /dev/null
+++ b/docs/guides/running-assembly.rst
@@ -0,0 +1,39 @@
+Simulate RISC-V Assembly Programs with NucleusRV
+================================================
+
+Learn how to write, assemble, and simulate RISC-V assembly programs on NucleusRV.
+
+Write Your Assembly File
+------------------------
+
+Create an assembly file (e.g., ``program.S``) in the ``tools/`` directory:
+
+.. code-block:: assembler
+
+ .section .text, "ax"
+ .global _start
+ _start:
+ li x1, 10
+ li x2, 20
+ add x3, x1, x2
+ nop
+
+Assemble and Convert to Hex
+---------------------------
+
+Use the RISC-V GCC toolchain to assemble and link your program:
+
+.. code-block:: bash
+
+   riscv32-unknown-elf-gcc -march=rv32imafc -mabi=ilp32 -static -nostdlib -nostartfiles -T tools/link.ld tools/program.S -o program.elf
+   python3 tools/makehex.py program.elf 2048 > imem.hex
+
+Generate Verilog and Simulate
+-----------------------------
+
+Follow the :doc:`../quickstart` guide to generate Verilog using your ``imem.hex`` and run the simulation with Verilator.
+
+Verify Output
+-------------
+
+The simulation output will show retiring instructions and register updates. You can also inspect the generated waveform file.
diff --git a/docs/guides/waveform-simulation.rst b/docs/guides/waveform-simulation.rst
new file mode 100644
index 000000000..6ce338444
--- /dev/null
+++ b/docs/guides/waveform-simulation.rst
@@ -0,0 +1,35 @@
+Generate and View Waveforms with NucleusRV
+==========================================
+
+NucleusRV supports waveform tracing during simulation to help with debugging.
+
+Enabling Waveform Tracing
+-------------------------
+
+Pass the ``--trace`` flag to Verilator during compilation:
+
+.. code-block:: bash
+
+ verilator --cc --exe --build --trace --no-timing $NRV_ROOT/tb_Top.cpp Top.v
+
+Where Waveforms are Saved
+-------------------------
+
+During simulation, a VCD file is generated at ``logs/top.vcd`` within your simulation directory.
+
+Opening Waveforms in GTKWave
+----------------------------
+
+Use GTKWave to view the signal transitions:
+
+.. code-block:: bash
+
+ gtkwave logs/top.vcd
+
+Key Signals to Watch
+--------------------
+
+- ``clock`` and ``reset``
+- ``io_imemReq_valid`` / ``io_imemRsp_valid`` (Instruction Fetch)
+- ``io_dmemReq_valid`` / ``io_dmemRsp_valid`` (Data Memory Access)
+- ``io_pin`` (Write-back data output)
diff --git a/docs/index.rst b/docs/index.rst
index e077835f8..892e1b121 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,27 +1,24 @@
-NucleusRV: A 32 bit RISC-V core
-===================================
+NucleusRV Documentation
+=======================
-.. note::
-
- This project is under active development.
-
-**NucleusRV** is a 32-bit 5 stage pipelined RISC-V core written in Chisel. It implements I base ISA, M multiply and divide, and C compressed instructions (RV32IMC). NucleusRV has been taped out in Google's sponsered OpenMPW-6 shuttle on SKY130nm process node.
-
-The documentation is split into 3 sections.
-
-The :doc:`Overview <01_overview/index>` section explores the features of NucleusRV from bird's eye view.
-
-The :doc:`User Guide <02_user/index>` section provides information necessary to setup and run
-NucleusRV. It is aimed at software developers writing software for NucleusRV.
-
-The :doc:`Developer Guide <03_dev/index>` section gives detailed explanation of source code and different
-design decisions. It highlights contribution guidelines and will be helpful for people making
-changes to NucleusRV
+NucleusRV is an open-source 32-bit 5-stage pipelined RISC-V core (RV32IMAFC) written in Chisel. It is designed for high configurability and ease of integration into FPGA and ASIC designs.
.. toctree::
:maxdepth: 2
- :hidden:
-
- 01_overview/index.rst
- 02_user/index.rst
- 03_dev/index.rst
+ :caption: Table of Contents
+
+ introduction
+ quickstart
+ installation
+ concepts/isa-support
+ concepts/pipeline
+ concepts/memory-interface
+ concepts/configuration
+ guides/running-assembly
+ guides/building-c-programs
+ guides/arch-tests
+ guides/waveform-simulation
+ guides/fpga-integration
+ reference/csr-map
+ reference/config-options
+ reference/pipeline-stages
diff --git a/docs/installation.rst b/docs/installation.rst
new file mode 100644
index 000000000..6d899596b
--- /dev/null
+++ b/docs/installation.rst
@@ -0,0 +1,61 @@
+Install NucleusRV Dependencies and Toolchain
+============================================
+
+To build and simulate NucleusRV, you need to install several tools on your Linux system.
+
+Install Verilator
+-----------------
+
+Verilator is required for simulation:
+
+.. code-block:: bash
+
+ sudo apt-get install -y git help2man perl python3 make autoconf g++ flex bison ccache
+ sudo apt-get install -y libgoogle-perftools-dev numactl perl-doc libfl2 libfl-dev zlib1g zlib1g-dev
+
+ git clone https://github.com/verilator/verilator.git
+ cd verilator
+ git checkout stable
+ autoconf
+ ./configure
+ make -j$(nproc)
+ sudo make install
+
+Install RISC-V GNU Toolchain
+----------------------------
+
+Install the toolchain for RV32:
+
+.. code-block:: bash
+
+ git clone https://github.com/riscv/riscv-gnu-toolchain
+ cd riscv-gnu-toolchain
+ ./configure --prefix=/opt/riscv --with-arch=rv32gc --with-abi=ilp32
+ make -j$(nproc)
+
+Add the toolchain to your ``PATH``:
+
+.. code-block:: bash
+
+ export PATH=/opt/riscv/bin:$PATH
+
+Install Java, sbt, and Scala
+----------------------------
+
+NucleusRV is written in Chisel (Scala):
+
+.. code-block:: bash
+
+ sudo apt-get install -y default-jdk
+ # Install sbt via official repositories
+ # ... (follow sbt installation instructions for your distro)
+
+Verify Your Setup
+-----------------
+
+.. code-block:: bash
+
+ verilator --version
+ riscv32-unknown-elf-gcc --version
+ java -version
+ sbt --version
diff --git a/docs/introduction.rst b/docs/introduction.rst
new file mode 100644
index 000000000..ff30a4dbd
--- /dev/null
+++ b/docs/introduction.rst
@@ -0,0 +1,37 @@
+NucleusRV: RV32IMAFC Processor Core for FPGA & ASIC
+===================================================
+
+NucleusRV is an open-source 32-bit RISC-V processor core implementing the RV32IMAFC instruction set. It is written in Chisel, enabling extensive parameterization and easy generation of Verilog.
+
+Key Features
+------------
+
+- **Configurable Extensions**: Enable or disable M, A, F, C, and Zicsr extensions via configuration.
+- **Classic 5-Stage Pipeline**: Fetch, Decode, Execute, Memory, and Write-Back stages.
+- **Verilator Simulation**: Includes a C++ testbench for fast simulation and verification.
+- **FPGA Friendly**: Designed to be synthesized on various FPGA families.
+
+Supported ISA Extensions
+------------------------
+
+- **I**: Base Integer instruction set.
+- **M**: Integer Multiply and Divide.
+- **A**: Atomic Memory Operations.
+- **F**: Single-Precision Floating-Point.
+- **C**: Compressed 16-bit instructions.
+- **Zicsr**: Control and Status Register instructions.
+
+Where to Go Next
+----------------
+
+- :doc:`quickstart` to run your first simulation.
+- :doc:`installation` to set up your environment.
+- :doc:`concepts/isa-support` for technical details on ISA extensions.
+
+What you can do with NucleusRV
+------------------------------
+
+* **Install dependencies**: Set up Verilator, RISC-V GNU toolchain, sbt, and Scala.
+* **Generate Verilog**: Use ``NRVDriver`` to emit synthesis-ready Verilog.
+* **Simulate with Verilator**: Run programs and verify execution with the included testbench.
+* **Integrate into your design**: Connect separate instruction and data memory ports to your system.
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
new file mode 100644
index 000000000..0a48c1f7e
--- /dev/null
+++ b/docs/quickstart.rst
@@ -0,0 +1,64 @@
+Get Started with NucleusRV
+==========================
+
+Learn how to clone NucleusRV, generate Verilog from Chisel sources, and run your first simulation with Verilator in under 10 minutes.
+
+Clone the Repository
+--------------------
+
+Clone the repository with submodules:
+
+.. code-block:: bash
+
+ git clone --recurse-submodules https://github.com/merledu/nucleusrv.git
+ cd nucleusrv
+
+Prepare Your Program
+--------------------
+
+Generate a hex file from an assembly or C program using the provided tools:
+
+.. code-block:: bash
+
+ cd tools
+ make PROGRAM=fibonacci
+ cd ..
+
+Generate Verilog
+----------------
+
+Use the ``NRVDriver`` to generate Verilog from Chisel:
+
+.. code-block:: bash
+
+   sbt "runMain nucleusrv.components.NRVDriver \
+       --imem tools/out/program.hex \
+       --dmem tools/out/program.hex \
+       --target-dir generated"
+
+Compile with Verilator
+----------------------
+
+Set ``NRV_ROOT`` and compile:
+
+.. code-block:: bash
+
+ export NRV_ROOT=$PWD
+ cd generated
+ verilator --cc --exe --build --trace --no-timing $NRV_ROOT/tb_Top.cpp Top.v
+
+Run the Simulation
+------------------
+
+.. code-block:: bash
+
+ ./obj_dir/VTop
+
+Inspect Waveforms
+-----------------
+
+If tracing was enabled, waveforms are saved in the ``logs/`` directory:
+
+.. code-block:: bash
+
+ gtkwave logs/top.vcd
diff --git a/docs/reference/config-options.rst b/docs/reference/config-options.rst
new file mode 100644
index 000000000..aa97e7b0e
--- /dev/null
+++ b/docs/reference/config-options.rst
@@ -0,0 +1,78 @@
+NucleusRV Configuration Options Reference
+=========================================
+
+Complete reference for the ``Configs`` class parameters.
+
+Case Class Definition
+---------------------
+
+.. code-block:: scala
+
+ case class Configs(
+ XLEN : Int = 32,
+ M : Boolean = true,
+ F : Boolean = true,
+ C : Boolean = false,
+ A : Boolean = true,
+ Zicsr : Boolean = true,
+ TRACE : Boolean = true,
+ HARTID : Int = 1,
+ ARCHID : Int = 1
+ )
+
+Parameter Details
+-----------------
+
+XLEN
+~~~~
+- Type: ``Int``
+- Default: ``32``
+- Support: Only 32 is currently supported.
+
+M
+~
+- Type: ``Boolean``
+- Default: ``true``
+- Description: Enables the Multiply/Divide Unit (MDU).
+
+F
+~
+- Type: ``Boolean``
+- Default: ``true``
+- Description: Enables the Floating-Point Unit (FPU).
+
+C
+~
+- Type: ``Boolean``
+- Default: ``false``
+- Description: Enables Compressed instruction support (Realigner and CompressedDecoder).
+
+A
+~
+- Type: ``Boolean``
+- Default: ``true``
+- Description: Enables Atomic instruction support (AMOALU and ReservationFile).
+
+Zicsr
+~~~~~
+- Type: ``Boolean``
+- Default: ``true``
+- Description: Enables CSR instructions and the CSR file.
+
+TRACE
+~~~~~
+- Type: ``Boolean``
+- Default: ``true``
+- Description: Enables the RVFI (RISC-V Formal Interface) tracing ports.
+
+HARTID
+~~~~~~
+- Type: ``Int``
+- Default: ``1``
+- Description: Sets the hardware thread ID.
+
+ARCHID
+~~~~~~
+- Type: ``Int``
+- Default: ``1``
+- Description: Sets the architecture ID.
diff --git a/docs/reference/csr-map.rst b/docs/reference/csr-map.rst
new file mode 100644
index 000000000..6af521958
--- /dev/null
+++ b/docs/reference/csr-map.rst
@@ -0,0 +1,104 @@
+CSR Address Map Reference for NucleusRV
+=======================================
+
+NucleusRV implements a subset of RISC-V Machine-Mode CSRs and Floating-Point CSRs.
+
+Machine-Mode CSRs
+-----------------
+
++---------+-----------+-----------------------------------+
+| Address | Name | Description |
++=========+===========+===================================+
+| 0x300 | mstatus | Machine status register |
++---------+-----------+-----------------------------------+
+| 0x301 | misa | Machine ISA register |
++---------+-----------+-----------------------------------+
+| 0x304 | mie | Machine interrupt-enable register |
++---------+-----------+-----------------------------------+
+| 0x305 | mtvec | Machine trap-handler base address |
++---------+-----------+-----------------------------------+
+| 0x340 | mscratch | Scratch register for machine trap |
++---------+-----------+-----------------------------------+
+| 0x341 | mepc | Machine exception program counter |
++---------+-----------+-----------------------------------+
+| 0x342 | mcause | Machine trap cause |
++---------+-----------+-----------------------------------+
+| 0x343 | mtval | Machine bad address or instruction|
++---------+-----------+-----------------------------------+
+| 0x344 | mip | Machine interrupt pending |
++---------+-----------+-----------------------------------+
+| 0xF12 | marchid | Architecture ID |
++---------+-----------+-----------------------------------+
+| 0xF14 | mhartid | Hardware thread ID |
++---------+-----------+-----------------------------------+
+
+Floating-Point CSRs
+-------------------
+
+Enabled when ``F = true`` in the configuration.
+
++---------+-----------+-----------------------------------+
+| Address | Name | Description |
++=========+===========+===================================+
+| 0x001 | fflags | Floating-Point Accrued Exceptions |
++---------+-----------+-----------------------------------+
+| 0x002 | frm | Floating-Point Dynamic Rounding |
++---------+-----------+-----------------------------------+
+| 0x003 | fcsr | Floating-Point Control and Status |
++---------+-----------+-----------------------------------+
+
+Performance Counters
+--------------------
+
++---------+---------------+--------------------------------------+
+| Address | Name          | Description                          |
++=========+===============+======================================+
+| 0xB00   | mcycle        | Machine cycle counter                |
++---------+---------------+--------------------------------------+
+| 0xB02   | minstret      | Machine instructions-retired counter |
++---------+---------------+--------------------------------------+
+| 0x320   | mcountinhibit | Machine counter-inhibit register     |
++---------+---------------+--------------------------------------+
+
+Detailed Field Maps
+-------------------
+
+MSTATUS Field Map
+~~~~~~~~~~~~~~~~~
+
+* **[22] TSR**: Trap SRET
+* **[21] TW**: Timeout Wait
+* **[20] TVM**: Trap Virtual Memory
+* **[19] MXR**: Make Executable Readable
+* **[18] SUM**: permit Supervisor User Memory access
+* **[17] MPRV**: Modify Privilege
+* **[12:11] MPP**: Machine Previous Privilege Mode
+* **[8] SPP**: Supervisor Previous Privilege Mode
+* **[7] MPIE**: Machine Previous Interrupt Enable
+* **[6] UBE**: User Big-Endian
+* **[5] SPIE**: Supervisor Previous Interrupt Enable
+* **[3] MIE**: Machine Interrupt Enable
+* **[1] SIE**: Supervisor Interrupt Enable
+
+MIE and MIP Field Map
+~~~~~~~~~~~~~~~~~~~~~
+
+* **[11] MEIE/MEIP**: Machine External Interrupt
+* **[7] MTIE/MTIP**: Machine Timer Interrupt
+* **[3] MSIE/MSIP**: Machine Software Interrupt
+
+FCSR Field Map
+~~~~~~~~~~~~~~
+
+* **[7:5] FRM**: Floating-Point Rounding Mode
+* **[4] NV**: Invalid Operation
+* **[3] DZ**: Divide by Zero
+* **[2] OF**: Overflow
+* **[1] UF**: Underflow
+* **[0] NX**: Inexact
+
+MCOUNTINHIBIT Field Map
+~~~~~~~~~~~~~~~~~~~~~~~
+
+* **[2] IR**: Inhibit ``minstret``
+* **[0] CY**: Inhibit ``mcycle``
diff --git a/docs/reference/pipeline-stages.rst b/docs/reference/pipeline-stages.rst
new file mode 100644
index 000000000..e860c1ee9
--- /dev/null
+++ b/docs/reference/pipeline-stages.rst
@@ -0,0 +1,110 @@
+NucleusRV Pipeline Stages and Signal Reference
+==============================================
+
+Detailed reference for each of NucleusRV's five pipeline stages.
+
+Stage 1: IF — Instruction Fetch
+-------------------------------
+
+Key Responsibilities:
+
+- Instruction fetch via ``imemReq``.
+- Handle misaligned instructions (if ``C = true``).
+- Update PC.
+
+Key Signals:
+
+- ``if_reg_pc``: PC of the current fetch.
+- ``if_reg_ins``: Fetched instruction.
+- ``ral_halt_o``: Halt signal from Realigner.
+
+Stage 2: ID — Instruction Decode
+--------------------------------
+
+Key Responsibilities:
+
+- Decode opcode and generate control signals.
+- Read register file (rs1, rs2, rs3).
+- Immediate generation.
+- Branch resolution.
+
+Key Signals:
+
+- ``id_reg_pc``: PC transmitted from IF.
+- ``id_reg_rd1``, ``id_reg_rd2``: Operands from register file.
+- ``ctl_aluOp``, ``ctl_memRead``, etc.: Control signals.
+
+Stage 3: EX — Execute
+---------------------
+
+Key Responsibilities:
+
+- ALU operations.
+- Data forwarding.
+- Multi-cycle M and F operations.
+
+Key Signals:
+
+- ``ex_reg_result``: ALU result.
+- ``stall``: Stall signal for multi-cycle ops.
+
+Stage 4: MEM — Memory Access
+----------------------------
+
+Key Responsibilities:
+
+- Data memory access via ``dmemReq``.
+- AMO sequencing.
+
+Key Signals:
+
+- ``dmemRsp.valid``: Response validity.
+- ``mem_reg_result``: Data read or ALU result.
+
+Stage 5: WB — Write Back
+------------------------
+
+Key Responsibilities:
+
+- Register file write.
+- Debug output (``io.pin``).
+
+Key Signals:
+
+- ``wb_data``: Final data to be written back.
+- ``ID.writeData``: Write data to register file.
+- ``ID.writeReg``: Target register for write-back.
+
+Top-level IO Port Reference
+---------------------------
+
+* **clock / reset**: System clock and active-high synchronous reset.
+* **io_stall**: External stall input (active-high).
+* **io_pin**: 32-bit debug output (retired instruction data).
+
+Memory Interface Ports
+~~~~~~~~~~~~~~~~~~~~~~
+
+* **io_imemReq**: Instruction memory request bundle.
+
+  * ``bits.addrRequest``: Byte address.
+  * ``valid``: Request valid.
+  * ``ready``: Memory ready to accept.
+
+* **io_imemRsp**: Instruction memory response bundle.
+
+  * ``bits.dataResponse``: 32-bit instruction data.
+  * ``valid``: Instruction data valid.
+
+* **io_dmemReq**: Data memory request bundle.
+
+  * ``bits.isWrite``: Write enable.
+  * ``bits.activeByteLane``: 4-bit byte-mask.
+
+* **io_dmemRsp**: Data memory response bundle.
+
+  * ``bits.dataResponse``: Read data.
+
+RVFI Interface
+~~~~~~~~~~~~~~
+Enabled when ``TRACE = true``. Provides a complete trace of every retired instruction for formal verification and logging.
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 000000000..eaa78b276
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,3 @@
+sphinx
+sphinx-rtd-theme
+sphinx-copybutton
diff --git a/riscof/nucleusrv/nucleusrv_isa.yaml b/riscof/nucleusrv/nucleusrv_isa.yaml
index 693bf5eb2..37c5a92cd 100644
--- a/riscof/nucleusrv/nucleusrv_isa.yaml
+++ b/riscof/nucleusrv/nucleusrv_isa.yaml
@@ -1,6 +1,8 @@
hart_ids: [0]
hart0:
- ISA: RV32IMFZicsr
+ #ISA: RV32IMFZicsr
+ ISA: RV32IA
+ #ISA: RV32I
physical_addr_sz: 32
User_Spec_Version: '2.3'
supported_xlen: [32]
diff --git a/riscof/nucleusrv/riscof_nucleusrv.py b/riscof/nucleusrv/riscof_nucleusrv.py
index 27480a3ad..799a9dfa0 100644
--- a/riscof/nucleusrv/riscof_nucleusrv.py
+++ b/riscof/nucleusrv/riscof_nucleusrv.py
@@ -85,6 +85,7 @@ def initialise(self, suite, work_dir, archtest_env):
# add more utility snippets here
self.objcopy = 'riscv{0}-unknown-elf-objcopy -O binary -j {3} {1} {2}'
self.hexdump = 'hexdump -v -e \'1/4 "%08x\\n"\' {0} > {1}'
+ self.objdump = 'riscv{0}-unknown-elf-objdump -D -Mno-aliases {1} > {1}.objdump'
def build(self, isa_yaml, platform_yaml):
@@ -175,6 +176,7 @@ def runTests(self, testList):
self.objcopy.format(self.xlen, elf, 'dmem.bin', '.data'),
self.hexdump.format('imem.bin', 'imem.hex'),
self.hexdump.format('dmem.bin', 'dmem.hex'),
+ self.objdump.format(self.xlen, elf),
f'cd {self.dut}',
self.sbt.format(
os.path.join(testentry['work_dir'], 'imem.hex'),
@@ -191,7 +193,8 @@ def runTests(self, testList):
self.objcopy.format(self.xlen, elf, 'imem.bin', '.text.init'),
self.objcopy.format(self.xlen, elf, 'dmem.bin', '.data'),
self.hexdump.format('imem.bin', 'imem.hex'),
- self.hexdump.format('dmem.bin', 'dmem.hex')
+ self.hexdump.format('dmem.bin', 'dmem.hex'),
+ self.objdump.format(self.xlen, elf)
))
# concatenate all commands that need to be executed within a make-target.
diff --git a/src/main/resources/sram.v b/src/main/resources/sram.v
index a2fa59eec..2c05f7cfc 100644
--- a/src/main/resources/sram.v
+++ b/src/main/resources/sram.v
@@ -56,27 +56,27 @@ module sram #(
input [NUM_WMASKS-1:0] wmask0; // write mask
input [ADDR_WIDTH-1:0] addr0;
input [DATA_WIDTH-1:0] din0;
- output [DATA_WIDTH-1:0] dout0;
+ output reg [DATA_WIDTH-1:0] dout0;
input clk1; // clock
input csb1; // active low chip select
input [ADDR_WIDTH-1:0] addr1;
output [DATA_WIDTH-1:0] dout1;
- reg csb0_reg;
- reg web0_reg;
- reg [NUM_WMASKS-1:0] wmask0_reg;
- reg [ADDR_WIDTH-1:0] addr0_reg;
- reg [DATA_WIDTH-1:0] din0_reg;
- reg [DATA_WIDTH-1:0] dout0;
+ //reg csb0_reg;
+ //reg web0_reg;
+ //reg [NUM_WMASKS-1:0] wmask0_reg;
+ //reg [ADDR_WIDTH-1:0] addr0_reg;
+ //reg [DATA_WIDTH-1:0] din0_reg;
+ //reg [DATA_WIDTH-1:0] dout0;
// All inputs are registers
- always @(posedge clk0)
- begin
- csb0_reg = csb0;
- web0_reg = web0;
- wmask0_reg = wmask0;
- addr0_reg = addr0;
- din0_reg = din0;
+ //always @(posedge clk0)
+ //begin
+ // csb0_reg = csb0;
+ // web0_reg = web0;
+ // wmask0_reg = wmask0;
+ // addr0_reg = addr0;
+ // din0_reg = din0;
//dout0 = 32'bx0;
/*`ifdef DBG
if ( !csb0_reg && web0_reg )
@@ -84,7 +84,7 @@ module sram #(
if ( !csb0_reg && !web0_reg )
$display($time," Writing %m addr0=%b din0=%b wmask0=%b",addr0_reg,din0_reg,wmask0_reg);
`endif
-*/ end
+*/ //end
reg csb1_reg;
reg [ADDR_WIDTH-1:0] addr1_reg;
@@ -115,15 +115,15 @@ initial
// Write Operation : When web0 = 0, csb0 = 0
always @ (negedge clk0)
begin : MEM_WRITE0
- if ( !csb0_reg && !web0_reg ) begin
- if (wmask0_reg[0])
- mem[addr0_reg][7:0] = din0_reg[7:0];
- if (wmask0_reg[1])
- mem[addr0_reg][15:8] = din0_reg[15:8];
- if (wmask0_reg[2])
- mem[addr0_reg][23:16] = din0_reg[23:16];
- if (wmask0_reg[3])
- mem[addr0_reg][31:24] = din0_reg[31:24];
+ if ( !csb0 && !web0 ) begin
+ if (wmask0[0])
+ mem[addr0][7:0] = din0[7:0];
+ if (wmask0[1])
+ mem[addr0][15:8] = din0[15:8];
+ if (wmask0[2])
+ mem[addr0][23:16] = din0[23:16];
+ if (wmask0[3])
+ mem[addr0][31:24] = din0[31:24];
end
end
@@ -131,8 +131,8 @@ initial
// Read Operation : When web0 = 1, csb0 = 0
always @ (negedge clk0)
begin : MEM_READ0
- if (!csb0_reg && web0_reg)
- dout0 <= #(DELAY) mem[addr0_reg];
+ if (!csb0 && web0)
+ dout0 <= mem[addr0];
end
// Memory Read Block Port 1
diff --git a/src/main/scala/components/CSR.scala b/src/main/scala/components/CSR.scala
index d2580c83d..6896aa888 100644
--- a/src/main/scala/components/CSR.scala
+++ b/src/main/scala/components/CSR.scala
@@ -6,7 +6,7 @@ import chisel3._
import chisel3.util._
import nucleusrv.csr._
-class CSR extends Module{
+class CSR(F: Boolean) extends Module{
val io = IO(new Bundle{
val i_misa_value = Input(UInt(32.W))
val i_mhartid_value = Input(UInt(32.W))
@@ -18,11 +18,11 @@ class CSR extends Module{
val i_addr = Input(UInt(12.W))
val i_w_en = Input(Bool())
val i_instr_retired = Input(Bool())
- val f_except = Input(Vec(5, Bool()))
- val fcsr_o_data = Output(UInt(32.W))
+ val f_except = if (F) Some(Input(Vec(5, Bool()))) else None
+ val fcsr_o_data = if (F) Some(Output(UInt(32.W))) else None
})
- val csrRegFile = Module(new CSRRegFile)
+ val csrRegFile = Module(new CSRRegFile(F))
dontTouch(csrRegFile.io)
csrRegFile.io.CSR.i_data := Mux(io.i_opr(2), io.i_imm, io.i_data)
@@ -33,16 +33,19 @@ class CSR extends Module{
csrRegFile.io.CSR.i_addr := io.i_addr
csrRegFile.io.CSR.i_w_en := io.i_w_en
csrRegFile.io.MINSTRET.i_instr_retired := io.i_instr_retired
- csrRegFile.io.FCSR.except <> io.f_except
-
- io.o_data := csrRegFile.io.CSR.o_data
- io.fcsr_o_data := Cat(
+ if (F) {
+ csrRegFile.io.FCSR.get.except <> io.f_except.get
+ io.fcsr_o_data.get := Cat(
"b0".U(24.W),
- csrRegFile.io.FCSR.frm,
- csrRegFile.io.FCSR.nv,
- csrRegFile.io.FCSR.dz,
- csrRegFile.io.FCSR.of,
- csrRegFile.io.FCSR.uf,
- csrRegFile.io.FCSR.nx
+ csrRegFile.io.FCSR.get.frm,
+ csrRegFile.io.FCSR.get.nv,
+ csrRegFile.io.FCSR.get.dz,
+ csrRegFile.io.FCSR.get.of,
+ csrRegFile.io.FCSR.get.uf,
+ csrRegFile.io.FCSR.get.nx
)
+ }
+
+ io.o_data := csrRegFile.io.CSR.o_data
+
}
diff --git a/src/main/scala/components/Configs.scala b/src/main/scala/components/Configs.scala
index 2db1fa248..9131b2b36 100644
--- a/src/main/scala/components/Configs.scala
+++ b/src/main/scala/components/Configs.scala
@@ -2,11 +2,11 @@ package nucleusrv.components
case class Configs(
XLEN : Int = 32,
- M : Boolean = true,
- F : Boolean = true,
+ M : Boolean = false,
+ F : Boolean = false,
C : Boolean = false,
A : Boolean = true,
- Zicsr : Boolean = true,
+ Zicsr : Boolean = false,
TRACE : Boolean = true,
HARTID : Int = 1,
ARCHID : Int = 1
diff --git a/src/main/scala/components/Core.scala b/src/main/scala/components/Core.scala
index 7da0961f6..b8d18549f 100644
--- a/src/main/scala/components/Core.scala
+++ b/src/main/scala/components/Core.scala
@@ -61,10 +61,10 @@ class Core(implicit val config:Configs) extends Module{
val id_reg_is_f = if (F) Some(RegInit(0.B)) else None
// Atomic signals ID-EX
- val id_reg_isAMO = RegInit(false.B)
- val id_reg_isLR = RegInit(false.B)
- val id_reg_isSC = RegInit(false.B)
- val id_reg_amoOp = RegInit(0.U(5.W))
+ val id_reg_isAMO = if (A) Some(RegInit(false.B)) else None
+ val id_reg_isLR = if (A) Some(RegInit(false.B)) else None
+ val id_reg_isSC = if (A) Some(RegInit(false.B)) else None
+ val id_reg_amoOp = if (A) Some(RegInit(0.U(5.W))) else None
// EX-MEM Registers
val ex_reg_branch = RegInit(0.U(32.W))
@@ -87,10 +87,10 @@ class Core(implicit val config:Configs) extends Module{
val ex_reg_is_f = if (F) Some(RegInit(0.B)) else None
// Atomic signals EX-MEM
- val ex_reg_isAMO = RegInit(false.B)
- val ex_reg_isLR = RegInit(false.B)
- val ex_reg_isSC = RegInit(false.B)
- val ex_reg_amoOp = RegInit(0.U(5.W))
+ val ex_reg_isAMO = if (A) Some(RegInit(false.B)) else None
+ val ex_reg_isLR = if (A) Some(RegInit(false.B)) else None
+ val ex_reg_isSC = if (A) Some(RegInit(false.B)) else None
+ val ex_reg_amoOp = if (A) Some(RegInit(0.U(5.W))) else None
// MEM-WB Registers
val mem_reg_rd = RegInit(0.U(32.W))
@@ -109,22 +109,22 @@ class Core(implicit val config:Configs) extends Module{
val mem_reg_is_f = if (F) Some(RegInit(0.B)) else None
// Atomic signals MEM-WB
- val mem_reg_isAMO = RegInit(false.B)
- val mem_reg_isLR = RegInit(false.B)
- val mem_reg_isSC = RegInit(false.B)
+ val mem_reg_isAMO = if (A) Some(RegInit(false.B)) else None
+ val mem_reg_isLR = if (A) Some(RegInit(false.B)) else None
+ val mem_reg_isSC = if (A) Some(RegInit(false.B)) else None
// AMO state tracking
- val amo_read_done = RegInit(false.B)
- val amo_old_value = RegInit(0.U(32.W))
- val sc_issued = RegInit(false.B)
+ val amo_read_done = if (A) Some(RegInit(false.B)) else None
+ val amo_old_value = if (A) Some(RegInit(0.U(32.W))) else None
+ val sc_issued = if (A) Some(RegInit(false.B)) else None
//Pipeline Units
val IF = Module(new InstructionFetch).io
- val ID = Module(new InstructionDecode(F, Zicsr, TRACE)).io
- val EX = Module(new Execute(F, M = M, TRACE = TRACE)).io
- val MEM = Module(new MemoryFetch(TRACE))
+ val ID = Module(new InstructionDecode(A, F, Zicsr, TRACE)).io
+ val EX = Module(new Execute(F, M = M, A, TRACE = TRACE)).io
+ val MEM = Module(new MemoryFetch(A, TRACE))
- val reservationFile = Module(new ReservationFile).io
+ val reservationFile = if (A) Some(Module(new ReservationFile).io) else None
/*****************
* Fetch Stage *
@@ -154,7 +154,8 @@ class Core(implicit val config:Configs) extends Module{
is_comp := CD.is_comp
}
else {
- IF.address := pc.io.out.asUInt
+ //IF.address := pc.io.out.asUInt
+ IF.address := Mux(ID.hdu_pcWrite, pc.io.out.asUInt, if_reg_pc)
instruction := IF.instruction
}
@@ -172,10 +173,10 @@ class Core(implicit val config:Configs) extends Module{
IF.stall := io.stall || EX.stall || ID.stall || IF_stall || ID.pcSrc || MEM.io.stall
- val halt = Mux(((EX.stall || ID.stall || io.imemReq.valid) | ral_halt_o || MEM.io.stall), 1.B, 0.B)
+ val halt = dontTouch(Mux(((EX.stall || ID.stall /*|| !io.imemReq.valid*/) || ral_halt_o || MEM.io.stall || (if (A) MEM.io.amo_stall.get else 0.B)), 1.B, 0.B))
pc.io.halt := halt
val npc = Mux(
- ID.hdu_pcWrite,
+ ID.hdu_pcWrite && !halt,
Mux(
ID.pcSrc,
ID.pcPlusOffset.asSInt,
@@ -185,7 +186,7 @@ class Core(implicit val config:Configs) extends Module{
)
pc.io.in := dontTouch(npc)
- when(ID.hdu_if_reg_write && !MEM.io.stall) {
+ when(ID.hdu_if_reg_write && (if (A) !MEM.io.amo_stall.get else 1.B)) {
if_reg_pc := pc.io.out.asUInt
if_reg_ins := instruction
}
@@ -197,7 +198,7 @@ class Core(implicit val config:Configs) extends Module{
* Decode Stage *
****************/
- when(!MEM.io.stall) {
+ when (!EX.stall && !MEM.io.stall && (if (A) !MEM.io.amo_stall.get else 1.B)) {
id_reg_rd1 := ID.readData1
id_reg_rd2 := ID.readData2
id_reg_imm := ID.immediate
@@ -215,13 +216,26 @@ class Core(implicit val config:Configs) extends Module{
id_reg_ctl_aluOp := ID.ctl_aluOp
id_reg_ctl_jump := ID.ctl_jump
id_reg_ctl_aluSrc1 := ID.ctl_aluSrc1
- id_reg_is_csr := ID.is_csr.get
- id_reg_csr_data := ID.csr_o_data.get
+ if (Zicsr) {
+ id_reg_is_csr := ID.is_csr.get
+ id_reg_csr_data := ID.csr_o_data.get
+ }
- id_reg_isAMO := ID.isAMO
- id_reg_isLR := ID.isLR
- id_reg_isSC := ID.isSC
- id_reg_amoOp := ID.amoOp
+ if (A) {
+ id_reg_isAMO.get := ID.isAMO.get
+ id_reg_isLR.get := ID.isLR.get
+ id_reg_isSC.get := ID.isSC.get
+ id_reg_amoOp.get := ID.amoOp.get
+ }
+
+ if (F) {
+ id_reg_f_read.get <> ID.f_read.get
+ id_reg_rd3.get := ID.readData3.get
+ if (Zicsr) {
+ id_reg_fcsr_o_data.get := ID.fcsr_o_data.get
+ }
+ id_reg_is_f.get := ID.is_f.get
+ }
}
ID.id_instruction := if_reg_ins
@@ -235,17 +249,19 @@ class Core(implicit val config:Configs) extends Module{
val misa = (1 << 30).U | (1 << 8).U |
Mux(M.B, (1 << 12).U, 0.U) |
Mux(C.B, (1 << 2).U, 0.U)
- ID.csr_i_misa.get := misa
- ID.csr_i_marchid.get := ARCHID.U
- ID.csr_i_mhartid.get := HARTID.U
+ if (Zicsr) {
+ ID.csr_i_misa.get := misa
+ ID.csr_i_marchid.get := ARCHID.U
+ ID.csr_i_mhartid.get := HARTID.U
+ }
ID.id_ex_regWr := id_reg_ctl_regWrite(0)
ID.ex_mem_regWr := ex_reg_ctl_regWrite(0)
if (F) {
- id_reg_f_read.get <> ID.f_read.get
- id_reg_rd3.get := ID.readData3.get
- id_reg_fcsr_o_data.get := ID.fcsr_o_data.get
- id_reg_is_f.get := ID.is_f.get
+ //id_reg_f_read.get <> ID.f_read.get
+ //id_reg_rd3.get := ID.readData3.get
+ //id_reg_fcsr_o_data.get := ID.fcsr_o_data.get
+ //id_reg_is_f.get := ID.is_f.get
for (i <- 0 until 2) {
ID.f_read_reg.get(0)(i) := id_reg_f_read.get(i)
ID.f_read_reg.get(1)(i) := ex_reg_f_read.get(i)
@@ -257,9 +273,11 @@ class Core(implicit val config:Configs) extends Module{
* Execute Stage *
******************/
- EX.isAMO := id_reg_isAMO
- EX.isLR := id_reg_isLR
- EX.isSC := id_reg_isSC
+ if (A) {
+ EX.isAMO.get := id_reg_isAMO.get
+ EX.isLR.get := id_reg_isLR.get
+ EX.isSC.get := id_reg_isSC.get
+ }
EX.immediate := id_reg_imm
EX.readData1 := id_reg_rd1
@@ -272,11 +290,13 @@ class Core(implicit val config:Configs) extends Module{
EX.ctl_aluSrc1 := id_reg_ctl_aluSrc1
// AMO alu connections
- EX.amo_memData := amo_old_value
- EX.amo_src2 := ex_reg_wd
- EX.amo_op_code := ex_reg_amoOp
+ if (A) {
+ EX.amo_memData.get := amo_old_value.get
+ EX.amo_src2.get := ex_reg_wd
+ EX.amo_op_code.get := ex_reg_amoOp.get
+ }
- when(!MEM.io.stall) {
+ when (!MEM.io.stall && (if (A) !MEM.io.amo_stall.get else 1.B)) {
ex_reg_pc := id_reg_pc
ex_reg_wra := id_reg_wra
ex_reg_ins := id_reg_ins
@@ -288,10 +308,20 @@ class Core(implicit val config:Configs) extends Module{
ex_reg_ctl_memWrite := id_reg_ctl_memWrite
ex_reg_wd := EX.writeData
ex_reg_result := EX.ALUresult
- ex_reg_isAMO := id_reg_isAMO
- ex_reg_isLR := id_reg_isLR
- ex_reg_isSC := id_reg_isSC
- ex_reg_amoOp := id_reg_amoOp
+
+
+ if (A) {
+ ex_reg_isAMO.get := id_reg_isAMO.get
+ ex_reg_isLR.get := id_reg_isLR.get
+ ex_reg_isSC.get := id_reg_isSC.get
+ ex_reg_amoOp.get := id_reg_amoOp.get
+ }
+
+ if (F) {
+ ex_reg_f_read.get <> id_reg_f_read.get
+ ex_reg_f_except.get <> EX.exceptions.get
+ ex_reg_is_f.get := EX.is_f_o.get
+ }
}
ID.id_ex_mem_read := id_reg_ctl_memRead
@@ -309,26 +339,29 @@ class Core(implicit val config:Configs) extends Module{
ID.ex_mem_result := ex_reg_result
ID.mem_wb_result := mem_reg_result
- EX.wb_result := mem_reg_result
- EX.mem_result := ex_reg_result
+ //EX.wb_result := mem_reg_result
+ EX.mem_result := MuxCase(ex_reg_result, List(
+ ex_reg_ctl_memRead -> MEM.io.readData,
+ (ex_reg_is_csr && ex_reg_ctl_regWrite(0)) -> ex_reg_csr_data
+ ))
ID.ex_result := EX.ALUresult
ID.csr_Ex := id_reg_is_csr
ID.csr_Ex_data := id_reg_csr_data
ID.ex_stall := EX.stall
- when(EX.stall || MEM.io.stall){
- id_reg_wra := id_reg_wra
- id_reg_ctl_regWrite <> id_reg_ctl_regWrite
- }
+ //when(EX.stall || MEM.io.stall){
+ // id_reg_wra := id_reg_wra
+ // id_reg_ctl_regWrite <> id_reg_ctl_regWrite
+ //}
if (F) {
- ex_reg_f_read.get <> id_reg_f_read.get
+ //ex_reg_f_read.get <> id_reg_f_read.get
EX.f_read.get <> id_reg_f_read.get
EX.readData3.get := id_reg_rd3.get
EX.fcsr_o_data.get := id_reg_fcsr_o_data.get
EX.is_f_i.get := id_reg_is_f.get
- ex_reg_f_except.get <> EX.exceptions.get
- ex_reg_is_f.get := EX.is_f_o.get
+ //ex_reg_f_except.get <> EX.exceptions.get
+ //ex_reg_is_f.get := EX.is_f_o.get
ID.f_except.get(0) <> EX.exceptions.get
}
@@ -340,105 +373,148 @@ class Core(implicit val config:Configs) extends Module{
MEM.io.dccmRsp <> io.dmemRsp
// RESERVATIONFILE
- reservationFile.set := ex_reg_isLR && io.dmemRsp.valid
- val sc_success = ex_reg_isSC && reservationFile.matchAddr
- reservationFile.clear := (ex_reg_isSC && (io.dmemReq.fire || (!sc_success && !sc_issued))) ||
- (ex_reg_ctl_memWrite && !ex_reg_isSC && !ex_reg_isAMO)
- reservationFile.addrIn := ex_reg_result
+ val sc_success = if (A) Some(ex_reg_isSC.get && reservationFile.get.matchAddr) else None
+ if (A) {
+ reservationFile.get.set := ex_reg_isLR.get && io.dmemRsp.valid
+ //val sc_success = ex_reg_isSC.get && reservationFile.get.matchAddr
+ reservationFile.get.clear := (ex_reg_isSC.get && (io.dmemReq.fire || (!sc_success.get && !sc_issued.get))) ||
+ (ex_reg_ctl_memWrite && !ex_reg_isSC.get && !ex_reg_isAMO.get)
+ reservationFile.get.addrIn := ex_reg_result
+ }
+ //reservationFile.set := ex_reg_isLR && io.dmemRsp.valid
+ //val sc_success = ex_reg_isSC && reservationFile.matchAddr
+ //reservationFile.clear := (ex_reg_isSC && (io.dmemReq.fire || (!sc_success && !sc_issued))) ||
+ // (ex_reg_ctl_memWrite && !ex_reg_isSC && !ex_reg_isAMO)
+ //reservationFile.addrIn := ex_reg_result
- MEM.io.readEnable := ex_reg_ctl_memRead || (ex_reg_isAMO && !amo_read_done) || ex_reg_isLR
+ MEM.io.readEnable := ex_reg_ctl_memRead || (
+ if (A) (ex_reg_isAMO.get && !amo_read_done.get) || ex_reg_isLR.get else 0.B
+ )
+ //MEM.io.readEnable := ex_reg_ctl_memRead || (ex_reg_isAMO && !amo_read_done) || ex_reg_isLR
// ex_reg_ctl_memWrite enable it here for SC and for AMO
// Disable default memWrite for SC/AMO to ensure we only write when allowed
- MEM.io.writeEnable := (ex_reg_ctl_memWrite && !ex_reg_isSC && !ex_reg_isAMO) || (ex_reg_isAMO && amo_read_done) || (ex_reg_isSC && sc_success && !sc_issued)
+ MEM.io.writeEnable := (
+ if (A)
+ (ex_reg_ctl_memWrite && !ex_reg_isSC.get && !ex_reg_isAMO.get) || (ex_reg_isAMO.get && amo_read_done.get) || (ex_reg_isSC.get && sc_success.get && !sc_issued.get)
+ else
+ ex_reg_ctl_memWrite
+ )
+ //MEM.io.writeEnable := (ex_reg_ctl_memWrite && !ex_reg_isSC && !ex_reg_isAMO) || (ex_reg_isAMO && amo_read_done) || (ex_reg_isSC && sc_success && !sc_issued)
- MEM.io.writeData := ex_reg_wd
+ //MEM.io.writeData := ex_reg_wd
+ MEM.io.writeData := (if (A) Mux(
+ (mem_reg_isAMO.get && (mem_reg_wra =/= 0.U) && (
+ (mem_reg_wra === ex_reg_ins(19, 15)) || (mem_reg_wra === ex_reg_ins(24, 20))
+ )),
+ mem_reg_rd,
+ ex_reg_wd
+ ) else ex_reg_wd)
// atomic signals to Mem
- MEM.io.isAMO := ex_reg_isAMO
- MEM.io.isLR := ex_reg_isLR
- MEM.io.isSC := ex_reg_isSC
- MEM.io.amoOp := ex_reg_amoOp
- MEM.io.amo_alu_result_in := EX.amo_result
+ if (A) {
+ MEM.io.isAMO.get := ex_reg_isAMO.get
+ MEM.io.isLR.get := ex_reg_isLR.get
+ MEM.io.isSC.get := ex_reg_isSC.get
+ MEM.io.amoOp.get := ex_reg_amoOp.get
+ MEM.io.amo_alu_result_in.get := EX.amo_result.get
+ }
MEM.io.aluResultIn := ex_reg_result
MEM.io.f3 := ex_reg_ins(14,12)
- EX.mem_result := ex_reg_result
+ //EX.mem_result := ex_reg_result
ID.csr_Mem := ex_reg_is_csr
ID.csr_Mem_data := ex_reg_csr_data
- ID.ex_is_amo := id_reg_isAMO
- ID.mem_is_amo := ex_reg_isAMO
- // ID.addr_id is internal (readData1).
- ID.addr_ex := EX.ALUresult
- ID.addr_mem := ex_reg_result
-
- // AMO state machine track read completion and capture old value
- when(ex_reg_isAMO && !amo_read_done && io.dmemRsp.valid) {
- // First cycle: read completes, capture old value
- amo_read_done := true.B
- amo_old_value := io.dmemRsp.bits.dataResponse
- }.elsewhen(ex_reg_isAMO && amo_read_done && io.dmemRsp.valid) {
- // Second cycle: write completes, reset state
- amo_read_done := false.B
- amo_old_value := amo_old_value // Keep value stable
- }.elsewhen(!ex_reg_isAMO) {
- amo_read_done := false.B
- }
+ val sc_matched = if (A) Some(RegInit(false.B)) else None
+ if (A) {
+ ID.ex_is_amo.get := id_reg_isAMO.get
+ ID.mem_is_amo.get := ex_reg_isAMO.get
+ // ID.addr_id is internal (readData1).
+ ID.addr_ex.get := EX.ALUresult
+ ID.addr_mem.get := ex_reg_result
+
+ // AMO state machine track read completion and capture old value
+ when(ex_reg_isAMO.get && !amo_read_done.get && io.dmemRsp.valid) {
+ // First cycle: read completes, capture old value
+ amo_read_done.get := true.B
+ amo_old_value.get := io.dmemRsp.bits.dataResponse
+ }.elsewhen(ex_reg_isAMO.get && amo_read_done.get && io.dmemRsp.valid) {
+ // Second cycle: write completes, reset state
+ amo_read_done.get := false.B
+ amo_old_value.get := amo_old_value.get // Keep value stable
+ }.elsewhen(!ex_reg_isAMO.get) {
+ amo_read_done.get := false.B
+ }
- // SC Execution
- // We use sc_issued to track if wehve already tried to execute this specific SC instruction
- when(ex_reg_isSC && !MEM.io.stall) {
- sc_issued := true.B
- }
+ // SC Execution
+ // We use sc_issued to track if we've already tried to execute this specific SC instruction
+ when(ex_reg_isSC.get && !MEM.io.stall) {
+ sc_issued.get := true.B
+ }
- // Reset sc_issued only when we advance to a NEW instruction (ex_reg changes)
- // We detect a "new" instruction when we are no longer stalled
- when(!MEM.io.stall && !EX.stall) {
- sc_issued := false.B
- }
+ // Reset sc_issued only when we advance to a NEW instruction (ex_reg changes)
+ // We detect a "new" instruction when we are no longer stalled
+ when(!MEM.io.stall && !EX.stall) {
+ sc_issued.get := false.B
+ }
- // SC Match Latch: failure to latch success means we might return 1 (fail)
- // after the reservation is cleared but before stall ends.
- val sc_matched = RegInit(false.B)
- when(ex_reg_isSC && reservationFile.matchAddr) {
- sc_matched := true.B
- }
- when(!MEM.io.stall) {
- sc_matched := false.B
- }
+ // SC Match Latch: failure to latch success means we might return 1 (fail)
+ // after the reservation is cleared but before stall ends.
+ when(ex_reg_isSC.get && reservationFile.get.matchAddr) {
+ sc_matched.get := true.B
+ }
+ when(!MEM.io.stall) {
+ sc_matched.get := false.B
+ }
+ }
// MEM-WB REGISTE
// sc_success is true if we have a match currently OR if we already matched
- val sc_success_latched = (ex_reg_isSC && reservationFile.matchAddr) || sc_matched
- val sc_result = Mux(sc_success_latched, 0.U, 1.U)
+ val sc_success_latched = if (A) Some((ex_reg_isSC.get && reservationFile.get.matchAddr) || sc_matched.get) else None
+ val sc_result = if (A) Some(Mux(sc_success_latched.get, 0.U, 1.U)) else None
+ //val sc_success_latched = (ex_reg_isSC && reservationFile.matchAddr) || sc_matched
+ //val sc_result = Mux(sc_success_latched, 0.U, 1.U)
- when(!MEM.io.stall) {
- mem_reg_rd := Mux(ex_reg_isAMO, amo_old_value, MEM.io.readData)
- mem_reg_result := Mux(ex_reg_isSC, sc_result, ex_reg_result)
- mem_reg_ctl_regWrite <> ex_reg_ctl_regWrite
+ when (!MEM.io.stall && (if (A) !MEM.io.amo_stall.get else 1.B)) {
+ mem_reg_rd := (if (A) Mux(ex_reg_isAMO.get, amo_old_value.get, MEM.io.readData) else MEM.io.readData)
mem_reg_ins := ex_reg_ins
- mem_reg_pc := ex_reg_pc
+ mem_reg_result := (if (A) Mux(ex_reg_isSC.get, sc_result.get, ex_reg_result) else ex_reg_result)
mem_reg_wra := ex_reg_wra
mem_reg_ctl_memToReg := ex_reg_ctl_memToReg
+ mem_reg_ctl_regWrite(0) := ex_reg_ctl_regWrite(0)
+ if (F) {
+ mem_reg_ctl_regWrite(1) := ex_reg_ctl_regWrite(1)
+ }
+ mem_reg_pc := ex_reg_pc
mem_reg_is_csr := ex_reg_is_csr
mem_reg_csr_data := ex_reg_csr_data
- mem_reg_isAMO := ex_reg_isAMO
- mem_reg_isLR := ex_reg_isLR
- mem_reg_isSC := ex_reg_isSC
+
+ if (A) {
+ mem_reg_isAMO.get := ex_reg_isAMO.get
+ mem_reg_isLR.get := ex_reg_isLR.get
+ mem_reg_isSC.get := ex_reg_isSC.get
+ }
+
+ if (F) {
+ mem_reg_f_read.get <> ex_reg_f_read.get
+ mem_reg_f_except.get <> ex_reg_f_except.get
+ mem_reg_is_f.get := ex_reg_is_f.get
+ }
}
if (F) {
- mem_reg_f_read.get <> ex_reg_f_read.get
- mem_reg_f_except.get <> ex_reg_f_except.get
- mem_reg_is_f.get := ex_reg_is_f.get
+ //mem_reg_f_read.get <> ex_reg_f_read.get
+ //mem_reg_f_except.get <> ex_reg_f_except.get
+ //mem_reg_is_f.get := ex_reg_is_f.get
ID.f_except.get(1) <> ex_reg_f_except.get
}
EX.ex_mem_regWrite <> ex_reg_ctl_regWrite
+ ID.mem_stall := MEM.io.stall
/********************
* Write Back Stage *
@@ -453,6 +529,9 @@ class Core(implicit val config:Configs) extends Module{
}.elsewhen(mem_reg_ctl_memToReg === 2.U) {
wb_data := mem_reg_pc + 4.U
wb_addr := mem_reg_wra
+ }.elsewhen (mem_reg_is_csr && mem_reg_ctl_regWrite(0)) {
+ wb_data := mem_reg_csr_data
+ wb_addr := mem_reg_wra
}.otherwise {
wb_data := mem_reg_result
wb_addr := mem_reg_wra
@@ -463,7 +542,7 @@ class Core(implicit val config:Configs) extends Module{
ID.writeData := wb_data
EX.wb_result := wb_data
EX.mem_wb_regWrite <> mem_reg_ctl_regWrite
- ID.writeReg := wb_addr
+ ID.writeReg := dontTouch(wb_addr)
ID.ctl_writeEnable <> mem_reg_ctl_regWrite
ID.csr_Wb := mem_reg_is_csr
ID.csr_Wb_data := mem_reg_csr_data
@@ -485,14 +564,16 @@ class Core(implicit val config:Configs) extends Module{
** instruction retire logic **
*****************************/
val instruction_retired = WireInit(false.B)
- instruction_retired := mem_reg_ins =/= 0.U && !ID.ifid_flush && !(MEM.io.stall || io.stall) && (!mem_reg_ctl_memToReg === 1.U || io.dmemRsp.valid)
- ID.csr_i_instr_retired.get := instruction_retired
+ if (Zicsr) {
+ instruction_retired := mem_reg_ins =/= 0.U && !ID.ifid_flush && !(MEM.io.stall || io.stall) && (!mem_reg_ctl_memToReg === 1.U || io.dmemRsp.valid)
+ ID.csr_i_instr_retired.get := instruction_retired
+ }
/**************
** RVFI PINS **
**************/
if (TRACE) {
- io.rvfi.get.bool := (mem_reg_ins =/= 0.U) && !clock.asBool
+ io.rvfi.get.bool := (mem_reg_ins =/= 0.U) && RegNext(!MEM.io.stall) && (if (A) RegNext(!MEM.io.amo_stall.get) else 1.B) && !clock.asBool
io.rvfi.get.uint2 := 3.U
io.rvfi.get.uint4 := delays(1, MEM.io.wmask.get)
@@ -513,7 +594,7 @@ class Core(implicit val config:Configs) extends Module{
0.U
),
Mux(
- delays(1, ex_reg_ctl_memRead).asBool,
+ delays(2, ex_reg_ctl_memRead).asBool,
mem_reg_rd,
0.U
),
@@ -526,4 +607,4 @@ class Core(implicit val config:Configs) extends Module{
r => io.rvfi.get.uint32(r._2) := r._1
)
}
-}
\ No newline at end of file
+}
diff --git a/src/main/scala/components/Execute.scala b/src/main/scala/components/Execute.scala
index 2a418b4b3..63aaf8e43 100755
--- a/src/main/scala/components/Execute.scala
+++ b/src/main/scala/components/Execute.scala
@@ -7,6 +7,7 @@ import FBitPats._
class Execute(
F: Boolean,
M: Boolean = false,
+ A: Boolean,
TRACE: Boolean
) extends Module {
val io = IO(new Bundle {
@@ -30,9 +31,9 @@ class Execute(
val ctl_aluSrc1 = Input(UInt(2.W))
// AMO control signals
- val isAMO = Input(Bool())
- val isLR = Input(Bool())
- val isSC = Input(Bool())
+ val isAMO = if (A) Some(Input(Bool())) else None
+ val isLR = if (A) Some(Input(Bool())) else None
+ val isSC = if (A) Some(Input(Bool())) else None
val writeData = Output(UInt(32.W))
val ALUresult = Output(UInt(32.W))
@@ -48,20 +49,22 @@ class Execute(
val is_f_o = if (F) Some(Output(Bool())) else None
val exceptions = if (F) Some(Output(Vec(5, Bool()))) else None
// AMO signals from Core (for looping back ex_reg values)
- val amo_memData = Input(UInt(32.W))
- val amo_src2 = Input(UInt(32.W))
- val amo_op_code = Input(UInt(5.W))
- val amo_result = Output(UInt(32.W))
+ val amo_memData = if (A) Some(Input(UInt(32.W))) else None
+ val amo_src2 = if (A) Some(Input(UInt(32.W))) else None
+ val amo_op_code = if (A) Some(Input(UInt(5.W))) else None
+ val amo_result = if (A) Some(Output(UInt(32.W))) else None
})
val alu = Module(new ALU)
val aluCtl = Module(new AluControl)
- val amoAlu = Module(new AMOALU)
- amoAlu.io.memData := io.amo_memData
- amoAlu.io.src2 := io.amo_src2
- amoAlu.io.amoOp := io.amo_op_code
- io.amo_result := amoAlu.io.result
+ val amoAlu = if (A) Some(Module(new AMOALU)) else None
+ if (A) {
+ amoAlu.get.io.memData := io.amo_memData.get
+ amoAlu.get.io.src2 := io.amo_src2.get
+ amoAlu.get.io.amoOp := io.amo_op_code.get
+ io.amo_result.get := amoAlu.get.io.result
+ }
val fu = Module(new ForwardingUnit(F)).io
@@ -105,8 +108,10 @@ class Execute(
val aluIn1 = MuxCase(
inputMux1,
+ (if (A) Array((io.isAMO.get || io.isLR.get || io.isSC.get) -> inputMux1) // AMO.. rs1 is the address
+ else Array()) ++
Array(
- (io.isAMO || io.isLR || io.isSC) -> inputMux1, // AMO.. rs1 is the address
+ //(io.isAMO || io.isLR || io.isSC) -> inputMux1, // AMO.. rs1 is the address
(io.ctl_aluSrc1 === 1.U) -> io.pcAddress,
(io.ctl_aluSrc1 === 2.U) -> 0.U
)
@@ -115,7 +120,7 @@ class Execute(
// ALU Input 2 Selection
// For AMO/LR/SC: use 0..no offset, just pass rs1 through
val aluIn2 = Mux(
- io.isAMO || io.isLR || io.isSC,
+ if (A) io.isAMO.get || io.isLR.get || io.isSC.get else 0.B,
0.U, // AMO no offset, ALU computes rs1 + 0 = rs1
Mux(io.ctl_aluSrc, inputMux2, io.immediate)
)
@@ -206,7 +211,9 @@ class Execute(
val f_multi_cycle_inst = if (F) Some(dontTouch(Vector(fdiv_s, fsqrt_s).map(
f => f === io.id_ex_ins
).reduce(_ || _))) else None
- val f_stall = if (F) Some((io.func7 === "b0001100".U) || (io.func7 === "b0101100".U) || (!fpu.get.div_sqrt_ready)) else None
+ //val f_stall = if (F) Some((io.func7 === "b0001100".U) || (io.func7 === "b0101100".U) || (!fpu.get.div_sqrt_ready)) else None
+ val f_stall = if (F) Some(f_multi_cycle_inst.get && (!fpu.get.div_sqrt_valid_out)) else None
+ val f_in_valid = if (F) Some(RegInit(0.B)) else None
if (F) {
fpu.get.rm := Mux(
io.func3 === 7.U,
@@ -244,9 +251,14 @@ class Execute(
).zipWithIndex.map(
f => (f._1 === io.id_ex_ins) -> (f._2 + 1).U
))
- fpu.get.div_sqrt_valid := f_multi_cycle_inst.get
+ fpu.get.div_sqrt_valid := f_multi_cycle_inst.get && !f_in_valid.get
io.exceptions.get <> fpu.get.exceptions
io.is_f_o.get := io.is_f_i.get | RegNext(f_stall.get)
+ when (fpu.get.div_sqrt_valid_out) {
+ f_in_valid.get := 0.B
+ }.elsewhen (f_multi_cycle_inst.get && !f_in_valid.get) {
+ f_in_valid.get := 1.B
+ }
}
io.ALUresult := MuxCase(alu.io.result, (
@@ -261,13 +273,13 @@ class Execute(
) else Vector()
))
- io.stall := (
+ dontTouch(io.stall) := (
if (M) (
io.func7 === 1.U && ~div_en.get && (io.func3 === 4.U || io.func3 === 5.U || io.func3 === 6.U || io.func3 === 7.U)
) || (div_en.get && counter.get < 32.U)
else false.B
) || (
- if (F) f_stall.get else false.B
+ if (F) dontTouch(f_stall.get) else false.B
)
// Write Data: for AMO/LR/SC, this is rs2: sourceB operand
@@ -276,4 +288,4 @@ class Execute(
if (TRACE) {
io.rs1_rdata.get := inputMux1
}
-}
\ No newline at end of file
+}
diff --git a/src/main/scala/components/HazardUnit.scala b/src/main/scala/components/HazardUnit.scala
index 158052bf6..c1bafd9b0 100755
--- a/src/main/scala/components/HazardUnit.scala
+++ b/src/main/scala/components/HazardUnit.scala
@@ -1,7 +1,7 @@
package nucleusrv.components
import chisel3._
-class HazardUnit extends Module {
+class HazardUnit(A: Boolean) extends Module {
val io = IO(new Bundle {
val id_ex_memRead = Input(Bool())
val ex_mem_memRead = Input(Bool())
@@ -15,13 +15,13 @@ class HazardUnit extends Module {
val jump = Input(UInt(2.W))
val branch = Input(Bool())
- val id_is_amo = Input(Bool()) // instruction in ID is AMO
- val ex_is_amo = Input(Bool()) // instruction in EX is AMO
- val mem_is_amo = Input(Bool()) // instruction in MEM is AMO
+ val id_is_amo = if (A) Some(Input(Bool())) else None // instruction in ID is AMO
+ val ex_is_amo = if (A) Some(Input(Bool())) else None // instruction in EX is AMO
+ val mem_is_amo = if (A) Some(Input(Bool())) else None // instruction in MEM is AMO
- val addr_id = Input(UInt(32.W))
- val addr_ex = Input(UInt(32.W))
- val addr_mem = Input(UInt(32.W))
+ val addr_id = if (A) Some(Input(UInt(32.W))) else None
+ val addr_ex = if (A) Some(Input(UInt(32.W))) else None
+ val addr_mem = if (A) Some(Input(UInt(32.W))) else None
val if_reg_write = Output(Bool())
val pc_write = Output(Bool())
@@ -30,8 +30,10 @@ class HazardUnit extends Module {
val take_branch = Output(Bool())
// Forwarding signals for ID stage
- val operandForwardEX = Output(Bool())
- val operandForwardMEM = Output(Bool())
+ val operandForwardEX = if (A) Some(Output(Bool())) else None
+ val operandForwardMEM = if (A) Some(Output(Bool())) else None
+
+ val stall = Input(Bool())
})
io.ctl_mux := true.B
@@ -42,21 +44,27 @@ class HazardUnit extends Module {
// Standard Load-Use Hazard Detection
val standard_stall = WireDefault(false.B)
- when (
- (io.id_ex_memRead || io.branch)
- && ((io.id_ex_rd === io.id_rs1) || (io.id_ex_rd === io.id_rs2))
- && (
- ((io.id_ex_rd =/= 0.U) && (io.id_rs1 =/= 0.U))
- || ((io.id_ex_rd =/= 0.U) && (io.id_rs2 =/= 0.U))
- ) && !io.id_ex_branch
- ) {
- }
-
- when (
- (io.ex_mem_memRead || io.branch)
- && ((io.ex_mem_rd === io.id_rs1) || (io.ex_mem_rd === io.id_rs2))
- ) {
+ when (io.stall) {
+ io.ctl_mux := false.B
+ io.pc_write := false.B
+ io.if_reg_write := false.B
+ io.take_branch := false.B
}
+ //when (
+ // (io.id_ex_memRead || io.branch)
+ // && ((io.id_ex_rd === io.id_rs1) || (io.id_ex_rd === io.id_rs2))
+ // && (
+ // ((io.id_ex_rd =/= 0.U) && (io.id_rs1 =/= 0.U))
+ // || ((io.id_ex_rd =/= 0.U) && (io.id_rs2 =/= 0.U))
+ // ) && !io.id_ex_branch
+ //) {
+ //}
+
+ //when (
+ // (io.ex_mem_memRead || io.branch)
+ // && ((io.ex_mem_rd === io.id_rs1) || (io.ex_mem_rd === io.id_rs2))
+ //) {
+ //}
// Branch flush hazard
when(io.taken || (io.jump =/= 0.U)) {
@@ -65,80 +73,82 @@ class HazardUnit extends Module {
io.ifid_flush := false.B
}
- //AMO Hazard Detection and Forwarding
- val rs1_ID = io.id_rs1
- val rs2_ID = io.id_rs2
- val rd_EX = io.id_ex_rd
- val rd_MEM = io.ex_mem_rd
- val isAMO_EX = io.ex_is_amo
- val isAMO_MEM = io.mem_is_amo
- val isLoad_MEM = io.ex_mem_memRead
- val addr_EX = io.addr_ex
- val addr_MEM = io.addr_mem
- val addr_ID = io.addr_id
-
- val amo_stall = WireDefault(false.B)
- val forward_EX = WireDefault(false.B)
- val forward_MEM = WireDefault(false.B)
-
- io.operandForwardEX := forward_EX
- io.operandForwardMEM := forward_MEM
-
- // Hazard detection
- when(io.id_is_amo) {
-
- // MEM has load or AMO, check address match
- when(isAMO_MEM || isLoad_MEM) {
- when(addr_ID === addr_MEM) {
- forward_MEM := true.B // Forward from MEM
- } .otherwise {
- amo_stall := true.B // Stall if not match
+ if (A) {
+ //AMO Hazard Detection and Forwarding
+ val rs1_ID = io.id_rs1
+ val rs2_ID = io.id_rs2
+ val rd_EX = io.id_ex_rd
+ val rd_MEM = io.ex_mem_rd
+ val isAMO_EX = io.ex_is_amo.get
+ val isAMO_MEM = io.mem_is_amo.get
+ val isLoad_MEM = io.ex_mem_memRead
+ val addr_EX = io.addr_ex.get
+ val addr_MEM = io.addr_mem.get
+ val addr_ID = io.addr_id.get
+
+ val amo_stall = WireDefault(false.B)
+ val forward_EX = WireDefault(false.B)
+ val forward_MEM = WireDefault(false.B)
+
+ io.operandForwardEX.get := forward_EX
+ io.operandForwardMEM.get := forward_MEM
+
+ // Hazard detection
+ when(io.id_is_amo.get) {
+
+ // MEM has load or AMO, check address match
+ when(isAMO_MEM || isLoad_MEM) {
+ when(addr_ID === addr_MEM) {
+ forward_MEM := true.B // Forward from MEM
+ } .otherwise {
+ amo_stall := true.B // Stall if not match
+ }
}
- }
- // EX has AMO, check address match
- when(isAMO_EX) {
- when(addr_ID === addr_EX) {
- forward_EX := true.B // Forward from EX
+ // EX has AMO, check address match
+ when(isAMO_EX) {
+ when(addr_ID === addr_EX) {
+ forward_EX := true.B // Forward from EX
+ }
}
- }
- // Both EX and MEM have hazards
- when(isAMO_EX && (isAMO_MEM || isLoad_MEM)) {
- when(addr_ID === addr_EX) {
- forward_EX := true.B
- forward_MEM := false.B // Priority to EX
- amo_stall := false.B // Clear any previous stall
- } .elsewhen(addr_ID === addr_MEM) {
- forward_MEM := true.B
- forward_EX := false.B
- amo_stall := false.B // Clear any previous stall
- } .otherwise {
- amo_stall := true.B // Stall 1 cycle if neither match
- forward_EX := false.B
- forward_MEM := false.B
+ // Both EX and MEM have hazards
+ when(isAMO_EX && (isAMO_MEM || isLoad_MEM)) {
+ when(addr_ID === addr_EX) {
+ forward_EX := true.B
+ forward_MEM := false.B // Priority to EX
+ amo_stall := false.B // Clear any previous stall
+ } .elsewhen(addr_ID === addr_MEM) {
+ forward_MEM := true.B
+ forward_EX := false.B
+ amo_stall := false.B // Clear any previous stall
+ } .otherwise {
+ amo_stall := true.B // Stall 1 cycle if neither match
+ forward_EX := false.B
+ forward_MEM := false.B
+ }
}
- }
- // Load and AMO addresses differ
- when(!forward_EX && !forward_MEM && (isLoad_MEM || isAMO_MEM || isAMO_EX)) {
- amo_stall := true.B
- }
+ // No forwarding path was selected while a load/AMO is still in EX or MEM: stall
+ when(!forward_EX && !forward_MEM && (isLoad_MEM || isAMO_MEM || isAMO_EX)) {
+ amo_stall := true.B
+ }
- // Consecutive AMOs optimization
- when(isAMO_EX && isAMO_MEM) {
- when(addr_EX =/= addr_MEM) {
- amo_stall := false.B // Allow different AMO addresses
+ // AMOs in both EX and MEM that target different addresses: cancel the stall
+ when(isAMO_EX && isAMO_MEM) {
+ when(addr_EX =/= addr_MEM) {
+ amo_stall := false.B // Allow different AMO addresses
+ }
}
}
- }
- // Combined Stall
- when(standard_stall || amo_stall) {
- io.ctl_mux := false.B
- io.pc_write := false.B
- io.if_reg_write := false.B
- io.take_branch := false.B
+ // Combined Stall
+ when(standard_stall || amo_stall) {
+ io.ctl_mux := false.B
+ io.pc_write := false.B
+ io.if_reg_write := false.B
+ io.take_branch := false.B
+ }
}
}
diff --git a/src/main/scala/components/InstructionDecode.scala b/src/main/scala/components/InstructionDecode.scala
index d312f4bea..ed3c7c0c8 100755
--- a/src/main/scala/components/InstructionDecode.scala
+++ b/src/main/scala/components/InstructionDecode.scala
@@ -4,6 +4,7 @@ import chisel3._
import chisel3.util._
class InstructionDecode(
+ A: Boolean,
F: Boolean,
Zicsr: Boolean,
TRACE: Boolean
@@ -40,6 +41,7 @@ class InstructionDecode(
val dmem_data = Input(UInt(32.W))
val ex_stall = Input(Bool())
+ val mem_stall = Input(Bool())
//Outputs
val immediate = Output(UInt(32.W))
@@ -72,7 +74,7 @@ class InstructionDecode(
val csr_i_instr_retired = if (Zicsr) Some(Input(Bool())) else None
val csr_o_data = if (Zicsr) Some(Output(UInt(32.W))) else None
val is_csr = if (Zicsr) Some(Output(Bool())) else None
- val fcsr_o_data = if (Zicsr) Some(Output(UInt(32.W))) else None
+ val fcsr_o_data = if (Zicsr && F) Some(Output(UInt(32.W))) else None
// F pins
val f_read_reg = if (F) Some(Input(Vec(3, Vec(2, Bool())))) else None
@@ -87,31 +89,39 @@ class InstructionDecode(
val rd_wdata = if (TRACE) Some(Output(UInt(32.W))) else None
// Atomic Outputpins
- val isAMO = Output(Bool())
- val isLR = Output(Bool())
- val isSC = Output(Bool())
- val amoOp = Output(UInt(5.W))
- val aq = Output(Bool())
- val rl = Output(Bool())
+ val isAMO = if (A) Some(Output(Bool())) else None
+ val isLR = if (A) Some(Output(Bool())) else None
+ val isSC = if (A) Some(Output(Bool())) else None
+ val amoOp = if (A) Some(Output(UInt(5.W))) else None
+ val aq = if (A) Some(Output(Bool())) else None
+ val rl = if (A) Some(Output(Bool())) else None
// HDU new inputs
- val ex_is_amo = Input(Bool())
- val mem_is_amo = Input(Bool())
- val addr_ex = Input(UInt(32.W))
- val addr_mem = Input(UInt(32.W))
+ val ex_is_amo = if (A) Some(Input(Bool())) else None
+ val mem_is_amo = if (A) Some(Input(Bool())) else None
+ val addr_ex = if (A) Some(Input(UInt(32.W))) else None
+ val addr_mem = if (A) Some(Input(UInt(32.W))) else None
})
//atomic instruction detection
- val atomicDecoder = Module(new AtomicDecoder)
- atomicDecoder.io.instr := io.id_instruction
+ val atomicDecoder = if (A) Some(Module(new AtomicDecoder)) else None
+ if (A) {
+ atomicDecoder.get.io.instr := io.id_instruction
+ io.isAMO.get := atomicDecoder.get.io.out.isAMO
+ io.isLR.get := atomicDecoder.get.io.out.isLR
+ io.isSC.get := atomicDecoder.get.io.out.isSC
+ io.amoOp.get := atomicDecoder.get.io.out.amoOp
+ io.aq.get := atomicDecoder.get.io.out.aq
+ io.rl.get := atomicDecoder.get.io.out.rl
+ }
- io.isAMO := atomicDecoder.io.out.isAMO
- io.isLR := atomicDecoder.io.out.isLR
- io.isSC := atomicDecoder.io.out.isSC
- io.amoOp := atomicDecoder.io.out.amoOp
- io.aq := atomicDecoder.io.out.aq
- io.rl := atomicDecoder.io.out.rl
+ //io.isAMO.get := atomicDecoder.get.io.out.isAMO
+ //io.isLR.get := atomicDecoder.get.io.out.isLR
+ //io.isSC.get := atomicDecoder.get.io.out.isSC
+ //io.amoOp.get := atomicDecoder.get.io.out.amoOp
+ //io.aq.get := atomicDecoder.get.io.out.aq
+ //io.rl.get := atomicDecoder.get.io.out.rl
val is_f = if (F) Some(WireInit(0.B)) else None
if (F) {
@@ -128,7 +138,7 @@ class InstructionDecode(
}
// CSR
- val csr = if (Zicsr) Some(Module(new CSR())) else None
+ val csr = if (Zicsr) Some(Module(new CSR(F))) else None
if (Zicsr) {
csr.get.io.i_misa_value := io.csr_i_misa.get
csr.get.io.i_mhartid_value := io.csr_i_mhartid.get
@@ -138,15 +148,17 @@ class InstructionDecode(
csr.get.io.i_addr := io.id_instruction(31,20)
csr.get.io.i_w_en := io.is_csr.get && (io.id_instruction(19, 15) =/= 0.U)
csr.get.io.i_instr_retired := io.csr_i_instr_retired.get
- csr.get.io.f_except <> io.f_except.get(2)
+ if (F) {
+ csr.get.io.f_except.get <> io.f_except.get(2)
+ io.fcsr_o_data.get := csr.get.io.fcsr_o_data.get
+ }
io.is_csr.get := io.id_instruction(6, 0) === "b1110011".U
io.csr_o_data.get := MuxCase(csr.get.io.o_data, Vector(
- ((io.id_instruction(31, 20) === 1.U) || (io.id_instruction(31, 20) === 3.U)) -> (csr.get.io.o_data | (0 until 3).map(
+ ((io.id_instruction(31, 20) === 1.U) || (io.id_instruction(31, 20) === 3.U)) -> (csr.get.io.o_data | (if (F) (0 until 3).map(
f => Mux(io.is_f_in.get(f), io.f_except.get(f).asUInt, 0.U)
- ).reduce(_ | _))
+ ).reduce(_ | _) else 0.B))
))
- io.fcsr_o_data.get := csr.get.io.fcsr_o_data
}
val csrController = if (Zicsr) Some(Module(new CSRController())) else None
@@ -166,7 +178,7 @@ class InstructionDecode(
}
//Hazard Detection Unit
- val hdu = Module(new HazardUnit)
+ val hdu = Module(new HazardUnit(A))
hdu.io.dmem_resp_valid := io.dmem_resp_valid
hdu.io.id_ex_memRead := io.id_ex_mem_read
hdu.io.ex_mem_memRead := io.ex_mem_mem_read
@@ -177,6 +189,7 @@ class InstructionDecode(
hdu.io.id_rs2 := io.id_instruction(24, 20)
hdu.io.jump := io.ctl_jump
hdu.io.branch := io.ctl_branch
+ hdu.io.stall := io.ex_stall || io.mem_stall
io.hdu_pcWrite := hdu.io.pc_write
io.hdu_if_reg_write := hdu.io.if_reg_write
@@ -214,12 +227,16 @@ class InstructionDecode(
val registerRs3 = if (F) Some(io.id_instruction(31, 27)) else None
val readData1 = WireInit(0.U(32.W))
val readData2 = WireInit(0.U(32.W))
- val writeData = dontTouch(Mux(io.csr_Wb, io.csr_Wb_data, io.writeData))
+ val writeData = dontTouch(MuxCase(io.writeData, List(
+ (io.csr_Ex && io.id_ex_regWr && ((registerRs1 === io.id_ex_rd) || (registerRs2 === io.id_ex_rd)) && (io.id_ex_rd =/= 0.U)) -> io.csr_Ex_data,
+ (io.csr_Mem && io.ex_mem_regWr && ((registerRs1 === io.ex_mem_rd) || (registerRs2 === io.ex_mem_rd)) && (io.ex_mem_rd =/= 0.U)) -> io.csr_Mem_data,
+ (io.csr_Wb && io.ctl_writeEnable(0) && ((registerRs1 === io.writeReg) || (registerRs2 === io.writeReg)) && (io.writeReg =/= 0.U)) -> io.csr_Wb_data
+ )))
registers.io.readAddress(0) := registerRs1
registers.io.readAddress(1) := registerRs2
registers.io.writeEnable(0) := io.ctl_writeEnable(0) || (!io.ex_stall && io.csr_Wb)
registers.io.writeAddress := registerRd
- registers.io.writeData := writeData
+ registers.io.writeData := io.writeData
if (F) {
registers.io.readAddress(2) := registerRs3.get
registers.io.f_read.get <> control.io.f_read.get
@@ -240,7 +257,7 @@ class InstructionDecode(
) {
readData1 := 0.U
}.otherwise{
- readData1 := io.writeData
+ readData1 := writeData
}
}.otherwise {
readData1 := registers.io.readData(0)
@@ -259,7 +276,7 @@ class InstructionDecode(
) {
readData2 := 0.U
}.otherwise{
- readData2 := io.writeData
+ readData2 := writeData
}
}.otherwise{
readData2 := registers.io.readData(1)
@@ -268,18 +285,20 @@ class InstructionDecode(
if (F) {
io.readData3.get := Mux(
(io.writeReg === registerRs3.get) && (io.ctl_writeEnable(1) && control.io.f_read.get(2)),
- io.writeData,
+ writeData,
registers.io.readData(2)
)
}
- // AMO Forwarding (Memory Dependency) overrides Register Forwarding
- when(hdu.io.operandForwardEX) {
- io.readData1 := io.ex_result
- io.readData2 := io.ex_result
- }.elsewhen(hdu.io.operandForwardMEM) {
- io.readData1 := io.ex_mem_result
- io.readData2 := io.ex_mem_result
+ if (A) {
+ // AMO Forwarding (Memory Dependency) overrides Register Forwarding
+ when(hdu.io.operandForwardEX.get) {
+ io.readData1 := io.ex_result
+ io.readData2 := io.ex_result
+ }.elsewhen(hdu.io.operandForwardMEM.get) {
+ io.readData1 := io.ex_mem_result
+ io.readData2 := io.ex_mem_result
+ }
}
@@ -292,6 +311,11 @@ class InstructionDecode(
val input2 = Wire(UInt(32.W))
when (
+ (registerRs1 === io.ex_ins(11, 7))
+ && (if (F) !io.f_read_reg.get(0)(0) else 1.B)
+ ) {
+ input1 := io.ex_result
+ }.elsewhen (
(registerRs1 === io.ex_mem_ins(11, 7))
&& (if (F) !io.f_read_reg.get(1)(0) else 1.B)
) {
@@ -305,6 +329,11 @@ class InstructionDecode(
input1 := readData1
}
when (
+ (registerRs2 === io.ex_ins(11, 7))
+ && (if (F) !io.f_read_reg.get(0)(1) else 1.B)
+ ) {
+ input2 := io.ex_result
+ }.elsewhen (
(registerRs2 === io.ex_mem_ins(11, 7))
&& (if (F) !io.f_read_reg.get(1)(1) else 1.B)
) {
@@ -379,28 +408,34 @@ class InstructionDecode(
io.func7 := 0.U
}
- io.stall := io.func7 === 1.U && (io.func3 === 4.U || io.func3 === 5.U || io.func3 === 6.U || io.func3 === 7.U)
+ dontTouch(io.stall) := io.func7 === 1.U && (io.func3 === 4.U || io.func3 === 5.U || io.func3 === 6.U || io.func3 === 7.U)
val csr_iData_cases = Array(
1.U -> io.ex_result,
2.U -> Mux(io.ex_mem_mem_read, io.dmem_data, io.ex_mem_result),
- 3.U -> io.writeData,
+ 3.U -> writeData,
4.U -> io.csr_Ex_data,
5.U -> io.csr_Mem_data,
6.U -> io.csr_Wb_data
)
if (Zicsr) {
- csr.get.io.i_data := MuxLookup(csrController.get.io.forwardRS1, registers.io.readData(0), csr_iData_cases)
+ csr.get.io.i_data := MuxLookup(
+ csrController.get.io.forwardRS1,
+ registers.io.readData(0),
+ csr_iData_cases
+ )
}
- hdu.io.id_is_amo := io.isAMO
- hdu.io.ex_is_amo := io.ex_is_amo
- hdu.io.mem_is_amo := io.mem_is_amo
-
- hdu.io.addr_id := readData1 // RS1 is address for AMO
- hdu.io.addr_ex := io.addr_ex
- hdu.io.addr_mem := io.addr_mem
+ if (A) {
+ hdu.io.id_is_amo.get := io.isAMO.get
+ hdu.io.ex_is_amo.get := io.ex_is_amo.get
+ hdu.io.mem_is_amo.get := io.mem_is_amo.get
+
+ hdu.io.addr_id.get := readData1 // RS1 is address for AMO
+ hdu.io.addr_ex.get := io.addr_ex.get
+ hdu.io.addr_mem.get := io.addr_mem.get
+ }
// RVFI
@@ -415,4 +450,4 @@ class InstructionDecode(
io.rd_wdata.get := writeData
}
-}
\ No newline at end of file
+}
diff --git a/src/main/scala/components/InstructionFetch.scala b/src/main/scala/components/InstructionFetch.scala
index 9fcf7671e..71d6e592f 100755
--- a/src/main/scala/components/InstructionFetch.scala
+++ b/src/main/scala/components/InstructionFetch.scala
@@ -14,15 +14,16 @@ class InstructionFetch extends Module {
val rst = Wire(Bool())
rst := reset.asBool
+ io.coreInstrResp.ready := true.B
- dontTouch(io.stall)
- val state_reg = dontTouch(RegInit(0.U))
- val next_state = dontTouch(MuxCase(state_reg, Vector(
- ((state_reg === 0.U) || ((state_reg === 2.U) && io.coreInstrResp.valid)) -> 1.U, // valid
- ((state_reg === 1.U) && io.coreInstrReq.ready && !io.stall) -> 2.U // ready
- )))
- state_reg := next_state
- io.coreInstrResp.ready := state_reg === 2.U
+ //dontTouch(io.stall)
+ //val state_reg = dontTouch(RegInit(0.U))
+ //val next_state = dontTouch(MuxCase(state_reg, Vector(
+ // ((state_reg === 0.U) || ((state_reg === 2.U) && io.coreInstrResp.valid)) -> 1.U, // valid
+ // ((state_reg === 1.U) && io.coreInstrReq.ready && !io.stall) -> 2.U // ready
+ //)))
+ //state_reg := next_state
+ //io.coreInstrResp.ready := state_reg === 2.U
// io.coreInstrReq.ready := Mux(rst, false.B, true.B)
@@ -30,12 +31,8 @@ class InstructionFetch extends Module {
io.coreInstrReq.bits.isWrite := false.B
io.coreInstrReq.bits.dataRequest := DontCare
- io.coreInstrReq.bits.addrRequest := Mux(
- io.coreInstrReq.ready,
- Cat("b00".U, io.address(31, 2)),
- DontCare
- )
- io.coreInstrReq.valid := (state_reg === 1.U) & !io.stall
+ io.coreInstrReq.bits.addrRequest := io.address >> 2
+ io.coreInstrReq.valid := !(rst /*|| io.stall*/)
io.instruction := Mux(
io.coreInstrResp.valid,
diff --git a/src/main/scala/components/MemoryFetch.scala b/src/main/scala/components/MemoryFetch.scala
index 325a794c9..1c353a6f4 100755
--- a/src/main/scala/components/MemoryFetch.scala
+++ b/src/main/scala/components/MemoryFetch.scala
@@ -2,7 +2,10 @@ package nucleusrv.components
import chisel3._
import chisel3.util._
-class MemoryFetch(TRACE: Boolean) extends Module {
+class MemoryFetch(
+ A: Boolean,
+ TRACE: Boolean
+) extends Module {
val io = IO(new Bundle {
val aluResultIn: UInt = Input(UInt(32.W))
val writeData: UInt = Input(UInt(32.W))
@@ -13,38 +16,39 @@ class MemoryFetch(TRACE: Boolean) extends Module {
val f3 = Input(UInt(3.W))
// AMO / LR / SC
- val isAMO = Input(Bool())
- val isLR = Input(Bool())
- val isSC = Input(Bool())
- val amoOp = Input(UInt(5.W))
- val amoRdVal = Output(UInt(32.W)) // old memory value for rd
+ val isAMO = if (A) Some(Input(Bool())) else None
+ val isLR = if (A) Some(Input(Bool())) else None
+ val isSC = if (A) Some(Input(Bool())) else None
+ val amoOp = if (A) Some(Input(UInt(5.W))) else None
+ val amoRdVal = if (A) Some(Output(UInt(32.W))) else None // old memory value for rd
+ val amo_stall = if (A) Some(Output(Bool())) else None
val dccmReq = Decoupled(new MemRequestIO)
val dccmRsp = Flipped(Decoupled(new MemResponseIO))
val wmask = if (TRACE) Some(Output(UInt(4.W))) else None
- val amo_alu_result_in = Input(UInt(32.W)) // Result from Execute stage
+ val amo_alu_result_in = if (A) Some(Input(UInt(32.W))) else None // Result from Execute stage
})
io.dccmRsp.ready := true.B
// val amoALU = Module(new AMOALU) -- Moved to Execute
- val amo_old_value = RegInit(0.U(32.W)) // Register to capture old memory value
+ val amo_old_value = if (A) Some(RegInit(0.U(32.W))) else None // Register to capture old memory value
val wdata = Wire(Vec(4, UInt(8.W)))
val rdata = Wire(UInt(32.W))
- val offset = RegInit(0.U(2.W))
- val funct3 = RegInit(0.U(3.W))
+ val offset = WireInit(0.U(2.W))
+ val funct3 = WireInit(0.U(3.W))
val offsetSW = io.aluResultIn(1,0)
- when(!io.dccmRsp.valid){
+ //when(!io.dccmRsp.valid){
funct3 := io.f3
offset := io.aluResultIn(1,0)
- }.otherwise{
- funct3 := funct3
- offset := offset
- }
+ //}.otherwise{
+ //funct3 := funct3
+ //offset := offset
+ //}
wdata(0) := io.writeData(7,0)
wdata(1) := io.writeData(15,8)
@@ -52,7 +56,9 @@ class MemoryFetch(TRACE: Boolean) extends Module {
wdata(3) := io.writeData(31,24)
/* Store Byte */
- when(io.writeEnable && io.f3 === "b000".U && !io.isAMO && !io.isSC){
+ when(io.writeEnable && io.f3 === "b000".U && (
+ if (A) !io.isAMO.get && !io.isSC.get else 1.B
+ )){
when(offsetSW === 0.U){
io.dccmReq.bits.activeByteLane := "b0001".U
if (TRACE) io.wmask.get := "b0001".U
@@ -80,7 +86,10 @@ class MemoryFetch(TRACE: Boolean) extends Module {
}
}
/* Store Half Word */
- .elsewhen(io.writeEnable && io.f3 === "b001".U && !io.isAMO && !io.isSC){
+ .elsewhen(io.writeEnable && io.f3 === "b001".U && (
+ if (A) !io.isAMO.get && !io.isSC.get else 1.B
+ )){
+ // for a halfword-aligned store, offsetSW (address bits 1:0) is either 0 or 2
when(offsetSW === 0.U){
io.dccmReq.bits.activeByteLane := "b0011".U
if (TRACE) io.wmask.get := "b0011".U
@@ -107,18 +116,24 @@ class MemoryFetch(TRACE: Boolean) extends Module {
}
// Capture old memory value ONLY when AMO read completes
- when(io.dccmRsp.valid && io.readEnable && io.isAMO) {
- amo_old_value := rdata
+ if (A) {
+ when(io.dccmRsp.valid && io.readEnable && io.isAMO.get) {
+ amo_old_value.get := rdata
+ }
}
// io.amoRdVal is unused by Core, keeping 0
- io.amoRdVal := 0.U
+ if (A) {
+ io.amoRdVal.get := 0.U
+ }
// For AMO writes: use AMOALU result (from input), for normal stores: use wdata
- val writeDataFinal = Mux(io.isAMO && io.writeEnable, io.amo_alu_result_in, wdata.asUInt)
+ val writeDataFinal = if (A) Mux(io.isAMO.get && io.writeEnable, io.amo_alu_result_in.get, wdata.asUInt) else wdata.asUInt
+ val addr = Cat("b00".U, (io.aluResultIn & "h3FFFFFFF".U)(31, 2))
+ //val addr = io.aluResultIn
io.dccmReq.bits.dataRequest := writeDataFinal
- io.dccmReq.bits.addrRequest := Cat("b00".U, (io.aluResultIn & "h3FFFFFFF".U)(31, 2))
+ io.dccmReq.bits.addrRequest := dontTouch(addr)
io.dccmReq.bits.isWrite := io.writeEnable
io.dccmReq.valid := Mux(io.writeEnable | io.readEnable, true.B, false.B)
@@ -128,13 +143,22 @@ class MemoryFetch(TRACE: Boolean) extends Module {
// Wait, if we stall here, the pipeline holds 'ex_reg' as AMO.
// Core state machine will advance 'amo_read_done' to true.
// So next cycle, this stall condition will clear because 'readEnable' will be false (amo_read_done is true).
- val amo_transition_stall = io.isAMO && io.readEnable && io.dccmRsp.valid
- io.stall := ((io.writeEnable || io.readEnable) && !io.dccmRsp.valid) || amo_transition_stall
+ val amo_transition_stall = if (A) Some(io.isAMO.get && io.readEnable && io.dccmRsp.valid) else None
+ dontTouch(io.stall) := (false.B
+ // io.writeEnable && io.dccmReq.ready
+ //) || (
+ // io.readEnable && !io.dccmRsp.valid
+ )
+ if (A) {
+ io.amo_stall.get := amo_transition_stall.get
+ }
rdata := Mux(io.dccmRsp.valid, io.dccmRsp.bits.dataResponse, DontCare)
when(io.readEnable) {
- when(funct3 === "b010".U || io.isAMO || io.isLR) {
+ when(funct3 === "b010".U || (
+ if (A) (io.isAMO.get || io.isLR.get) else 0.B
+ )) {
// load word or AMO/LR
io.readData := rdata
}
diff --git a/src/main/scala/components/PC.scala b/src/main/scala/components/PC.scala
index a6606079b..a8177c0ca 100644
--- a/src/main/scala/components/PC.scala
+++ b/src/main/scala/components/PC.scala
@@ -13,6 +13,8 @@ class PC extends Module{
val pc_reg = RegInit((0x0-0x4).asSInt(32.W))
pc_reg := io.in
io.out := pc_reg
- io.pc4 := Mux(io.halt, pc_reg, pc_reg + 4.S)
- io.pc2 := Mux(io.halt, pc_reg, pc_reg + 2.S)
+ //io.pc4 := Mux(io.halt, pc_reg, pc_reg + 4.S)
+ //io.pc2 := Mux(io.halt, pc_reg, pc_reg + 2.S)
+ io.pc4 := pc_reg + 4.S
+ io.pc2 := pc_reg + 2.S
}
diff --git a/src/main/scala/components/SRamTop.scala b/src/main/scala/components/SRamTop.scala
index 119a101a1..e7944a041 100644
--- a/src/main/scala/components/SRamTop.scala
+++ b/src/main/scala/components/SRamTop.scala
@@ -11,15 +11,15 @@ class SRamTop(val programFile:Option[String] ) extends Module {
val rsp = Decoupled(new MemResponseIO)
})
- val state_reg = dontTouch(RegInit(0.U))
- state_reg := MuxCase(state_reg, Vector(
- (state_reg === 0.U) || ((state_reg === 2.U) && io.rsp.ready), // ready
- (state_reg === 1.U) && io.req.valid // valid
- ).zipWithIndex.map(
- s => s._1 -> (s._2 + 1).U
- ))
- io.rsp.valid := state_reg === 2.U
- io.req.ready := state_reg === 1.U
+ //val validReg = RegInit(false.B)
+ //val req_valid = RegInit(true.B)
+ //io.rsp.valid := validReg
+ io.req.ready := true.B
+ //when (io.req.valid && io.req.bits.isWrite && req_valid) {
+ // req_valid := false.B
+ //} otherwise {
+ // req_valid := true.B
+ //}
val rdata = Wire(UInt(32.W))
@@ -32,7 +32,7 @@ class SRamTop(val programFile:Option[String] ) extends Module {
sram.io.clk_i := clk
sram.io.rst_i := rst
- sram.io.csb_i := !io.req.valid
+ sram.io.csb_i := 1.B
sram.io.we_i := DontCare
sram.io.wmask_i := DontCare
sram.io.addr_i := DontCare
@@ -47,23 +47,32 @@ class SRamTop(val programFile:Option[String] ) extends Module {
when(io.req.valid && !io.req.bits.isWrite) {
// READ
// rdata := mem.read(io.req.bits.addrRequest/4.U)
+ //validReg := true.B
+ sram.io.csb_i := false.B
sram.io.we_i := true.B
sram.io.addr_i := io.req.bits.addrRequest
-
+ io.rsp.valid := true.B
rdata := sram.io.rdata_o
} .elsewhen(io.req.valid && io.req.bits.isWrite) {
// WRITE
// mem.write(io.req.bits.addrRequest/4.U, wdata, mask)
// validReg := true.B
// rdata map (_ := DontCare)
+ sram.io.csb_i := false.B
sram.io.we_i := false.B
sram.io.wmask_i := io.req.bits.activeByteLane
sram.io.addr_i := io.req.bits.addrRequest
sram.io.wdata_i := io.req.bits.dataRequest
+ io.rsp.valid := false.B
+ //validReg := false.B
rdata := DontCare
+ //req_valid := false.B
} .otherwise {
+ io.rsp.valid := false.B
+ //validReg := false.B
// rdata map (_ := DontCare)
rdata := DontCare
+ //req_valid := true.B
}
io.rsp.bits.dataResponse := sram.io.rdata_o
diff --git a/src/main/scala/csr/CSRBundles.scala b/src/main/scala/csr/CSRBundles.scala
index b5c309ebb..3d736f699 100644
--- a/src/main/scala/csr/CSRBundles.scala
+++ b/src/main/scala/csr/CSRBundles.scala
@@ -40,11 +40,11 @@ class CSR_IO extends Bundle{
val i_w_en = Input(Bool())
}
-class CSRRegFileIO extends Bundle{
+class CSRRegFileIO(F: Boolean) extends Bundle{
val MISA = new MISA()
val MHARTID = new MHARTID()
val MARCHID = new MARCHID()
val CSR = new CSR_IO()
- val FCSR = new FCSR()
+ val FCSR = if (F) Some(new FCSR()) else None
val MINSTRET = new MINSTRET()
}
diff --git a/src/main/scala/csr/CSRRegFile.scala b/src/main/scala/csr/CSRRegFile.scala
index 086cb339f..ac3ed9710 100644
--- a/src/main/scala/csr/CSRRegFile.scala
+++ b/src/main/scala/csr/CSRRegFile.scala
@@ -12,8 +12,8 @@ case class CSROperations(
val CLEAR :UInt = 3.U(2.W)
)
-class CSRRegFile extends Module{
- val io = IO(new CSRRegFileIO)
+class CSRRegFile(F: Boolean) extends Module{
+ val io = IO(new CSRRegFileIO(F))
/***************** Initializations *****************/
// Registers
@@ -64,12 +64,12 @@ class CSRRegFile extends Module{
val TIMEH_REG = RegInit(0.U(32.W))
//FCSR
- val FCSR_NX_REG = RegInit(0.B)
- val FCSR_UF_REG = RegInit(0.B)
- val FCSR_OF_REG = RegInit(0.B)
- val FCSR_DZ_REG = RegInit(0.B)
- val FCSR_NV_REG = RegInit(0.B)
- val FCSR_FRM_REG = RegInit(0.U(3.W))
+ val FCSR_NX_REG = if (F) Some(RegInit(0.B)) else None
+ val FCSR_UF_REG = if (F) Some(RegInit(0.B)) else None
+ val FCSR_OF_REG = if (F) Some(RegInit(0.B)) else None
+ val FCSR_DZ_REG = if (F) Some(RegInit(0.B)) else None
+ val FCSR_NV_REG = if (F) Some(RegInit(0.B)) else None
+ val FCSR_FRM_REG = if (F) Some(RegInit(0.U(3.W))) else None
/***************************************************/
@@ -78,21 +78,23 @@ class CSRRegFile extends Module{
MHARTID_REG := io.MHARTID.i_value
MARCHID_REG := io.MARCHID.i_value
- Vector(
- FCSR_NX_REG,
- FCSR_UF_REG,
- FCSR_OF_REG,
- FCSR_DZ_REG,
- FCSR_NV_REG
- ).zipWithIndex.foreach(
- f => f._1 := f._1 | io.FCSR.except(f._2)
- )
- io.FCSR.nx := FCSR_NX_REG
- io.FCSR.uf := FCSR_UF_REG
- io.FCSR.of := FCSR_OF_REG
- io.FCSR.dz := FCSR_DZ_REG
- io.FCSR.nv := FCSR_NV_REG
- io.FCSR.frm := FCSR_FRM_REG
+ if (F) {
+ Vector(
+ FCSR_NX_REG.get,
+ FCSR_UF_REG.get,
+ FCSR_OF_REG.get,
+ FCSR_DZ_REG.get,
+ FCSR_NV_REG.get
+ ).zipWithIndex.foreach(
+ f => f._1 := f._1 | io.FCSR.get.except(f._2)
+ )
+ io.FCSR.get.nx := FCSR_NX_REG.get
+ io.FCSR.get.uf := FCSR_UF_REG.get
+ io.FCSR.get.of := FCSR_OF_REG.get
+ io.FCSR.get.dz := FCSR_DZ_REG.get
+ io.FCSR.get.nv := FCSR_NV_REG.get
+ io.FCSR.get.frm := FCSR_FRM_REG.get
+ }
// Wires
val w_data = Wire(UInt(32.W))
@@ -105,24 +107,24 @@ class CSRRegFile extends Module{
val MTVEC_MODE_WIRE = WireInit(MTVEC_REG(1,0))
val MTVEC_BASE_WIRE = WireInit(MTVEC_REG(31,2))
val MCOUNTINHIBIT_WIRE = WireInit(Cat("b0".U(29.W),MCOUNTINHIBIT_IR_REG, "b0".U(1.W), MCOUNTINHIBIT_CY_REG))
- val FFLAGS_WIRE = WireInit(Cat(
+ val FFLAGS_WIRE = if (F) Some(WireInit(Cat(
"b0".U(27.W),
- FCSR_NV_REG,
- FCSR_DZ_REG,
- FCSR_OF_REG,
- FCSR_UF_REG,
- FCSR_NX_REG
- ))
- val FRM_WIRE = WireInit(Cat("b0".U(29.W),FCSR_FRM_REG))
- val FCSR_WIRE = WireInit(Cat(
+ FCSR_NV_REG.get,
+ FCSR_DZ_REG.get,
+ FCSR_OF_REG.get,
+ FCSR_UF_REG.get,
+ FCSR_NX_REG.get
+ ))) else None
+ val FRM_WIRE = if (F) Some(WireInit(Cat("b0".U(29.W),FCSR_FRM_REG.get))) else None
+ val FCSR_WIRE = if (F) Some(WireInit(Cat(
"b0".U(24.W),
- FCSR_FRM_REG,
- FCSR_NV_REG,
- FCSR_DZ_REG,
- FCSR_OF_REG,
- FCSR_UF_REG,
- FCSR_NX_REG
- ))
+ FCSR_FRM_REG.get,
+ FCSR_NV_REG.get,
+ FCSR_DZ_REG.get,
+ FCSR_OF_REG.get,
+ FCSR_UF_REG.get,
+ FCSR_NX_REG.get
+ ))) else None
val csr_opr = CSROperations()
/***************************************************/
@@ -169,9 +171,9 @@ class CSRRegFile extends Module{
AddressMap.MTVAL -> MTVAL_REG,
AddressMap.MIE -> MIE_WIRE,
AddressMap.MIP -> MIP_WIRE,
- AddressMap.FFLAGS -> FFLAGS_WIRE,
- AddressMap.FRM -> FRM_WIRE,
- AddressMap.FCSR -> FCSR_WIRE,
+ //AddressMap.FFLAGS -> FFLAGS_WIRE,
+ //AddressMap.FRM -> FRM_WIRE,
+ //AddressMap.FCSR -> FCSR_WIRE,
AddressMap.MCYCLE -> MCYCLE_REG,
AddressMap.MCYCLEH -> MCYCLEH_REG,
AddressMap.MINSTRET-> MINSTRET_REG,
@@ -179,7 +181,11 @@ class CSRRegFile extends Module{
AddressMap.MCOUNTINHIBIT-> MCOUNTINHIBIT_WIRE,
AddressMap.TIME -> TIME_REG,
AddressMap.TIMEH -> TIMEH_REG
- )
+ ) ++ (if (F) Array(
+ AddressMap.FFLAGS -> FFLAGS_WIRE.get,
+ AddressMap.FRM -> FRM_WIRE.get,
+ AddressMap.FCSR -> FCSR_WIRE.get
+ ) else Array())
r_data := MuxLookup(io.CSR.i_addr, DontCare, READ_CASES)
@@ -241,22 +247,28 @@ class CSRRegFile extends Module{
MIP_MSIP_REG := w_data(3)
}
is(AddressMap.FCSR){
- FCSR_NX_REG := w_data(0)
- FCSR_UF_REG := w_data(1)
- FCSR_OF_REG := w_data(2)
- FCSR_DZ_REG := w_data(3)
- FCSR_NV_REG := w_data(4)
- FCSR_FRM_REG := w_data(7,5)
+ if (F) {
+ FCSR_NX_REG.get := w_data(0)
+ FCSR_UF_REG.get := w_data(1)
+ FCSR_OF_REG.get := w_data(2)
+ FCSR_DZ_REG.get := w_data(3)
+ FCSR_NV_REG.get := w_data(4)
+ FCSR_FRM_REG.get := w_data(7,5)
+ }
}
is(AddressMap.FFLAGS){
- FCSR_NX_REG := w_data(0)
- FCSR_UF_REG := w_data(1)
- FCSR_OF_REG := w_data(2)
- FCSR_DZ_REG := w_data(3)
- FCSR_NV_REG := w_data(4)
+ if (F) {
+ FCSR_NX_REG.get := w_data(0)
+ FCSR_UF_REG.get := w_data(1)
+ FCSR_OF_REG.get := w_data(2)
+ FCSR_DZ_REG.get := w_data(3)
+ FCSR_NV_REG.get := w_data(4)
+ }
}
is(AddressMap.FRM){
- FCSR_FRM_REG := w_data(2,0)
+ if (F) {
+ FCSR_FRM_REG.get := w_data(2,0)
+ }
}
is(AddressMap.MCYCLE){
MCYCLE_REG := w_data