From bfd69ea9ab9b1197911ad1645f40cf51a4da018d Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Fri, 25 Jul 2025 15:30:15 +0200 Subject: [PATCH 1/7] Add outstanding hci interface --- Bender.yml | 5 + rtl/common/hci_interfaces.sv | 86 ++++++ rtl/outstanding/hci_outstanding_assign.sv | 45 +++ rtl/outstanding/hci_outstanding_mux.sv | 197 +++++++++++++ rtl/outstanding/hci_outstanding_rob.sv | 165 +++++++++++ rtl/outstanding/hci_outstanding_sink.sv | 294 +++++++++++++++++++ rtl/outstanding/hci_outstanding_source.sv | 340 ++++++++++++++++++++++ 7 files changed, 1132 insertions(+) create mode 100644 rtl/outstanding/hci_outstanding_assign.sv create mode 100644 rtl/outstanding/hci_outstanding_mux.sv create mode 100644 rtl/outstanding/hci_outstanding_rob.sv create mode 100644 rtl/outstanding/hci_outstanding_sink.sv create mode 100644 rtl/outstanding/hci_outstanding_source.sv diff --git a/Bender.yml b/Bender.yml index 9c57081..5da7ad2 100644 --- a/Bender.yml +++ b/Bender.yml @@ -46,6 +46,10 @@ sources: - rtl/ecc/hci_ecc_dec.sv - rtl/ecc/hci_ecc_enc.sv - rtl/ecc/hci_ecc_manager.sv + - rtl/outstanding/hci_outstanding_assign.sv + - rtl/outstanding/hci_outstanding_mux.sv + - rtl/outstanding/hci_outstanding_rob.sv + - rtl/outstanding/hci_outstanding_source.sv - rtl/interco/hci_log_interconnect.sv - rtl/interco/hci_log_interconnect_l2.sv - rtl/interco/hci_new_log_interconnect.sv # `new_XBAR_TCDM` dep. 
is a private repo @@ -57,6 +61,7 @@ sources: # Level 3 - rtl/core/hci_core_sink.sv - rtl/ecc/hci_ecc_source.sv + - rtl/outstanding/hci_outstanding_sink.sv - rtl/interco/hci_router.sv # Level 4 - rtl/ecc/hci_ecc_interconnect.sv diff --git a/rtl/common/hci_interfaces.sv b/rtl/common/hci_interfaces.sv index 94c19d9..dc77507 100644 --- a/rtl/common/hci_interfaces.sv +++ b/rtl/common/hci_interfaces.sv @@ -1,6 +1,7 @@ /* * hci_interfaces.sv * Francesco Conti + * Marco Bertuletti * * Copyright (C) 2019-2020 ETH Zurich, University of Bologna * Copyright and related rights are licensed under the Solderpad Hardware @@ -207,6 +208,91 @@ interface hci_core_intf ( endinterface // hci_core_intf
+interface hci_outstanding_intf (
+  input logic clk
+);
+  // HCI interface with separate request (req_*) and response (resp_*) channels, each with its own valid/ready pair.
+  import hci_package::*;
+
+  parameter int unsigned DW = hci_package::DEFAULT_DW; /// Data Width
+  parameter int unsigned AW = hci_package::DEFAULT_AW; /// Address Width
+  parameter int unsigned BW = hci_package::DEFAULT_BW; /// Width of a "byte" in bits (default 8)
+  parameter int unsigned UW = hci_package::DEFAULT_UW; /// User Width
+  parameter int unsigned IW = hci_package::DEFAULT_IW; /// ID Width
+
+  // handshake signals: req_valid/req_ready for the request channel, resp_valid/resp_ready for the response channel
+  logic req_valid;
+  logic req_ready;
+  logic resp_valid;
+  logic resp_ready;
+
+  // request phase payload (driven by the initiator, see modports below)
+  logic [AW-1:0] req_add;
+  logic req_wen; // wen=1'b1 for LOAD, wen=1'b0 for STORE
+  logic [DW-1:0] req_data;
+  logic [DW/BW-1:0] req_be; // one enable bit per BW-bit "byte" of req_data
+  logic [hci_package::iomsb(UW):0] req_user;
+  logic [hci_package::iomsb(IW):0] req_id; // NOTE(review): presumably echoed back as resp_id by the target - confirm
+
+  // response phase payload (driven by the target, see modports below)
+  logic [DW-1:0] resp_data;
+  logic [hci_package::iomsb(UW):0] resp_user;
+  logic [hci_package::iomsb(IW):0] resp_id;
+  logic resp_opc; // NOTE(review): meaning of this bit is not defined in this file - confirm before relying on it
+
+  modport initiator ( // requesting side: drives req_* payload, req_valid and resp_ready
+    output req_add,
+    output req_wen,
+    output req_data,
+    output req_be,
+    output req_user,
+    output req_id,
+    output req_valid,
+    input  req_ready,
+    input  resp_data,
+    input  resp_user,
+    input  resp_id,
+    input  resp_opc,
+    input  resp_valid,
+    output resp_ready
+  );
+
+  modport target ( // responding side: exact mirror of the initiator directions
+    input  req_add,
+    input  req_wen,
+    input  req_data,
+    input  req_be,
+    input  req_user,
+    input  req_id,
+    input  req_valid,
+    output req_ready,
+    output resp_data,
+    output resp_user,
+    output resp_id,
+    output resp_opc,
+    output resp_valid,
+    input  resp_ready
+  );
+
+  modport monitor ( // passive observer: every signal is an input
+    input req_add,
+    input req_wen,
+    input req_data,
+    input req_be,
+    input req_user,
+    input req_id,
+    input req_valid,
+    input req_ready,
+    input resp_data,
+    input resp_user,
+    input resp_id,
+    input resp_opc,
+    input resp_valid,
+    input resp_ready
+  );
+
+endinterface // hci_outstanding_intf
+
 `ifdef BUILD_DEPRECATED interface hci_mem_intf ( input logic clk diff --git a/rtl/outstanding/hci_outstanding_assign.sv b/rtl/outstanding/hci_outstanding_assign.sv new file mode 100644 index 0000000..b922595 --- /dev/null +++ b/rtl/outstanding/hci_outstanding_assign.sv @@ -0,0 +1,45 @@ +/* + * hci_outstanding_assign.sv + * Marco Bertuletti + * + * Copyright (C) 2019-2020 ETH Zurich, University of Bologna + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +/** + * The **hci_outstanding_assign** module implements a simple assignment for + * HCI-Core streams.
+ *
+ */
+
+module hci_outstanding_assign
+  import hwpe_stream_package::*;
+(
+  hci_outstanding_intf.target    tcdm_target,
+  hci_outstanding_intf.initiator tcdm_initiator
+);
+  // forward path (tcdm_target -> tcdm_initiator): request payload, request valid and response ready
+  assign tcdm_initiator.req_valid  = tcdm_target.req_valid;
+  assign tcdm_initiator.req_add    = tcdm_target.req_add;
+  assign tcdm_initiator.req_wen    = tcdm_target.req_wen;
+  assign tcdm_initiator.req_be     = tcdm_target.req_be;
+  assign tcdm_initiator.req_data   = tcdm_target.req_data;
+  assign tcdm_initiator.req_id     = tcdm_target.req_id;
+  assign tcdm_initiator.req_user   = tcdm_target.req_user;
+  assign tcdm_initiator.resp_ready = tcdm_target.resp_ready;
+  // return path (tcdm_initiator -> tcdm_target): request ready, response valid and response payload
+  assign tcdm_target.req_ready  = tcdm_initiator.req_ready;
+  assign tcdm_target.resp_valid = tcdm_initiator.resp_valid;
+  assign tcdm_target.resp_data  = tcdm_initiator.resp_data;
+  assign tcdm_target.resp_opc   = tcdm_initiator.resp_opc;
+  assign tcdm_target.resp_id    = tcdm_initiator.resp_id;
+  assign tcdm_target.resp_user  = tcdm_initiator.resp_user;
+
+endmodule // hci_outstanding_assign
\ No newline at end of file
diff --git a/rtl/outstanding/hci_outstanding_mux.sv b/rtl/outstanding/hci_outstanding_mux.sv new file mode 100644 index 0000000..3da1616 --- /dev/null +++ b/rtl/outstanding/hci_outstanding_mux.sv @@ -0,0 +1,197 @@ +/* + * hci_core_mux_ooo.sv + * Francesco Conti + * + * Copyright (C) 2017-2023 ETH Zurich, University of Bologna + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License.
+ */ + +/** + * The **HCI dynamic OoO N-to-1 multiplexer** enables to funnel multiple HCI ports + * into a single one. It supports out-of-order responses by means of ID. + * As the ID is implemented as user signal, any FIFO coming after (i.e., + * nearer to memory side) with respect to this block must respect id + * signals - specifically it must return them identical in the response. + * At the end of the chain, there will typically be a `hci_core_r_id_filter` + * block reflecting back all the IDs. This must be placed at the 0-latency + * boundary with the memory system. + * Priority is normally round-robin but can also be forced from the outside + * by setting `priority_force_i` to 1 and driving the `priority_i` array + * to the desired priority values. + * + * .. tabularcolumns:: |l|l|J| + * .. _hci_core_mux_ooo_params: + * .. table:: **hci_core_mux_ooo** design-time parameters. + * + * +------------+-------------+--------------------------------+ + * | **Name** | **Default** | **Description** | + * +------------+-------------+--------------------------------+ + * | *NB_CHAN* | 2 | Number of input HCI channels. 
| + * +------------+-------------+--------------------------------+ + * + */ + +`include "hci_helpers.svh" + +module hci_outstanding_mux + import hwpe_stream_package::*; + import hci_package::*; +#( + parameter int unsigned NB_CHAN = 2, + parameter hci_size_parameter_t `HCI_SIZE_PARAM(out) = '0 +) +( + input logic clk_i, + input logic rst_ni, + input logic clear_i, + + input logic priority_force_i, + input logic [NB_CHAN-1:0][$clog2(NB_CHAN)-1:0] priority_i, + + hci_outstanding_intf.target in [0:NB_CHAN-1], + hci_outstanding_intf.initiator out +); + + localparam int unsigned DW = `HCI_SIZE_GET_DW(out); + localparam int unsigned BW = `HCI_SIZE_GET_BW(out); + localparam int unsigned AW = `HCI_SIZE_GET_AW(out); + localparam int unsigned UW = `HCI_SIZE_GET_UW(out); + localparam int unsigned IW = `HCI_SIZE_GET_IW(out); + + // tcdm ports binding + logic [NB_CHAN-1:0] in_req_valid; + logic [NB_CHAN-1:0][AW-1:0] in_req_add; + logic [NB_CHAN-1:0] in_req_wen; + logic [NB_CHAN-1:0][DW-1:0] in_req_data; + logic [NB_CHAN-1:0][DW/BW-1:0] in_req_be; + logic [NB_CHAN-1:0][hci_package::iomsb(UW):0] in_req_user; + logic [NB_CHAN-1:0][hci_package::iomsb(IW):0] in_req_id; + + logic [NB_CHAN-1:0] in_resp_valid; + logic [NB_CHAN-1:0] in_resp_ready; + + logic [$clog2(NB_CHAN)-1:0] rr_counter_q; + logic [NB_CHAN-1:0][$clog2(NB_CHAN)-1:0] rr_priority_d; + logic [$clog2(NB_CHAN)-1:0] winner_d, winner_q; + + logic rr_counter_en_d, rr_counter_en_q; + assign rr_counter_en_d = out.req_valid & out.req_ready; + + logic any_req_q; + + // round-robin counter + always_ff @(posedge clk_i, negedge rst_ni) + begin : round_robin_counter + if(rst_ni == 1'b0) begin + rr_counter_q <= '0; + end + else if (clear_i == 1'b1) begin + rr_counter_q <= '0; + end + else if (rr_counter_en_d) begin + if (rr_counter_q == NB_CHAN-1) + rr_counter_q <= '0; + else + rr_counter_q <= (rr_counter_q + {{($clog2(NB_CHAN)-1){1'b0}},1'b1}); + end + end + + // keep previous winner in case of no-gnt + always_ff @(posedge clk_i, 
negedge rst_ni) + begin : winner_reg + if(rst_ni == 1'b0) begin + winner_q <= '0; + end + else if (clear_i == 1'b1) begin + winner_q <= '0; + end + else begin + winner_q <= winner_d; + end + end + + // keep track of round-robin counter updates (= output handshakes) to enable WTA circuit + always_ff @(posedge clk_i, negedge rst_ni) + begin : rr_counter_en_reg + if(rst_ni == 1'b0) begin + rr_counter_en_q <= '0; + end + else if (clear_i == 1'b1) begin + rr_counter_en_q <= '0; + end + else begin + rr_counter_en_q <= rr_counter_en_d; + end + end + + // keep track of any input requests to enable WTA circuit + always_ff @(posedge clk_i, negedge rst_ni) + begin : any_req_reg + if(rst_ni == 1'b0) begin + any_req_q <= '0; + end + else if (clear_i == 1'b1) begin + any_req_q <= '0; + end + else begin + any_req_q <= |(in_req_valid); + end + end + + for(genvar ii=0; ii + * + * Copyright (C) 2014-2022 ETH Zurich, University of Bologna + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +/** + * The **hci_core_sink** module is the high-level sink streamer + * performing a series of stores on a HCI-Core interface + * from an incoming HWPE-Stream data stream from a HWPE engine/datapath. + * The sink streamer is a composite module that makes use of many other + * fundamental IPs. 
+ * + * Fundamentally, a sink streamer acts as a specialized DMA engine acting + * out a predefined pattern from an **hwpe_stream_addressgen_v3** to perform + * a burst of stores via a HCI-Core interface, consuming a HWPE-Stream data + * stream into the HCI-Core `data` field. + * The sink streamer is insensitive to memory latency. + * This is due to the nature of store streams, which are unidirectional + * (i.e. `addr` and `data` move in the same direction). + * + * Misaligned accesses are supported by widening the HCI-Core data width of 32 + * bits compared to the HWPE-Stream that gets consumed by the streamer. + * The stream is shifted according to the address alignment and invalid bytes + * are disabled by unsetting their `strb`. This feature can be deactivated by + * unsetting the `MISALIGNED_ACCESS` parameter; in this case, the sink will + * only work correctly if all data is aligned to a word boundary. + * + * .. tabularcolumns:: |l|l|J| + * .. _hci_core_sink_params: + * .. table:: **hci_core_sink** design-time parameters. + * + * +---------------------+-------------+------------------------------------------------------------------------------------------------------------------------+ + * | **Name** | **Default** | **Description** | + * +---------------------+-------------+------------------------------------------------------------------------------------------------------------------------+ + * | *TCDM_FIFO_DEPTH* | 2 | If >0, the module produces a HWPE-MemDecoupled interface and includes a TCDM FIFO of this depth. | + * +---------------------+-------------+------------------------------------------------------------------------------------------------------------------------+ + * | *TRANS_CNT* | 16 | Number of bits supported in the transaction counter of the address generator, which will overflow at 2^ `TRANS_CNT`. 
| + * +---------------------+-------------+------------------------------------------------------------------------------------------------------------------------+ + * | *MISALIGNED_ACCESS* | 1 | If set to 0, the sink will not support non-word-aligned HWPE-Mem accesses. | + * +---------------------+-------------+------------------------------------------------------------------------------------------------------------------------+ + * + * .. tabularcolumns:: |l|l|J| + * .. _hci_core_sink_ctrl: + * .. table:: **hci_core_sink** input control signals. + * + * +-------------------+------------------------+----------------------------------------------------------------------------+ + * | **Name** | **Type** | **Description** | + * +-------------------+------------------------+----------------------------------------------------------------------------+ + * | *req_start* | `logic` | When 1, the sink streamer operation is started if it is ready. | + * +-------------------+------------------------+----------------------------------------------------------------------------+ + * | *addressgen_ctrl* | `ctrl_addressgen_v3_t` | Configuration of the address generator (see **hwpe_stream_addresgen_v3**). | + * +-------------------+------------------------+----------------------------------------------------------------------------+ + * + * .. tabularcolumns:: |l|l|J| + * .. _hci_core_sink_flags: + * .. table:: **hci_core_sink** output flags. + * + * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+ + * | **Name** | **Type** | **Description** | + * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+ + * | *ready_start* | `logic` | 1 when the sink streamer is ready to start operation, from the first IDLE state cycle on. 
| + * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+ + * | *done* | `logic` | 1 for one cycle when the streamer ends operation, in the cycle before it goes to IDLE state . | + * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+ + * | *addressgen_flags* | `flags_addressgen_v3_t`| Address generator flags (see **hwpe_stream_addresgen_v3**). | + * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+ + * + */
+`include "hci_helpers.svh"
+// Store-side streamer: pairs each incoming stream beat with a generated address and issues it as a request with req_wen = 0.
+module hci_outstanding_sink
+  import hwpe_stream_package::*;
+  import hci_package::*;
+#(
+  // Stream interface params
+  parameter int unsigned TRANS_CNT = 16, // bit width of the local address counter (address_cnt_q)
+  parameter int unsigned MISALIGNED_ACCESSES = 1, // 1: shift data/strobes by the address byte offset; otherwise pass them through
+  parameter hci_size_parameter_t `HCI_SIZE_PARAM(tcdm) = '0
+)
+(
+  input logic clk_i,
+  input logic rst_ni,
+  input logic test_mode_i, // not referenced inside this module
+  input logic clear_i,
+  input logic enable_i,
+
+  hci_outstanding_intf.initiator tcdm,
+  hwpe_stream_intf_stream.sink stream,
+
+  // control plane
+  input hci_streamer_ctrl_t ctrl_i,
+  output hci_streamer_flags_t flags_o
+);
+
+  localparam int unsigned DATA_WIDTH = `HCI_SIZE_GET_DW(tcdm);
+
+  hci_streamer_state_t cs, ns;
+  flags_fifo_t addr_fifo_flags;
+
+  logic address_gen_en;
+  logic address_gen_clr;
+  logic done;
+
+  logic tcdm_inflight; // assigned below but not read anywhere in this module
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( 36 )
+  ) addr_push (
+    .clk ( clk_i )
+  );
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( 36 )
+  ) addr_pop (
+    .clk ( clk_i )
+  );
+  // local bus copy forwarded to `tcdm` by i_tcdm_assign; only DW follows the tcdm size parameter, the rest are package defaults
+  hci_outstanding_intf #(
+    .DW( DATA_WIDTH ),
+    .AW( DEFAULT_AW ),
+    .BW( DEFAULT_BW ),
+    .UW( DEFAULT_UW ),
+    .IW( DEFAULT_IW )
+  ) tcdm_target ( .clk ( clk_i ) );
+
+  hwpe_stream_addressgen_v3 i_addressgen (
+    .clk_i       ( clk_i                    ),
+    .rst_ni      ( rst_ni                   ),
+    .enable_i    ( address_gen_en           ),
+    .clear_i     ( address_gen_clr          ),
+    .presample_i ( ctrl_i.req_start         ),
+    .addr_o      ( addr_push                ),
+    .ctrl_i      ( ctrl_i.addressgen_ctrl   ),
+    .flags_o     ( flags_o.addressgen_flags )
+  );
+  // FIFO (FIFO_DEPTH = 2) between the address generator output and the request logic
+  hwpe_stream_fifo #(
+    .DATA_WIDTH ( 36 ),
+    .FIFO_DEPTH ( 2  )
+  ) i_fifo_addr (
+    .clk_i   ( clk_i           ),
+    .rst_ni  ( rst_ni          ),
+    .clear_i ( clear_i         ),
+    .flags_o ( addr_fifo_flags ),
+    .push_i  ( addr_push       ),
+    .pop_o   ( addr_pop        )
+  );
+
+  logic address_cnt_en, address_cnt_clr;
+  logic [TRANS_CNT-1:0] address_cnt_d, address_cnt_q;
+
+  logic [DATA_WIDTH-1:0] stream_data_misaligned;
+  logic [DATA_WIDTH/8-1:0] stream_strb_misaligned;
+  logic [DATA_WIDTH-1:0] stream_data_aligned;
+  logic [DATA_WIDTH/8-1:0] stream_strb_aligned;
+
+  assign stream_data_misaligned = stream.data;
+  assign stream_strb_misaligned = stream.strb;
+  // move the low DATA_WIDTH-32 stream bits (and their strobes) up by addr_pop.data[1:0] bytes; all other bits/strobes stay 0
+  if (MISALIGNED_ACCESSES==1 ) begin : missaligned_access_gen
+    always_comb
+    begin
+      stream_data_aligned = '0;
+      stream_strb_aligned = '0;
+      case(addr_pop.data[1:0])
+        2'b00: begin
+          stream_data_aligned[DATA_WIDTH-32-1:0] = stream_data_misaligned[DATA_WIDTH-32-1:0];
+          stream_strb_aligned[(DATA_WIDTH-32)/8-1:0] = stream_strb_misaligned[(DATA_WIDTH-32)/8-1:0];
+        end
+        2'b01: begin
+          stream_data_aligned[DATA_WIDTH-24-1:8] = stream_data_misaligned[DATA_WIDTH-32-1:0];
+          stream_strb_aligned[(DATA_WIDTH-24)/8-1:1] = stream_strb_misaligned[(DATA_WIDTH-32)/8-1:0];
+        end
+        2'b10: begin
+          stream_data_aligned[DATA_WIDTH-16-1:16] = stream_data_misaligned[DATA_WIDTH-32-1:0];
+          stream_strb_aligned[(DATA_WIDTH-16)/8-1:2] = stream_strb_misaligned[(DATA_WIDTH-32)/8-1:0];
+        end
+        2'b11: begin
+          stream_data_aligned[DATA_WIDTH-8-1:24] = stream_data_misaligned[DATA_WIDTH-32-1:0];
+          stream_strb_aligned[(DATA_WIDTH-8)/8-1:3] = stream_strb_misaligned[(DATA_WIDTH-32)/8-1:0];
+        end
+      endcase
+    end
+  end
+  else begin
+    assign stream_data_aligned[DATA_WIDTH-1:0] = stream_data_misaligned[DATA_WIDTH-1:0];
+    assign stream_strb_aligned[DATA_WIDTH/8-1:0] = stream_strb_misaligned[DATA_WIDTH/8-1:0];
+  end
+
+  // hci port binding: a request is offered only outside IDLE and only when both a stream beat and an address are valid
+  assign tcdm_target.req_valid = (cs != STREAMER_IDLE) ? stream.valid & addr_pop.valid : '0;
+  assign tcdm_target.req_add = (cs != STREAMER_IDLE) ? {addr_pop.data[31:2],2'b0} : '0; // address forced to a 4-byte boundary
+  assign tcdm_target.req_wen = '0; // always 0, i.e. STORE per the req_wen convention noted in hci_outstanding_intf
+  assign tcdm_target.req_be = (cs != STREAMER_IDLE) ? stream_strb_aligned : '0;
+  assign tcdm_target.req_data = (cs != STREAMER_IDLE) ? stream_data_aligned : '0;
+  assign tcdm_target.resp_ready = '1; // responses are always accepted; no resp_* field is read in this module
+  assign stream.ready = ~stream.valid | (tcdm_target.req_ready & addr_pop.valid); // ready when idle, or when request side is ready and an address is available
+  assign addr_pop.ready = stream.valid & stream.ready; // one address is consumed per accepted stream beat
+
+  // unimplemented user bits = 0
+  assign tcdm_target.req_user = '0;
+
+  // unimplemented id bits = 0
+  assign tcdm_target.req_id = '0;
+
+  hci_outstanding_assign i_tcdm_assign (
+    .tcdm_target    ( tcdm_target ),
+    .tcdm_initiator ( tcdm        )
+  );
+
+  assign tcdm_inflight = tcdm.req_valid;
+  // registered copy of the FSM `done` pulse, exported as flags_o.done
+  always_ff @(posedge clk_i or negedge rst_ni)
+  begin : done_sink_ff
+    if(~rst_ni)
+      flags_o.done <= 1'b0;
+    else if(clear_i)
+      flags_o.done <= 1'b0;
+    else if(enable_i)
+      flags_o.done <= done;
+  end
+  // state register: async reset and clear_i both return to IDLE; otherwise advances only while enable_i is set
+  always_ff @(posedge clk_i, negedge rst_ni)
+  begin : fsm_seq
+    if(rst_ni == 1'b0) begin
+      cs <= STREAMER_IDLE;
+    end
+    else if(clear_i == 1'b1) begin
+      cs <= STREAMER_IDLE;
+    end
+    else if(enable_i) begin
+      cs <= ns;
+    end
+  end
+  // next-state/output logic: IDLE -> WORKING on req_start, WORKING -> DONE on addressgen done, DONE -> IDLE once tot_len addresses were consumed
+  always_comb
+  begin : fsm_comb
+    ns = cs;
+    done = 1'b0;
+    flags_o.ready_start = 1'b0;
+    address_gen_en = 1'b0;
+    address_gen_clr = clear_i;
+    address_cnt_clr = 1'b0;
+    case(cs)
+      STREAMER_IDLE : begin
+        flags_o.ready_start = 1'b1;
+        if(ctrl_i.req_start) begin
+          ns = STREAMER_WORKING;
+          address_gen_en = 1'b1;
+        end
+      end
+      STREAMER_WORKING : begin
+        address_gen_en = 1'b1;
+        if(flags_o.addressgen_flags.done) begin
+          ns = STREAMER_DONE;
+        end
+      end
+      STREAMER_DONE : begin
+        address_gen_en = 1'b1;
+        if(address_cnt_q==ctrl_i.addressgen_ctrl.tot_len) begin
+          ns = STREAMER_IDLE;
+          done = 1'b1;
+          address_gen_en = 1'b0;
+          address_gen_clr = 1'b1; // clear the address generator and the local counter on the way back to IDLE
+          address_cnt_clr = 1'b1;
+        end
+      end
+    endcase
+  end
+
+  assign address_cnt_en = addr_pop.valid & addr_pop.ready;
+  // counts consumed addresses; compared against tot_len in STREAMER_DONE
+  always_ff @(posedge clk_i or negedge rst_ni)
+  begin
+    if(~rst_ni)
+      address_cnt_q <= '0;
+    else if(clear_i | address_cnt_clr)
+      address_cnt_q <= '0;
+    else if(enable_i & address_cnt_en)
+      address_cnt_q <= address_cnt_d;
+  end
+  assign address_cnt_d = address_cnt_q + 1;
+
+endmodule // hci_outstanding_sink
diff --git a/rtl/outstanding/hci_outstanding_source.sv b/rtl/outstanding/hci_outstanding_source.sv new file mode 100644 index 0000000..ae9266a --- /dev/null +++ b/rtl/outstanding/hci_outstanding_source.sv @@ -0,0 +1,340 @@ +/* + * hci_core_source.sv + * Francesco Conti + * Arpan Suravi Prasad + * Marco Bertuletti + * + * Copyright (C) 2014-2022 ETH Zurich, University of Bologna + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +/** + * The **hci_core_source** module is the high-level source streamer + * performing a series of loads on a HCI-Core interface + * and producing a HWPE-Stream data stream to feed a HWPE engine/datapath. + * The source streamer is a composite module that makes use of many other + * fundamental IPs. + * + * Fundamentally, a source streamer acts as a specialized DMA engine acting + * out a predefined pattern from an **hwpe_stream_addressgen_v3** to perform + * a burst of loads via a HCI-Core interface, producing a HWPE-Stream + * data stream from the HCI-Core `r_data` field.
+ * By default, the HCI-Core streamer supports delayed accesses using a HCI-Core + * interface. + * + * Misaligned accesses are supported by widening the HCI-Core data width of 32 + * bits compared to the HWPE-Stream that gets produced by the streamer. + * Unused bytes are simply ignored. This feature can be deactivated by unsetting + * the `MISALIGNED_ACCESS` parameter; in this case, the sink will + * only work correctly if all data is aligned to a word boundary. + * + * In principle, the source streamer is insensitive to latency. + * However, when configured to support misaligned memory accesses, the address FIFO + * depth sets the maximum supported latency. + * This parameter can be controlled by the `ADDR_MIS_DEPTH` parameter (default 8). + * + * .. tabularcolumns:: |l|l|J| + * .. _hci_core_source_params: + * .. table:: **hci_core_source** design-time parameters. + * + * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ + * | **Name** | **Default** | **Description** | + * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ + * | *LATCH_FIFO* | 0 | If 1, use latches instead of flip-flops (requires special constraints in synthesis). | + * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ + * | *TRANS_CNT* | 16 | Number of bits supported in the transaction counter of the address generator, which will overflow at 2^ `TRANS_CNT`. | + * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ + * | *ADDR_MIS_DEPTH* | 8 | Depth of the misaligned address FIFO. 
This **must** be equal to the max-latency between the HCI-Core `gnt` and `r_valid`.| + * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ + * | *MISALIGNED_ACCESS* | 1 | If set to 0, the source will not support non-word-aligned HCI-Core accesses. | + * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ + * | *PASSTHROUGH_FIFO* | 0 | If set to 1, the address FIFO will be capable of fall-through operation (i.e., skipping the FIFO latency entirely). | + * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ + * + * .. tabularcolumns:: |l|l|J| + * .. _hci_core_source_ctrl: + * .. table:: **hci_core_source** input control signals. + * + * +-------------------+------------------------+----------------------------------------------------------------------------+ + * | **Name** | **Type** | **Description** | + * +-------------------+------------------------+----------------------------------------------------------------------------+ + * | *req_start* | `logic` | When 1, the source streamer operation is started if it is ready. | + * +-------------------+------------------------+----------------------------------------------------------------------------+ + * | *addressgen_ctrl* | `ctrl_addressgen_v3_t` | Configuration of the address generator (see **hwpe_stream_addresgen_v3**). | + * +-------------------+------------------------+----------------------------------------------------------------------------+ + * + * .. tabularcolumns:: |l|l|J| + * .. _hci_core_source_flags: + * .. table:: **hci_core_source** output flags. 
+ * + * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+ + * | **Name** | **Type** | **Description** | + * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+ + * | *ready_start* | `logic` | 1 when the source streamer is ready to start operation, from the first IDLE state cycle on. | + * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+ + * | *done* | `logic` | 1 for one cycle when the streamer ends operation, in the cycle before it goes to IDLE state . | + * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+ + * | *addressgen_flags* | `flags_addressgen_v3_t`| Address generator flags (see **hwpe_stream_addresgen_v3**). | + * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+ + * + */
+
+`include "hci_helpers.svh"
+// Load-side streamer: issues one req_wen = 1 request per generated address and turns each accepted response into a stream beat.
+module hci_outstanding_source
+  import hwpe_stream_package::*;
+  import hci_package::*;
+#(
+  // Stream interface params
+  parameter int unsigned LATCH_FIFO = 0,
+  parameter int unsigned TRANS_CNT = 16,
+  parameter int unsigned ADDR_MIS_DEPTH = 8, // Beware: this must be >= the maximum latency between TCDM gnt and TCDM r_valid!!!
+  parameter int unsigned MISALIGNED_ACCESSES = 1,
+  parameter int unsigned PASSTHROUGH_FIFO = 0,
+  parameter hci_size_parameter_t `HCI_SIZE_PARAM(tcdm) = '0
+)
+(
+  input logic clk_i,
+  input logic rst_ni,
+  input logic test_mode_i,
+  input logic clear_i,
+  input logic enable_i,
+
+  hci_outstanding_intf.initiator tcdm,
+  hwpe_stream_intf_stream.source stream,
+
+  // control plane
+  input hci_streamer_ctrl_t ctrl_i,
+  output hci_streamer_flags_t flags_o
+);
+
+  localparam int unsigned DATA_WIDTH = `HCI_SIZE_GET_DW(tcdm);
+  localparam int unsigned ADDR_STREAM_WIDTH = 36; // single width for the address streams AND the address FIFO (same value hci_outstanding_sink uses)
+
+  logic stream_cnt_en, stream_cnt_clr;
+  logic [TRANS_CNT-1:0] stream_cnt_d, stream_cnt_q;
+
+  hci_streamer_state_t cs, ns;
+  flags_fifo_t addr_fifo_flags;
+
+  logic done;
+  logic address_gen_en;
+  logic address_gen_clr;
+  // address streams on either side of i_fifo_addr; they previously were 32 bit while the FIFO was parameterized for 36
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( ADDR_STREAM_WIDTH )
+  ) addr_push (
+    .clk ( clk_i )
+  );
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( ADDR_STREAM_WIDTH )
+  ) addr_pop (
+    .clk ( clk_i )
+  );
+
+  // generate addresses
+  hwpe_stream_addressgen_v3 i_addressgen (
+    .clk_i       ( clk_i                    ),
+    .rst_ni      ( rst_ni                   ),
+    .enable_i    ( address_gen_en           ),
+    .clear_i     ( address_gen_clr          ),
+    .presample_i ( ctrl_i.req_start         ),
+    .addr_o      ( addr_push                ),
+    .ctrl_i      ( ctrl_i.addressgen_ctrl   ),
+    .flags_o     ( flags_o.addressgen_flags )
+  );
+  // address FIFO (FIFO_DEPTH = 2); PASSTHROUGH_FIFO selects the hwpe_stream_fifo_passthrough variant
+  if (PASSTHROUGH_FIFO) begin : passthrough_gen
+    hwpe_stream_fifo_passthrough #(
+      .DATA_WIDTH ( ADDR_STREAM_WIDTH ),
+      .FIFO_DEPTH ( 2                 )
+    ) i_fifo_addr (
+      .clk_i   ( clk_i           ),
+      .rst_ni  ( rst_ni          ),
+      .clear_i ( clear_i         ),
+      .flags_o ( addr_fifo_flags ),
+      .push_i  ( addr_push       ),
+      .pop_o   ( addr_pop        )
+    );
+  end
+  else begin : nopassthrough_gen
+    hwpe_stream_fifo #(
+      .DATA_WIDTH ( ADDR_STREAM_WIDTH ),
+      .FIFO_DEPTH ( 2                 )
+    ) i_fifo_addr (
+      .clk_i   ( clk_i           ),
+      .rst_ni  ( rst_ni          ),
+      .clear_i ( clear_i         ),
+      .flags_o ( addr_fifo_flags ),
+      .push_i  ( addr_push       ),
+      .pop_o   ( addr_pop        )
+    );
+  end
+
+  // Align the addresses
+  logic [1:0] addr_misaligned; // byte offset 0..3: must be 2 bits wide, a 1-bit signal can never match 2'b10 / 2'b11 below
+  logic [DATA_WIDTH-1:0] stream_data_misaligned;
+  logic [DATA_WIDTH-1:0] stream_data_aligned;
+  // the byte offset of every accepted request is queued here and popped again when its response is accepted
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( 8 ) // only 2 significant
+  ) addr_misaligned_push (
+    .clk ( clk_i )
+  );
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( 8 ) // only 2 significant
+  ) addr_misaligned_pop (
+    .clk ( clk_i )
+  );
+  assign addr_misaligned_push.data = {6'b0, addr_pop.data[1:0]};
+  assign addr_misaligned_push.strb = '1;
+  assign addr_misaligned_push.valid = enable_i & (tcdm.req_valid & tcdm.req_ready);
+  assign addr_misaligned_pop.ready = (tcdm.resp_valid & tcdm.resp_ready);
+
+  hwpe_stream_fifo #(
+    .DATA_WIDTH ( 8 ), // only [1:0] significant
+    .FIFO_DEPTH ( ADDR_MIS_DEPTH )
+  ) i_addr_misaligned_fifo (
+    .clk_i   ( clk_i                ),
+    .rst_ni  ( rst_ni               ),
+    .clear_i ( clear_i              ),
+    .flags_o (                      ),
+    .push_i  ( addr_misaligned_push ),
+    .pop_o   ( addr_misaligned_pop  )
+  );
+
+  // this is simply exploiting the fact that we can make a wider data access than strictly necessary!
+  assign addr_misaligned = (tcdm.resp_valid & tcdm.resp_ready) ? addr_misaligned_pop.data[1:0] : '0;
+  assign stream_data_misaligned = (tcdm.resp_valid & tcdm.resp_ready) ? tcdm.resp_data : '0;
+  // shift the response down by addr_misaligned bytes so that the payload starts at bit 0 of the stream
+  if (MISALIGNED_ACCESSES==1 ) begin : misaligned_access_gen
+    always_comb
+    begin
+      stream_data_aligned = '0;
+      case(addr_misaligned)
+        2'b00: begin
+          stream_data_aligned[DATA_WIDTH-1:0] = stream_data_misaligned[DATA_WIDTH-1:0];
+        end
+        2'b01: begin
+          stream_data_aligned[DATA_WIDTH-32-1:0] = stream_data_misaligned[DATA_WIDTH-24-1:8];
+        end
+        2'b10: begin
+          stream_data_aligned[DATA_WIDTH-32-1:0] = stream_data_misaligned[DATA_WIDTH-16-1:16];
+        end
+        2'b11: begin
+          stream_data_aligned[DATA_WIDTH-32-1:0] = stream_data_misaligned[DATA_WIDTH-8-1:24];
+        end
+      endcase
+    end
+  end
+  else begin
+    assign stream_data_aligned[DATA_WIDTH-1:0] = stream_data_misaligned[DATA_WIDTH-1:0];
+  end
+
+  logic streamer_ready_q, streamer_ready_d; // NOTE(review): this register is not read by any other logic in this module
+
+  always_ff @(posedge clk_i, negedge rst_ni) begin
+    if(rst_ni == 1'b0) begin
+      streamer_ready_q <= 1'b0;
+    end else begin
+      streamer_ready_q <= streamer_ready_d;
+    end
+  end
+
+  always_comb begin
+    streamer_ready_d = streamer_ready_q;
+    if (stream.ready) begin
+      streamer_ready_d = 1'b1;
+    end else begin
+      if (streamer_ready_q) begin
+        streamer_ready_d = !addr_pop.valid | (addr_pop.valid & tcdm.req_ready);
+      end
+    end
+  end
+  // bus binding: responses are back-pressured directly by the stream consumer, requests are offered only outside IDLE
+  assign tcdm.resp_ready = stream.ready;
+  assign tcdm.req_valid = (cs != STREAMER_IDLE) ? addr_pop.valid : '0;
+  assign tcdm.req_add = (cs != STREAMER_IDLE) ? {addr_pop.data[31:2],2'b0} : '0; // address forced to a 4-byte boundary
+  assign tcdm.req_wen = 1'b1; // always 1, i.e. LOAD per the req_wen convention noted in hci_outstanding_intf
+  assign tcdm.req_be = '0; // was 4'h0: same all-zero value, written so it does not assume a 4-bit byte-enable
+  assign tcdm.req_data = '0;
+  assign tcdm.req_user = '0;
+  assign tcdm.req_id = '0;
+  assign stream.strb = '1;
+  assign stream.data = stream_data_aligned;
+  assign stream.valid = enable_i & (tcdm.resp_valid & tcdm.resp_ready);
+  assign addr_pop.ready = (cs != STREAMER_IDLE) ? addr_misaligned_push.ready & (tcdm.req_valid & tcdm.req_ready) : 1'b0;
+
+  // state register: async reset and clear_i both return to IDLE; otherwise advances only while enable_i is set
+  always_ff @(posedge clk_i, negedge rst_ni)
+  begin : fsm_seq
+    if(rst_ni == 1'b0) begin
+      cs <= STREAMER_IDLE;
+    end
+    else if(clear_i == 1'b1) begin
+      cs <= STREAMER_IDLE;
+    end
+    else if(enable_i) begin
+      cs <= ns;
+    end
+  end
+  // next-state/output logic: IDLE -> WORKING on req_start, WORKING -> DONE on addressgen done, DONE -> IDLE when all beats are out
+  always_comb
+  begin : fsm_comb
+    ns = cs;
+    done = 1'b0;
+    flags_o.ready_start = 1'b0;
+    flags_o.done = 1'b0;
+    address_gen_en = 1'b0;
+    address_gen_clr = clear_i;
+    stream_cnt_clr = 1'b0;
+    case(cs)
+      STREAMER_IDLE : begin
+        flags_o.ready_start = 1'b1;
+        if(ctrl_i.req_start) begin
+          ns = STREAMER_WORKING;
+          address_gen_en = 1'b1;
+        end
+      end
+      STREAMER_WORKING : begin
+        address_gen_en = 1'b1;
+        if(flags_o.addressgen_flags.done) begin
+          ns = STREAMER_DONE;
+        end
+      end
+      STREAMER_DONE : begin
+        address_gen_en = 1'b1;
+        if((addr_fifo_flags.empty==1'b1) && (stream_cnt_q==ctrl_i.addressgen_ctrl.tot_len)) begin // address FIFO drained and tot_len beats delivered
+          ns = STREAMER_IDLE;
+          flags_o.done = 1'b1;
+          done = 1'b1;
+          address_gen_en = 1'b0;
+          address_gen_clr = 1'b1;
+          stream_cnt_clr = 1'b1;
+        end
+      end
+    endcase
+  end
+
+  assign stream_cnt_en = stream.valid & stream.ready;
+  // counts delivered stream beats; compared against tot_len in STREAMER_DONE
+  always_ff @(posedge clk_i or negedge rst_ni)
+  begin
+    if(~rst_ni)
+      stream_cnt_q <= '0;
+    else if(clear_i | stream_cnt_clr)
+      stream_cnt_q <= '0;
+    else if(enable_i & stream_cnt_en)
+      stream_cnt_q <= stream_cnt_d;
+  end
+  assign stream_cnt_d = stream_cnt_q + 1;
+
+endmodule // hci_outstanding_source
From 22163ab81a946d28fa76d1110d9c36b508a786b9 Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Mon, 25 Aug 2025 08:06:55 +0200 Subject: [PATCH 2/7] Fix outstanding mux handshake --- rtl/outstanding/hci_outstanding_mux.sv | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rtl/outstanding/hci_outstanding_mux.sv b/rtl/outstanding/hci_outstanding_mux.sv index 3da1616..d0ddedb 100644 --- a/rtl/outstanding/hci_outstanding_mux.sv +++ b/rtl/outstanding/hci_outstanding_mux.sv @@ -73,8 +73,6 @@ module hci_outstanding_mux logic
[NB_CHAN-1:0][DW/BW-1:0] in_req_be; logic [NB_CHAN-1:0][hci_package::iomsb(UW):0] in_req_user; logic [NB_CHAN-1:0][hci_package::iomsb(IW):0] in_req_id; - - logic [NB_CHAN-1:0] in_resp_valid; logic [NB_CHAN-1:0] in_resp_ready; logic [$clog2(NB_CHAN)-1:0] rr_counter_q; @@ -160,7 +158,7 @@ module hci_outstanding_mux assign in[ii].resp_opc = out.resp_opc; assign in[ii].resp_user = out.resp_user; assign in[ii].resp_id = out.resp_id; - assign in[ii].resp_valid = (out.resp_id == ii) ? (out.resp_valid & in[ii].resp_ready) : 1'b0; + assign in[ii].resp_valid = (out.resp_id == ii) ? out.resp_valid : 1'b0; assign in_resp_ready [ii] = in[ii].resp_ready; // assign priorities to each port depending on round-robin counter From 8c53ebef743a1c343f93a337c856340773627b93 Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Mon, 25 Aug 2025 08:07:26 +0200 Subject: [PATCH 3/7] Add FIFO for retired outstanding responses --- rtl/outstanding/hci_outstanding_source.sv | 63 +++++++++++++---------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/rtl/outstanding/hci_outstanding_source.sv b/rtl/outstanding/hci_outstanding_source.sv index ae9266a..eef2469 100644 --- a/rtl/outstanding/hci_outstanding_source.sv +++ b/rtl/outstanding/hci_outstanding_source.sv @@ -212,8 +212,8 @@ module hci_outstanding_source ); // this is simply exploiting the fact that we can make a wider data access than strictly necessary! - assign addr_misaligned = (tcdm.resp_valid & tcdm.resp_ready) ? addr_misaligned_pop.data[1:0] : '0; - assign stream_data_misaligned = (tcdm.resp_valid & tcdm.resp_ready) ? tcdm.resp_data : '0; + assign addr_misaligned = tcdm.resp_valid ? addr_misaligned_pop.data[1:0] : '0; + assign stream_data_misaligned = tcdm.resp_valid ? 
tcdm.resp_data : '0; if (MISALIGNED_ACCESSES==1 ) begin : misaligned_access_gen always_comb @@ -239,28 +239,8 @@ module hci_outstanding_source assign stream_data_aligned[DATA_WIDTH-1:0] = stream_data_misaligned[DATA_WIDTH-1:0]; end - logic streamer_ready_q, streamer_ready_d; + // HANDSHAKES Request - always_ff @(posedge clk_i, negedge rst_ni) begin - if(rst_ni == 1'b0) begin - streamer_ready_q <= 1'b0; - end else begin - streamer_ready_q <= streamer_ready_d; - end - end - - always_comb begin - streamer_ready_d = streamer_ready_q; - if (stream.ready) begin - streamer_ready_d = 1'b1; - end else begin - if (streamer_ready_q) begin - streamer_ready_d = !addr_pop.valid | (addr_pop.valid & tcdm.req_ready); - end - end - end - - assign tcdm.resp_ready = stream.ready; assign tcdm.req_valid = (cs != STREAMER_IDLE) ? addr_pop.valid : '0; assign tcdm.req_add = (cs != STREAMER_IDLE) ? {addr_pop.data[31:2],2'b0} : '0; assign tcdm.req_wen = 1'b1; @@ -268,11 +248,42 @@ module hci_outstanding_source assign tcdm.req_data = '0; assign tcdm.req_user = '0; assign tcdm.req_id = '0; - assign stream.strb = '1; - assign stream.data = stream_data_aligned; - assign stream.valid = enable_i & (tcdm.resp_valid & tcdm.resp_ready); assign addr_pop.ready = (cs != STREAMER_IDLE) ? 
addr_misaligned_push.ready & (tcdm.req_valid & tcdm.req_ready) : 1'b0; + // HANDSHAKES Response + + hwpe_stream_intf_stream #( + .DATA_WIDTH ( DATA_WIDTH ) + ) resp_push ( + .clk ( clk_i ) + ); + hwpe_stream_intf_stream #( + .DATA_WIDTH ( DATA_WIDTH ) + ) resp_pop ( + .clk ( clk_i ) + ); + + assign resp_push.data = stream_data_aligned; + assign resp_push.strb = '1; + assign resp_push.valid = enable_i & (tcdm.resp_valid & tcdm.resp_ready); + assign tcdm.resp_ready = resp_push.ready; + + hwpe_stream_fifo #( + .DATA_WIDTH ( DATA_WIDTH ), + .FIFO_DEPTH ( 8 ) + ) i_resp_fifo ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .clear_i ( clear_i ), + .flags_o ( ), + .push_i ( resp_push ), + .pop_o ( resp_pop ) + ); + + assign stream.data = resp_pop.data; + assign stream.valid = resp_pop.valid; + assign stream.strb = resp_pop.strb; + assign resp_pop.ready = stream.ready; always_ff @(posedge clk_i, negedge rst_ni) begin : fsm_seq From 3d724650425868da40b08157696741169cf84440 Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Thu, 4 Sep 2025 09:38:32 +0200 Subject: [PATCH 4/7] Improve documentation --- rtl/outstanding/hci_outstanding_assign.sv | 4 +- rtl/outstanding/hci_outstanding_mux.sv | 46 +++++++++--- rtl/outstanding/hci_outstanding_rob.sv | 89 +++++++++++++++++------ rtl/outstanding/hci_outstanding_sink.sv | 42 +++++------ rtl/outstanding/hci_outstanding_source.sv | 69 ++++++++---------- 5 files changed, 151 insertions(+), 99 deletions(-) diff --git a/rtl/outstanding/hci_outstanding_assign.sv b/rtl/outstanding/hci_outstanding_assign.sv index b922595..d4ff691 100644 --- a/rtl/outstanding/hci_outstanding_assign.sv +++ b/rtl/outstanding/hci_outstanding_assign.sv @@ -15,7 +15,7 @@ /** * The **hci_outstanding_assign** module implements a simple assignment for - * HCI-Core streams. + * HCI-Outstanding streams. 
* */ @@ -42,4 +42,4 @@ module hci_outstanding_assign assign tcdm_target.resp_valid = tcdm_initiator.resp_valid; assign tcdm_initiator.resp_ready = tcdm_target.resp_ready; -endmodule // hci_core_assign \ No newline at end of file +endmodule // hci_outstanding_assign diff --git a/rtl/outstanding/hci_outstanding_mux.sv b/rtl/outstanding/hci_outstanding_mux.sv index d0ddedb..290ec99 100644 --- a/rtl/outstanding/hci_outstanding_mux.sv +++ b/rtl/outstanding/hci_outstanding_mux.sv @@ -1,6 +1,6 @@ /* * hci_core_mux_ooo.sv - * Francesco Conti + * Marco Bertuletti * * Copyright (C) 2017-2023 ETH Zurich, University of Bologna * Copyright and related rights are licensed under the Solderpad Hardware @@ -14,21 +14,16 @@ */ /** - * The **HCI dynamic OoO N-to-1 multiplexer** enables to funnel multiple HCI ports - * into a single one. It supports out-of-order responses by means of ID. - * As the ID is implemented as user signal, any FIFO coming after (i.e., + * The **HCI-Outstanding N-to-1 multiplexer** enables to funnel multiple HCI + * ports into a single one. It supports out-of-order responses by means of ID. + * As the ID is implemented as user signal, any module coming after (i.e., * nearer to memory side) with respect to this block must respect id * signals - specifically it must return them identical in the response. - * At the end of the chain, there will typically be a `hci_core_r_id_filter` - * block reflecting back all the IDs. This must be placed at the 0-latency - * boundary with the memory system. - * Priority is normally round-robin but can also be forced from the outside - * by setting `priority_force_i` to 1 and driving the `priority_i` array - * to the desired priority values. + * Priority is normally round-robin. * * .. tabularcolumns:: |l|l|J| - * .. _hci_core_mux_ooo_params: - * .. table:: **hci_core_mux_ooo** design-time parameters. + * .. _hci_outstanding_mux_params: + * .. table:: **hci_outstanding_mux** design-time parameters. 
* * +------------+-------------+--------------------------------+ * | **Name** | **Default** | **Description** | @@ -192,4 +187,31 @@ module hci_outstanding_mux assign out.req_valid = in_req_valid [winner_d]; assign out.resp_ready = in_resp_ready[out.resp_id]; +/* + * Interface size asserts + */ +`ifndef SYNTHESIS +`ifndef VERILATOR +`ifndef VCS + for(genvar i=0; i= $clog2(NB_CHAN)); + end + + `HCI_OUTSTANDING_SIZE_CHECK_ASSERTS(out); + +`endif +`endif +`endif; + endmodule // hci_outstanding_mux diff --git a/rtl/outstanding/hci_outstanding_rob.sv b/rtl/outstanding/hci_outstanding_rob.sv index b3eaa64..5c18ab7 100644 --- a/rtl/outstanding/hci_outstanding_rob.sv +++ b/rtl/outstanding/hci_outstanding_rob.sv @@ -1,12 +1,37 @@ -// Copyright 2023 ETH Zurich and University of Bologna. -// Licensed under the Apache License, Version 2.0, see LICENSE for details. -// SPDX-License-Identifier: Apache-2.0 -// -// Author: Marco Bertuletti, ETH Zurich -// -// This generic module provides an interface through which responses can -// be read in order, despite being written out of order. The responses -// must be indexed with an ID that identifies it within the ROB. +/* + * hci_outstanding_rob.sv + * Marco Bertuletti + * + * Copyright (C) 2017-2023 ETH Zurich, University of Bologna + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. 
+ */ + +/** + * The **HCI-Outstanding reorder buffer** issues requests with up to ROB_NW + * unique user-IDs. The responses can be retired out-of-order, by comparing + * the incoming response user-ID with the issued IDs. As the user-ID is + * implemented as user signal, any module coming after (i.e., nearer to memory + * side) with respect to this block must respect user signals - specifically + * it must return them identical in the response. + * + * .. tabularcolumns:: |l|l|J| + * .. _hci_outstanding_rob_params: + * .. table:: **hci_outstanding_rob** design-time parameters. + * + * +------------+-------------+-----------------------------------------------+ + * | **Name** | **Default** | **Description** | + * +------------+-------------+-----------------------------------------------+ + * | *ROB_NW* | 8 | Number of supported outstanding transactions. | + * +------------+-------------+-----------------------------------------------+ + * + */ `include "hci_helpers.svh" @@ -15,8 +40,7 @@ module hci_outstanding_rob import hci_package::*; import cf_math_pkg::idx_width; #( - parameter int unsigned ROB_NW = 0, - parameter bit FallThrough = 1'b0, + parameter int unsigned ROB_NW = 8, // Dependant parameters. Do not change!
parameter int unsigned ROB_IW = idx_width(ROB_NW), parameter hci_size_parameter_t `HCI_SIZE_PARAM(out) = '0 @@ -38,8 +62,10 @@ module hci_outstanding_rob logic [ROB_IW-1:0] read_pointer_p, read_pointer_q; logic [ROB_IW-1:0] write_pointer_p, write_pointer_q, resp_write_id; logic [ROB_IW-1:0] status_cnt_p, status_cnt_q; + // Status flags logic full, empty; + // Buffer commands logic push, pop, request_id; @@ -54,27 +80,31 @@ module hci_outstanding_rob assign out.req_wen = in.req_wen; assign out.req_be = in.req_be; assign out.req_data = in.req_data; + // Assign unique ROB ID to the user field assign out.req_user = write_pointer_q; assign out.req_id = in.req_id; assign out.req_valid = !full & in.req_valid; assign in.req_ready = !full & out.req_ready; + // HCI Port Right assignment assign in.resp_data = mem_resp_data_q[read_pointer_q]; assign in.resp_opc = mem_resp_opc_q[read_pointer_q]; assign in.resp_user = '0; + // ROB ID of the incoming response assign in.resp_id = mem_req_id_q[read_pointer_q]; assign in.resp_valid = mem_resp_valid_q[read_pointer_q]; assign out.resp_ready = !empty; // Assign status flags - assign full = (status_cnt_q == ROB_NW-1); - assign empty = (status_cnt_q == 'd0); + assign full = (status_cnt_q == ROB_NW-1); + assign empty = (status_cnt_q == 'd0); + // Assign buffer commands assign request_id = in.req_valid & in.req_ready; - assign pop = mem_resp_valid_q[read_pointer_q] & in.resp_ready; - assign push = out.resp_valid & out.resp_ready; + assign pop = mem_resp_valid_q[read_pointer_q] & in.resp_ready; + assign push = out.resp_valid & out.resp_ready; // Read and Write logic always_comb begin: read_write_comb @@ -83,6 +113,7 @@ module hci_outstanding_rob read_pointer_p = read_pointer_q; write_pointer_p = write_pointer_q; status_cnt_p = status_cnt_q; + // Maintain response queue & initiator_id queue mem_req_id_p = mem_req_id_q; mem_resp_data_p = mem_resp_data_q; @@ -93,11 +124,15 @@ module hci_outstanding_rob if (request_id) begin // Store in the 
initiator_id queue mem_req_id_p[write_pointer_q] = in.req_id; + // Increment the write pointer - if (write_pointer_q == ROB_NW-1) + if (write_pointer_q == ROB_NW-1) begin write_pointer_p = 0; - else + end + else begin write_pointer_p = write_pointer_q + 1; + end + // Increment the overall counter status_cnt_p = status_cnt_q + 1; end @@ -115,11 +150,15 @@ module hci_outstanding_rob // Word was consumed mem_req_id_p[read_pointer_q] = 1'b0; mem_resp_valid_p[read_pointer_q] = 1'b0; + // Increment the read pointer - if (read_pointer_q == ROB_NW-1) + if (read_pointer_q == ROB_NW-1) begin read_pointer_p = '0; - else + end + else begin read_pointer_p = read_pointer_q + 1; + end + // Decrement the overall counter status_cnt_p = status_cnt_q - 1; end @@ -128,6 +167,7 @@ module hci_outstanding_rob if (request_id && pop) begin status_cnt_p = status_cnt_q; end + end: read_write_comb always_ff @(posedge clk_i or negedge rst_ni) begin @@ -140,15 +180,16 @@ module hci_outstanding_rob mem_resp_data_q <= '0; mem_resp_opc_q <= '0; mem_resp_valid_q <= '0; - end else begin + end + else begin read_pointer_q <= read_pointer_p; write_pointer_q <= write_pointer_p; status_cnt_q <= status_cnt_p; // Memory queues - mem_req_id_q <= mem_req_id_p; - mem_resp_data_q <= mem_resp_data_p; - mem_resp_opc_q <= mem_resp_opc_p; - mem_resp_valid_q <= mem_resp_valid_p; + mem_req_id_q <= mem_req_id_p; + mem_resp_data_q <= mem_resp_data_p; + mem_resp_opc_q <= mem_resp_opc_p; + mem_resp_valid_q <= mem_resp_valid_p; end end diff --git a/rtl/outstanding/hci_outstanding_sink.sv b/rtl/outstanding/hci_outstanding_sink.sv index b3766ad..ff7ebf8 100644 --- a/rtl/outstanding/hci_outstanding_sink.sv +++ b/rtl/outstanding/hci_outstanding_sink.sv @@ -1,6 +1,6 @@ /* - * hci_core_sink.sv - * Francesco Conti + * hci_outstanding_sink.sv + * Marco Bertuletti * * Copyright (C) 2014-2022 ETH Zurich, University of Bologna * Copyright and related rights are licensed under the Solderpad Hardware @@ -14,43 +14,39 @@ */ /** - * 
The **hci_core_sink** module is the high-level sink streamer - * performing a series of stores on a HCI-Core interface - * from an incoming HWPE-Stream data stream from a HWPE engine/datapath. + * The **hci_outstanding_sink** module is the high-level sink streamer + * performing a series of stores on a HCI-Outstanding interface from an + * incoming HWPE-Stream data stream from a HWPE engine/datapath. * The sink streamer is a composite module that makes use of many other * fundamental IPs. * * Fundamentally, a sink streamer acts as a specialized DMA engine acting * out a predefined pattern from an **hwpe_stream_addressgen_v3** to perform - * a burst of stores via a HCI-Core interface, consuming a HWPE-Stream data - * stream into the HCI-Core `data` field. - * The sink streamer is insensitive to memory latency. - * This is due to the nature of store streams, which are unidirectional - * (i.e. `addr` and `data` move in the same direction). + * a burst of stores via a HCI-Outstanding interface, consuming a HWPE-Stream + * data stream into the HCI-Outstanding `data` field. * - * Misaligned accesses are supported by widening the HCI-Core data width of 32 - * bits compared to the HWPE-Stream that gets consumed by the streamer. - * The stream is shifted according to the address alignment and invalid bytes - * are disabled by unsetting their `strb`. This feature can be deactivated by - * unsetting the `MISALIGNED_ACCESS` parameter; in this case, the sink will - * only work correctly if all data is aligned to a word boundary. + * Misaligned accesses are supported by widening the HCI-Outstanding data + * width of 32 bits compared to the HWPE-Stream that gets consumed by the + * streamer. The stream is shifted according to the address alignment and + * invalid bytes are disabled by unsetting their `strb`. 
This feature can be + * deactivated by unsetting the `MISALIGNED_ACCESS` parameter; in this case, + * the sink will only work correctly if all data is aligned to a word + * boundary. * * .. tabularcolumns:: |l|l|J| - * .. _hci_core_sink_params: - * .. table:: **hci_core_sink** design-time parameters. + * .. _hci_outstanding_sink_params: + * .. table:: **hci_outstanding_sink** design-time parameters. * * +---------------------+-------------+------------------------------------------------------------------------------------------------------------------------+ * | **Name** | **Default** | **Description** | * +---------------------+-------------+------------------------------------------------------------------------------------------------------------------------+ - * | *TCDM_FIFO_DEPTH* | 2 | If >0, the module produces a HWPE-MemDecoupled interface and includes a TCDM FIFO of this depth. | - * +---------------------+-------------+------------------------------------------------------------------------------------------------------------------------+ * | *TRANS_CNT* | 16 | Number of bits supported in the transaction counter of the address generator, which will overflow at 2^ `TRANS_CNT`. | * +---------------------+-------------+------------------------------------------------------------------------------------------------------------------------+ * | *MISALIGNED_ACCESS* | 1 | If set to 0, the sink will not support non-word-aligned HWPE-Mem accesses. | * +---------------------+-------------+------------------------------------------------------------------------------------------------------------------------+ * * .. tabularcolumns:: |l|l|J| - * .. _hci_core_sink_ctrl: + * .. _hci_outstanding_sink_ctrl: * .. table:: **hci_core_sink** input control signals. 
* * +-------------------+------------------------+----------------------------------------------------------------------------+ @@ -62,7 +58,7 @@ * +-------------------+------------------------+----------------------------------------------------------------------------+ * * .. tabularcolumns:: |l|l|J| - * .. _hci_core_sink_flags: + * .. _hci_outstanding_sink_flags: * .. table:: **hci_core_sink** output flags. * * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+ @@ -291,4 +287,4 @@ module hci_outstanding_sink end assign address_cnt_d = address_cnt_q + 1; -endmodule // hci_core_sink +endmodule // hci_outstanding_source diff --git a/rtl/outstanding/hci_outstanding_source.sv b/rtl/outstanding/hci_outstanding_source.sv index eef2469..63ac89c 100644 --- a/rtl/outstanding/hci_outstanding_source.sv +++ b/rtl/outstanding/hci_outstanding_source.sv @@ -1,7 +1,5 @@ /* - * hci_core_source.sv - * Francesco Conti - * Arpan Suravi Prasad + * hci_outstanding_source.sv * Marco Bertuletti * * Copyright (C) 2014-2022 ETH Zurich, University of Bologna @@ -16,51 +14,46 @@ */ /** - * The **hci_core_source** module is the high-level source streamer - * performing a series of loads on a HCI-Core interface + * The **hci_outstanding_source** module is the high-level source streamer + * performing a series of loads on a HCI-Outstanding interface * and producing a HWPE-Stream data stream to feed a HWPE engine/datapath. * The source streamer is a composite module that makes use of many other * fundamental IPs. * * Fundamentally, a source streamer acts as a specialized DMA engine acting * out a predefined pattern from an **hwpe_stream_addressgen_v3** to perform - * a burst of loads via a HCI-Core interface, producing a HWPE-Stream - * data stream from the HCI-Core `r_data` field. - * By default, the HCI-Core streamer supports delayed accesses using a HCI-Core - * interface. 
+ * a burst of loads via a HCI-Outstanding interface, producing a HWPE-Stream + * data stream from the HCI-Outstanding `resp_data` field. + * By default, the HCI-Outstanding streamer supports delayed accesses using a + * HCI-Outstanding interface. * - * Misaligned accesses are supported by widening the HCI-Core data width of 32 - * bits compared to the HWPE-Stream that gets produced by the streamer. - * Unused bytes are simply ignored. This feature can be deactivated by unsetting - * the `MISALIGNED_ACCESS` parameter; in this case, the sink will + * Misaligned accesses are supported by widening the HCI-Outstanding data + * width of 32 bits compared to the HWPE-Stream that gets produced by the + * streamer. Unused bytes are simply ignored. This feature can be deactivated + * by unsetting the `MISALIGNED_ACCESS` parameter; in this case, the source will * only work correctly if all data is aligned to a word boundary. * - * In principle, the source streamer is insensitive to latency. - * However, when configured to support misaligned memory accesses, the address FIFO - * depth sets the maximum supported latency. - * This parameter can be controlled by the `ADDR_MIS_DEPTH` parameter (default 8). - * * .. tabularcolumns:: |l|l|J| - * .. _hci_core_source_params: - * .. table:: **hci_core_source** design-time parameters. + * .. _hci_outstanding_source_params: + * .. table:: **hci_outstanding_source** design-time parameters. * - * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ - * | **Name** | **Default** | **Description** | - * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ - * | *LATCH_FIFO* | 0 | If 1, use latches instead of flip-flops (requires special constraints in synthesis).
| - * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ - * | *TRANS_CNT* | 16 | Number of bits supported in the transaction counter of the address generator, which will overflow at 2^ `TRANS_CNT`. | - * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ - * | *ADDR_MIS_DEPTH* | 8 | Depth of the misaligned address FIFO. This **must** be equal to the max-latency between the HCI-Core `gnt` and `r_valid`.| - * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ - * | *MISALIGNED_ACCESS* | 1 | If set to 0, the source will not support non-word-aligned HCI-Core accesses. | - * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ - * | *PASSTHROUGH_FIFO* | 0 | If set to 1, the address FIFO will be capable of fall-through operation (i.e., skipping the FIFO latency entirely). | - * +---------------------+-------------+--------------------------------------------------------------------------------------------------------------------------+ + * +---------------------+-------------+---------------------------------------------------------------------------------------------------------------------------------+ + * | **Name** | **Default** | **Description** | + * +---------------------+-------------+---------------------------------------------------------------------------------------------------------------------------------+ + * | *LATCH_FIFO* | 0 | If 1, use latches instead of flip-flops (requires special constraints in synthesis). 
| + * +---------------------+-------------+---------------------------------------------------------------------------------------------------------------------------------+ + * | *TRANS_CNT* | 16 | Number of bits supported in the transaction counter of the address generator, which will overflow at 2^ `TRANS_CNT`. | + * +---------------------+-------------+---------------------------------------------------------------------------------------------------------------------------------+ + * | *ADDR_MIS_DEPTH* | 8 | Depth of the misaligned address FIFO. This **must** be equal to the max-latency between the HCI-Outstanding `gnt` and `r_valid`.| + * +---------------------+-------------+---------------------------------------------------------------------------------------------------------------------------------+ + * | *MISALIGNED_ACCESS* | 1 | If set to 0, the source will not support non-word-aligned HCI-Outstanding accesses. | + * +---------------------+-------------+---------------------------------------------------------------------------------------------------------------------------------+ + * | *PASSTHROUGH_FIFO* | 0 | If set to 1, the address FIFO will be capable of fall-through operation (i.e., skipping the FIFO latency entirely). | + * +---------------------+-------------+---------------------------------------------------------------------------------------------------------------------------------+ * * .. tabularcolumns:: |l|l|J| - * .. _hci_core_source_ctrl: - * .. table:: **hci_core_source** input control signals. + * .. _hci_outstanding_source_ctrl: + * .. table:: **hci_outstanding_source** input control signals. * * +-------------------+------------------------+----------------------------------------------------------------------------+ * | **Name** | **Type** | **Description** | @@ -71,8 +64,8 @@ * +-------------------+------------------------+----------------------------------------------------------------------------+ * * .. 
tabularcolumns:: |l|l|J| - * .. _hci_core_source_flags: - * .. table:: **hci_core_source** output flags. + * .. _hci_outstanding_source_flags: + * .. table:: **hci_outstanding_source** output flags. * * +--------------------+------------------------+-----------------------------------------------------------------------------------------------+ * | **Name** | **Type** | **Description** | @@ -348,4 +341,4 @@ module hci_outstanding_source end assign stream_cnt_d = stream_cnt_q + 1; -endmodule // hci_core_source +endmodule // hci_outstanding_source From f29880df75caee4400878749c9db65d348d016a2 Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Thu, 4 Sep 2025 09:38:54 +0200 Subject: [PATCH 5/7] Add assertions --- rtl/common/hci_helpers.svh | 11 +++++++++++ rtl/outstanding/hci_outstanding_rob.sv | 23 +++++++++++++++++++++-- rtl/outstanding/hci_outstanding_sink.sv | 20 ++++++++++++++++++++ rtl/outstanding/hci_outstanding_source.sv | 20 ++++++++++++++++++++ 4 files changed, 72 insertions(+), 2 deletions(-) diff --git a/rtl/common/hci_helpers.svh b/rtl/common/hci_helpers.svh index b94978a..4560a22 100644 --- a/rtl/common/hci_helpers.svh +++ b/rtl/common/hci_helpers.svh @@ -245,6 +245,17 @@ // Asserts (specialized definition for conventional param names `define HCI_SIZE_CHECK_ASSERTS(__intf) `HCI_SIZE_CHECK_ASSERTS_EXPLICIT_PARAM(`HCI_SIZE_PARAM(__intf), __intf) + // Asserts (generic definition usable with any parameter name) + `define HCI_OUTSTANDING_SIZE_CHECK_ASSERTS_EXPLICIT_PARAM(__xparam, __xintf) \ + initial __xparam``_intf_size_check_dw : assert(__xparam.DW == `HCI_SIZE_GET_DW_CHECK(__xintf)); \ + initial __xparam``_intf_size_check_bw : assert(__xparam.BW == `HCI_SIZE_GET_BW_CHECK(__xintf)); \ + initial __xparam``_intf_size_check_aw : assert(__xparam.AW == `HCI_SIZE_GET_AW_CHECK(__xintf)); \ + initial __xparam``_intf_size_check_uw : assert(__xparam.UW == `HCI_SIZE_GET_UW_CHECK(__xintf)); \ + initial __xparam``_intf_size_check_iw : assert(__xparam.IW == 
`HCI_SIZE_GET_IW_CHECK(__xintf)); + + // Asserts (specialized definition for conventional param names + `define HCI_OUTSTANDING_SIZE_CHECK_ASSERTS(__intf) `HCI_OUTSTANDING_SIZE_CHECK_ASSERTS_EXPLICIT_PARAM(`HCI_SIZE_PARAM(__intf), __intf) + `endif `endif /* `ifndef __HCI_HELPERS__ */ diff --git a/rtl/outstanding/hci_outstanding_rob.sv b/rtl/outstanding/hci_outstanding_rob.sv index 5c18ab7..0066e33 100644 --- a/rtl/outstanding/hci_outstanding_rob.sv +++ b/rtl/outstanding/hci_outstanding_rob.sv @@ -197,10 +197,29 @@ module hci_outstanding_rob * Assertions * ****************/ +`ifndef SYNTHESIS +`ifndef VERILATOR +`ifndef VCS + if (ROB_NW == 0) $error("ROB_NW cannot be 0."); - if (UW < ROB_IW) - $error("UW must contain the ROB ID. UW = %0d, ROB_IW = %0d", UW, ROB_IW); + // Interface size asserts + initial + dw : assert(in.DW == out.DW); + initial + bw : assert(in.BW == out.BW); + initial + aw : assert(in.AW == out.AW); + initial + uw : assert(in.UW == out.UW); + initial + iw_out : assert(out.UW >= $clog2(ROB_NW)); + + `HCI_OUTSTANDING_SIZE_CHECK_ASSERTS(out); + +`endif +`endif +`endif; endmodule: hci_outstanding_rob diff --git a/rtl/outstanding/hci_outstanding_sink.sv b/rtl/outstanding/hci_outstanding_sink.sv index ff7ebf8..39964f1 100644 --- a/rtl/outstanding/hci_outstanding_sink.sv +++ b/rtl/outstanding/hci_outstanding_sink.sv @@ -287,4 +287,24 @@ module hci_outstanding_sink end assign address_cnt_d = address_cnt_q + 1; +/* + * Interface size asserts + */ +`ifndef SYNTHESIS +`ifndef VERILATOR +`ifndef VCS + if(MISALIGNED_ACCESSES == 0) begin + initial + dw : assert(stream.DATA_WIDTH == tcdm.DW); + end + else begin + initial + dw : assert(stream.DATA_WIDTH+32 == tcdm.DW); + end + + `HCI_OUTSTANDING_SIZE_CHECK_ASSERTS(tcdm); +`endif +`endif +`endif + endmodule // hci_outstanding_source diff --git a/rtl/outstanding/hci_outstanding_source.sv b/rtl/outstanding/hci_outstanding_source.sv index 63ac89c..b856f3f 100644 --- a/rtl/outstanding/hci_outstanding_source.sv +++ 
b/rtl/outstanding/hci_outstanding_source.sv @@ -341,4 +341,24 @@ module hci_outstanding_source end assign stream_cnt_d = stream_cnt_q + 1; +/* + * Interface size asserts + */ +`ifndef SYNTHESIS +`ifndef VERILATOR +`ifndef VCS + if(MISALIGNED_ACCESSES == 0) begin + initial + dw : assert(stream.DATA_WIDTH == tcdm.DW); + end + else begin + initial + dw : assert(stream.DATA_WIDTH <= tcdm.DW); + end + + `HCI_OUTSTANDING_SIZE_CHECK_ASSERTS(tcdm); +`endif +`endif +`endif + endmodule // hci_outstanding_source From 0582e78d19da6c11f25e4f344faedfad922c152a Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Fri, 12 Sep 2025 10:53:31 +0200 Subject: [PATCH 6/7] Fix parametrization of response FIFO in HCI outstanding source --- rtl/outstanding/hci_outstanding_source.sv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rtl/outstanding/hci_outstanding_source.sv b/rtl/outstanding/hci_outstanding_source.sv index b856f3f..28d8e24 100644 --- a/rtl/outstanding/hci_outstanding_source.sv +++ b/rtl/outstanding/hci_outstanding_source.sv @@ -88,7 +88,7 @@ module hci_outstanding_source // Stream interface params parameter int unsigned LATCH_FIFO = 0, parameter int unsigned TRANS_CNT = 16, - parameter int unsigned ADDR_MIS_DEPTH = 8, // Beware: this must be >= the maximum latency between TCDM gnt and TCDM r_valid!!!
+ parameter int unsigned ADDR_MIS_DEPTH = 8, parameter int unsigned MISALIGNED_ACCESSES = 1, parameter int unsigned PASSTHROUGH_FIFO = 0, parameter hci_size_parameter_t `HCI_SIZE_PARAM(tcdm) = '0 @@ -262,8 +262,8 @@ module hci_outstanding_source assign tcdm.resp_ready = resp_push.ready; hwpe_stream_fifo #( - .DATA_WIDTH ( DATA_WIDTH ), - .FIFO_DEPTH ( 8 ) + .DATA_WIDTH ( DATA_WIDTH ), + .FIFO_DEPTH ( ADDR_MIS_DEPTH ) ) i_resp_fifo ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), From 86c7319ef4601c1586e44ce27f6d58e3e1466525 Mon Sep 17 00:00:00 2001 From: mbertuletti Date: Tue, 14 Oct 2025 17:12:59 +0200 Subject: [PATCH 7/7] Add outstanding FIFO --- Bender.yml | 1 + rtl/outstanding/hci_outstanding_fifo.sv | 274 ++++++++++++++++++++++++ 2 files changed, 275 insertions(+) create mode 100644 rtl/outstanding/hci_outstanding_fifo.sv diff --git a/Bender.yml b/Bender.yml index 5da7ad2..823b290 100644 --- a/Bender.yml +++ b/Bender.yml @@ -50,6 +50,7 @@ sources: - rtl/outstanding/hci_outstanding_mux.sv - rtl/outstanding/hci_outstanding_rob.sv - rtl/outstanding/hci_outstanding_source.sv + - rtl/outstanding/hci_outstanding_fifo.sv - rtl/interco/hci_log_interconnect.sv - rtl/interco/hci_log_interconnect_l2.sv - rtl/interco/hci_new_log_interconnect.sv # `new_XBAR_TCDM` dep. is a private repo diff --git a/rtl/outstanding/hci_outstanding_fifo.sv b/rtl/outstanding/hci_outstanding_fifo.sv new file mode 100644 index 0000000..4c42ae3 --- /dev/null +++ b/rtl/outstanding/hci_outstanding_fifo.sv @@ -0,0 +1,274 @@ +/* + * hci_outstanding_fifo.sv + * Francesco Conti + * + * Copyright (C) 2014-2020 ETH Zurich, University of Bologna + * Copyright and related rights are licensed under the Solderpad Hardware + * License, Version 0.51 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law + * or agreed to in writing, software, hardware and materials distributed under + * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + * CONDITIONS OF ANY KIND, either express or implied. See the License for the + * specific language governing permissions and limitations under the License. + */ + +/** + * The **hci_outstanding_fifo** module implements a hardware FIFO queue for + * HCI-outstanding interfaces. + * + * Mapping of HCI-outstanding and HWPE-Stream signals inside the load FIFO. + * + * .. tabularcolumns:: |l|l|J| + * .. _hci_outstanding_fifo_params: + * .. table:: **hci_outstanding_fifo** design-time parameters. + * + * +------------------------+--------------+--------------------------------------------------------------------------------------+ + * | **Name** | **Default** | **Description** | + * +------------------------+--------------+--------------------------------------------------------------------------------------+ + * | *FIFO_DEPTH* | 8 | Depth of the FIFO queue (multiple of 2). | + * +------------------------+--------------+--------------------------------------------------------------------------------------+ + * | *LATCH_FIFO* | 0 | If 1, use latches instead of flip-flops (requires special constraints in synthesis). | + * +------------------------+--------------+--------------------------------------------------------------------------------------+ + * + * .. tabularcolumns:: |l|l|J| + * .. _hci_outstanding_fifo_flags: + * .. table:: **hci_outstanding_fifo** output flags. + * + * +----------------+--------------+-----------------------------------+ + * | **Name** | **Type** | **Description** | + * +----------------+--------------+-----------------------------------+ + * | *empty* | `logic` | 1 if the FIFO is currently empty. | + * +----------------+--------------+-----------------------------------+ + * | *full* | `logic` | 1 if the FIFO is currently full. 
|
+ * +----------------+--------------+-----------------------------------+
+ * | *push_pointer* | `logic[7:0]` | Unused.                           |
+ * +----------------+--------------+-----------------------------------+
+ * | *pop_pointer*  | `logic[7:0]` | Unused.                           |
+ * +----------------+--------------+-----------------------------------+
+ *
+ */
+
+`include "hci_helpers.svh"
+
+module hci_outstanding_fifo
+  import hwpe_stream_package::*;
+  import hci_package::*;
+#(
+  parameter int unsigned FIFO_DEPTH = 8,
+  parameter int unsigned LATCH_FIFO = 0,
+  parameter hci_size_parameter_t `HCI_SIZE_PARAM(tcdm_initiator) = '0
+)
+(
+  input  logic clk_i,
+  input  logic rst_ni,
+  input  logic clear_i,
+
+  output flags_fifo_t flags_o,
+
+  hci_outstanding_intf.target    tcdm_target,
+  hci_outstanding_intf.initiator tcdm_initiator
+);
+
+  // Two independent queues sit between the two HCI-outstanding ports:
+  //  - i_fifo_outgoing: requests accepted on tcdm_target, replayed on tcdm_initiator
+  //  - i_fifo_incoming: responses accepted on tcdm_initiator, returned on tcdm_target
+  // Both are hwpe_stream_fifo instances of depth FIFO_DEPTH.
+
+  localparam int unsigned DW = `HCI_SIZE_GET_DW(tcdm_initiator);
+  localparam int unsigned BW = `HCI_SIZE_GET_BW(tcdm_initiator);
+  localparam int unsigned AW = `HCI_SIZE_GET_AW(tcdm_initiator);
+  localparam int unsigned UW = `HCI_SIZE_GET_UW(tcdm_initiator);
+  localparam int unsigned IW = `HCI_SIZE_GET_IW(tcdm_initiator);
+
+  // Width of one queued request word: { add, [id], [user], data, be, wen }.
+  // The trailing +1 accounts for the single req_wen bit; without it the packed
+  // word is one bit wider than the stream and the MSB of req_add is truncated
+  // on push and read back as 0 on pop.
+  localparam int unsigned REQ_WIDTH  = AW+UW+IW+DW+DW/BW+1;
+  // Width of one queued response word: { [id], [user], data }.
+  // resp_opc is not stored in the FIFO.
+  localparam int unsigned RESP_WIDTH = UW+IW+DW;
+
+  flags_fifo_t flags_incoming, flags_outgoing;
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( REQ_WIDTH )
+`ifndef SYNTHESIS
+    ,
+    .BYPASS_VCR_ASSERT ( 1'b1 ),
+    .BYPASS_VDR_ASSERT ( 1'b1 )
+`endif
+  ) stream_outgoing_push (
+    .clk ( clk_i )
+  );
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( REQ_WIDTH )
+`ifndef SYNTHESIS
+    ,
+    .BYPASS_VCR_ASSERT ( 1'b1 ),
+    .BYPASS_VDR_ASSERT ( 1'b1 )
+`endif
+  ) stream_outgoing_pop (
+    .clk ( clk_i )
+  );
+
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( RESP_WIDTH )
+`ifndef SYNTHESIS
+    ,
+    .BYPASS_VCR_ASSERT ( 1'b1 ),
+    .BYPASS_VDR_ASSERT ( 1'b1 )
+`endif
+  ) stream_incoming_push (
+    .clk ( clk_i )
+  );
+  hwpe_stream_intf_stream #(
+    .DATA_WIDTH ( RESP_WIDTH )
+`ifndef SYNTHESIS
+    ,
+    .BYPASS_VCR_ASSERT ( 1'b1 ),
+    .BYPASS_VDR_ASSERT ( 1'b1 )
+`endif
+  ) stream_incoming_pop (
+    .clk ( clk_i )
+  );
+
+  /*******************************************/
+  /** target.resp* <- stream_incoming_pop.* **/
+  /*******************************************/
+
+  // Response word layout (LSB first): data, then user (if UW > 0), then id (if IW > 0).
+  assign tcdm_target.resp_data = stream_incoming_pop.data[DW-1:0];
+  if (UW > 0) begin
+    assign tcdm_target.resp_user = stream_incoming_pop.data[UW+DW-1:DW];
+  end else begin
+    assign tcdm_target.resp_user = '0;
+  end
+  if (IW > 0) begin
+    assign tcdm_target.resp_id = stream_incoming_pop.data[UW+DW+IW-1:UW+DW];
+  end else begin
+    assign tcdm_target.resp_id = '0;
+  end
+  assign tcdm_target.resp_opc   = '0; // ignore r_opc in FIFO
+  assign tcdm_target.resp_valid = stream_incoming_pop.valid;
+  assign stream_incoming_pop.ready = tcdm_target.resp_ready;
+
+  /****************************************************/
+  /** stream_incoming_push.* <- tcdm_initiator.resp* **/
+  /****************************************************/
+
+  // Zero-width fields are left out of the packed word, so the concatenation
+  // always matches RESP_WIDTH.
+  if (UW > 0 && IW > 0)
+    assign stream_incoming_push.data = { tcdm_initiator.resp_id, tcdm_initiator.resp_user, tcdm_initiator.resp_data };
+  else if (UW > 0 && IW == 0)
+    assign stream_incoming_push.data = { tcdm_initiator.resp_user, tcdm_initiator.resp_data };
+  else if (UW == 0 && IW > 0)
+    assign stream_incoming_push.data = { tcdm_initiator.resp_id, tcdm_initiator.resp_data };
+  else // UW==IW==0
+    assign stream_incoming_push.data = tcdm_initiator.resp_data;
+
+  assign stream_incoming_push.strb  = '1;
+  assign stream_incoming_push.valid = tcdm_initiator.resp_valid;
+  assign tcdm_initiator.resp_ready  = stream_incoming_push.ready;
+
+
+  hwpe_stream_fifo #(
+    .DATA_WIDTH ( RESP_WIDTH ),
+    .FIFO_DEPTH ( FIFO_DEPTH ),
+    .LATCH_FIFO ( LATCH_FIFO )
+  ) i_fifo_incoming (
+    .clk_i   ( clk_i                       ),
+    .rst_ni  ( rst_ni                      ),
+    .clear_i ( clear_i                     ),
+    .flags_o ( flags_incoming              ),
+    .push_i  ( stream_incoming_push.sink   ),
+    .pop_o   ( stream_incoming_pop.source  )
+  );
+
+  /************************************************/
+  /** stream_outgoing_push.* <- tcdm_target.req* **/
+  /************************************************/
+
+  // wrap tcdm outgoing ports into a stream
+  // Request word layout (MSB first): add, [id], [user], data, be, wen.
+  if (UW > 0 && IW > 0)
+    assign stream_outgoing_push.data = { tcdm_target.req_add, tcdm_target.req_id, tcdm_target.req_user, tcdm_target.req_data, tcdm_target.req_be, tcdm_target.req_wen };
+  else if (UW > 0 && IW == 0)
+    assign stream_outgoing_push.data = { tcdm_target.req_add, tcdm_target.req_user, tcdm_target.req_data, tcdm_target.req_be, tcdm_target.req_wen };
+  else if (UW == 0 && IW > 0)
+    assign stream_outgoing_push.data = { tcdm_target.req_add, tcdm_target.req_id, tcdm_target.req_data, tcdm_target.req_be, tcdm_target.req_wen };
+  else // UW==IW==0
+    assign stream_outgoing_push.data = { tcdm_target.req_add, tcdm_target.req_data, tcdm_target.req_be, tcdm_target.req_wen };
+
+  assign stream_outgoing_push.strb  = '1;
+  assign stream_outgoing_push.valid = tcdm_target.req_valid;
+  assign tcdm_target.req_ready      = stream_outgoing_push.ready;
+
+  /**************************************************/
+  /** tcdm_initiator.req* <- stream_outgoing_pop.* **/
+  /**************************************************/
+
+  logic [REQ_WIDTH-1:0]            stream_outgoing_pop_data;
+  logic [AW-1:0]                   tcdm_initiator_add;
+  logic [DW-1:0]                   tcdm_initiator_data;
+  logic [hci_package::iomsb(UW):0] tcdm_initiator_user;
+  logic [hci_package::iomsb(IW):0] tcdm_initiator_id;
+  logic [DW/BW-1:0]                tcdm_initiator_be;
+  logic                            tcdm_initiator_wen;
+
+  assign stream_outgoing_pop_data = stream_outgoing_pop.data;
+
+  // Unpack with the same field order used on the push side; fields that were
+  // not packed (zero-width user/id) are tied to 0.
+  if (UW > 0 && IW > 0) begin
+    assign { tcdm_initiator_add, tcdm_initiator_id, tcdm_initiator_user, tcdm_initiator_data, tcdm_initiator_be, tcdm_initiator_wen } = stream_outgoing_pop_data;
+  end
+  else if (UW > 0 && IW == 0) begin
+    assign { tcdm_initiator_add, tcdm_initiator_user, tcdm_initiator_data, tcdm_initiator_be, tcdm_initiator_wen } = stream_outgoing_pop_data;
+    assign tcdm_initiator_id = '0;
+  end
+  else if (UW == 0 && IW > 0) begin
+    assign { tcdm_initiator_add, tcdm_initiator_id, tcdm_initiator_data, tcdm_initiator_be, tcdm_initiator_wen } = stream_outgoing_pop_data;
+    assign tcdm_initiator_user = '0;
+  end
+  else begin // UW==IW==0
+    assign { tcdm_initiator_add, tcdm_initiator_data, tcdm_initiator_be, tcdm_initiator_wen } = stream_outgoing_pop_data;
+    assign tcdm_initiator_id   = '0;
+    assign tcdm_initiator_user = '0;
+  end
+
+  assign tcdm_initiator.req_add  = tcdm_initiator_add;
+  assign tcdm_initiator.req_data = tcdm_initiator_data;
+  assign tcdm_initiator.req_user = tcdm_initiator_user;
+  assign tcdm_initiator.req_id   = tcdm_initiator_id;
+  assign tcdm_initiator.req_be   = tcdm_initiator_be;
+  assign tcdm_initiator.req_wen  = tcdm_initiator_wen;
+
+  assign tcdm_initiator.req_valid  = stream_outgoing_pop.valid;
+  assign stream_outgoing_pop.ready = tcdm_initiator.req_ready;
+
+  hwpe_stream_fifo #(
+    .DATA_WIDTH ( REQ_WIDTH  ),
+    .FIFO_DEPTH ( FIFO_DEPTH ),
+    .LATCH_FIFO ( LATCH_FIFO )
+  ) i_fifo_outgoing (
+    .clk_i   ( clk_i                       ),
+    .rst_ni  ( rst_ni                      ),
+    .clear_i ( clear_i                     ),
+    .flags_o ( flags_outgoing              ),
+    .push_i  ( stream_outgoing_push.sink   ),
+    .pop_o   ( stream_outgoing_pop.source  )
+  );
+
+  // Combined flags: empty only when both queues are empty, full as soon as
+  // either queue is full. No other field of flags_o is driven by this module.
+  assign flags_o.empty = flags_incoming.empty & flags_outgoing.empty;
+  assign flags_o.full  = flags_incoming.full | flags_outgoing.full;
+
+/*
+ * Interface size asserts
+ */
+`ifndef SYNTHESIS
+`ifndef VERILATOR
+`ifndef VCS
+  initial
+    dw : assert(tcdm_target.DW == tcdm_initiator.DW);
+  initial
+    bw : assert(tcdm_target.BW == tcdm_initiator.BW);
+  initial
+    aw : assert(tcdm_target.AW == tcdm_initiator.AW);
+  initial
+    uw : assert(tcdm_target.UW == tcdm_initiator.UW);
+  // ids are forwarded unchanged in both directions, so the widths must match
+  initial
+    iw : assert(tcdm_target.IW == tcdm_initiator.IW);
+  initial begin : depth_check
+    if (FIFO_DEPTH % 2 != 0) begin
+      $error("hci_outstanding_fifo FIFO_DEPTH must be a multiple of 2!");
+    end
+  end
+`endif
+`endif
+`endif
+
+endmodule : hci_outstanding_fifo