From 5e2d0dcafe3c5f82f14456b7d5490d952395f135 Mon Sep 17 00:00:00 2001 From: andreaskuster Date: Fri, 8 Jan 2021 16:26:37 +0100 Subject: [PATCH 01/27] Add minimal bug example. --- bug_min.json | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 bug_min.json diff --git a/bug_min.json b/bug_min.json new file mode 100644 index 0000000..1c8278a --- /dev/null +++ b/bug_min.json @@ -0,0 +1,174 @@ +{ + "inputs": { + "crlato": { + "data": "crlato_128_float32.dat", + "data_type": "float32", + "input_dims": [ + "i" + ] + } + }, + "outputs": [ + "out" + ], + "dimensions": [ + 128, + 80, + 128 + ], + "vectorization": 1, + "program": { + "v_tmp": { + "data_type": "float32", + "computation_string": "\nv_tmp = crlato[i]\n", + "boundary_conditions": { + "crlato": { + "btype": "shrink", + "halo": [ + "halo-2", + "halo-1", + 0, + 0, + "halo-1", + "halo-2" + ] + } + } + }, + "u_tmp": { + "data_type": "float32", + "computation_string": "\nu_tmp = crlato[i]\n", + "boundary_conditions": { + "crlato": { + "btype": "shrink", + "halo": [ + "halo-1", + "halo-2", + 0, + 0, + "halo-2", + "halo-1" + ] + } + } + }, + "__tmp_T": { + "data_type": "float32", + "computation_string": "\n__tmp_T = u_tmp[(i, j, k)] + v_tmp[((i + 1), j, k)] + v_tmp[(i, j, k)]\n", + "boundary_conditions": { + "u_tmp": { + "btype": "shrink", + "halo": [ + "halo-1", + "halo-1", + 0, + 0, + "halo-1", + "halo-1" + ] + }, + "v_tmp": { + "btype": "shrink", + "halo": [ + "halo-1", + "halo-1", + 0, + 0, + "halo-1", + "halo-1" + ] + } + } + }, + "__tmp_S": { + "data_type": "float32", + "computation_string": "\n__tmp_S = v_tmp[(i, j, k)] + u_tmp[((i + 1), j, k)] + u_tmp[(i, j, k)]\n", + "boundary_conditions": { + "v_tmp": { + "btype": "shrink", + "halo": [ + "halo-1", + "halo-1", + 0, + 0, + "halo-1", + "halo-1" + ] + }, + "u_tmp": { + "btype": "shrink", + "halo": [ + "halo-1", + "halo-1", + 0, + 0, + "halo-1", + "halo-1" + ] + } + } + }, + "out": { + "data_type": 
"float32", + "computation_string": "\nout = __tmp_S[(i,j,k)] + __tmp_T[(i,j,k)]\n", + "boundary_conditions": { + "__tmp_S":{ + "btype": "shrink", + "halo": [ + "halo", + "halo", + 0, + 0, + "halo", + "halo" + ] + }, + "__tmp_T": { + "btype": "shrink", + "halo": [ + "halo", + "halo", + 0, + 0, + "halo", + "halo" + ] + } + } + } + }, + "constants": { + "eddlat": { + "value": "5729.58", + "data_type": "float32" + }, + "eddlon": { + "value": "5729.58", + "data_type": "float32" + }, + "tau_smag": { + "value": "0.3", + "data_type": "float32" + }, + "weight_smag": { + "value": "0.5", + "data_type": "float32" + }, + "I": { + "value": "128", + "data_type": "int32" + }, + "J": { + "value": "128", + "data_type": "int32" + }, + "K": { + "value": "80", + "data_type": "int32" + }, + "halo": { + "value": "1", + "data_type": "int32" + } + } +} From f4763fc83c5fbc071e7e854143e6588445fc7233 Mon Sep 17 00:00:00 2001 From: andreaskuster Date: Fri, 8 Jan 2021 19:58:40 +0100 Subject: [PATCH 02/27] Further reduce minimal example. 
--- bug_min.json | 170 ++++++++++++--------------------------------------- 1 file changed, 40 insertions(+), 130 deletions(-) diff --git a/bug_min.json b/bug_min.json index 1c8278a..981e180 100644 --- a/bug_min.json +++ b/bug_min.json @@ -1,7 +1,7 @@ { "inputs": { - "crlato": { - "data": "crlato_128_float32.dat", + "inA": { + "data": "inA_float32.dat", "data_type": "float32", "input_dims": [ "i" @@ -12,163 +12,73 @@ "out" ], "dimensions": [ - 128, - 80, - 128 + 8, + 8, + 8 ], "vectorization": 1, "program": { - "v_tmp": { + "k0": { "data_type": "float32", - "computation_string": "\nv_tmp = crlato[i]\n", + "computation_string": "k0 = inA[i]", "boundary_conditions": { - "crlato": { - "btype": "shrink", - "halo": [ - "halo-2", - "halo-1", - 0, - 0, - "halo-1", - "halo-2" - ] - } + "inA": { + "type": "constant", + "value": 0.0 + } } }, - "u_tmp": { + "k1": { "data_type": "float32", - "computation_string": "\nu_tmp = crlato[i]\n", + "computation_string": "k1 = inA[i]", "boundary_conditions": { - "crlato": { - "btype": "shrink", - "halo": [ - "halo-1", - "halo-2", - 0, - 0, - "halo-2", - "halo-1" - ] - } + "inA": { + "type": "constant", + "value": 0.0 + } } }, - "__tmp_T": { + "k2": { "data_type": "float32", - "computation_string": "\n__tmp_T = u_tmp[(i, j, k)] + v_tmp[((i + 1), j, k)] + v_tmp[(i, j, k)]\n", + "computation_string": "k2 = k1[i, j, k] + k0[i + 1, j, k] + k0[i, j, k]", "boundary_conditions": { - "u_tmp": { - "btype": "shrink", - "halo": [ - "halo-1", - "halo-1", - 0, - 0, - "halo-1", - "halo-1" - ] + "k1": { + "type": "constant", + "value": 0.0 }, - "v_tmp": { - "btype": "shrink", - "halo": [ - "halo-1", - "halo-1", - 0, - 0, - "halo-1", - "halo-1" - ] + "k0": { + "type": "constant", + "value": 0.0 } } }, - "__tmp_S": { + "k3": { "data_type": "float32", - "computation_string": "\n__tmp_S = v_tmp[(i, j, k)] + u_tmp[((i + 1), j, k)] + u_tmp[(i, j, k)]\n", + "computation_string": "k3 = k0[i, j, k] + k1[i + 1, j, k] + k1[i, j, k]", "boundary_conditions": { - 
"v_tmp": { - "btype": "shrink", - "halo": [ - "halo-1", - "halo-1", - 0, - 0, - "halo-1", - "halo-1" - ] + "k0": { + "type": "constant", + "value": 0.0 }, - "u_tmp": { - "btype": "shrink", - "halo": [ - "halo-1", - "halo-1", - 0, - 0, - "halo-1", - "halo-1" - ] + "k1": { + "type": "constant", + "value": 0.0 } } }, "out": { "data_type": "float32", - "computation_string": "\nout = __tmp_S[(i,j,k)] + __tmp_T[(i,j,k)]\n", + "computation_string": "out = k2[i,j,k] + k3[i,j,k]", "boundary_conditions": { - "__tmp_S":{ - "btype": "shrink", - "halo": [ - "halo", - "halo", - 0, - 0, - "halo", - "halo" - ] + "k2":{ + "type": "constant", + "value": 0.0 }, - "__tmp_T": { - "btype": "shrink", - "halo": [ - "halo", - "halo", - 0, - 0, - "halo", - "halo" - ] + "k3": { + "type": "constant", + "value": 0.0 } } } - }, - "constants": { - "eddlat": { - "value": "5729.58", - "data_type": "float32" - }, - "eddlon": { - "value": "5729.58", - "data_type": "float32" - }, - "tau_smag": { - "value": "0.3", - "data_type": "float32" - }, - "weight_smag": { - "value": "0.5", - "data_type": "float32" - }, - "I": { - "value": "128", - "data_type": "int32" - }, - "J": { - "value": "128", - "data_type": "int32" - }, - "K": { - "value": "80", - "data_type": "int32" - }, - "halo": { - "value": "1", - "data_type": "int32" - } } } From 89f87c3a9186f6c1e1e41302fbaaa59c209f069f Mon Sep 17 00:00:00 2001 From: andreaskuster Date: Fri, 8 Jan 2021 19:59:07 +0100 Subject: [PATCH 03/27] Account for offset to center. 
--- stencilflow/kernel_chain_graph.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/stencilflow/kernel_chain_graph.py b/stencilflow/kernel_chain_graph.py index ff54023..301827f 100644 --- a/stencilflow/kernel_chain_graph.py +++ b/stencilflow/kernel_chain_graph.py @@ -507,6 +507,11 @@ def compute_delay_buffer(self) -> None: dimensions=self.dimensions, index=stencilflow.list_subtract_cwise( max_delay[:-1], entry[:-1])) + + if not isinstance(node, Output): + max_offset = node.dist_to_center[max(node.dist_to_center, key=lambda x: node.dist_to_center[x])] + max_size = max_offset - node.dist_to_center[entry[-1]] + node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) node.delay_buffer[name].import_data( From e1caeb135ddb5b425fe243e11569638e40799014 Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Sat, 9 Jan 2021 15:18:23 +0100 Subject: [PATCH 04/27] Add fpga0 sdk env vars script --- vars.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 vars.sh diff --git a/vars.sh b/vars.sh new file mode 100644 index 0000000..80989aa --- /dev/null +++ b/vars.sh @@ -0,0 +1,16 @@ +# intel fpga +export INTELFPGAOCLSDKROOT=/opt/intelFPGA_pro/19.1/hld +export PATH=$INTELFPGAOCLSDKROOT/bin/:$PATH +export AOCL_BOARD_PACKAGE_ROOT=$INTELFPGAOCLSDKROOT/board/bittware_pcie/s10 +# /opt/intelFPGA_pro/19.4/hld/board/bittware_pcie/s10/board_env.xml +# /opt/intelFPGA_pro/19.4/hld/board/bittware_pcie/s10_hpc_default/board_env.xml +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$AOCL_BOARD_PACKAGE_ROOT/linux64/lib + +# xilinx fpga +export PATH=/opt/Xilinx/Vitis/2019.2/bin:/opt/Xilinx/Vitis_HLS/2019.2/bin:/opt/Xilinx/Vivado/2019.2/bin:$PATH +export XILINX_XRT=/opt/xilinx/xrt +export PATH=$XILINX_XRT/bin:$PATH +export LD_LIBRARY_PATH=$XILINX_XRT/lib:$LD_LIBRARY_PATH +export XILINXD_LICENSE_FILE=2100@sgv-license-01 +export LIBRARY_PATH=/usr/lib/x86_64-linux-gnu + From 34389ae15307458bb0057a6f1fdb6a3aef30265b Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: 
Sat, 9 Jan 2021 16:27:03 +0100 Subject: [PATCH 05/27] Add larger jacobi3d example --- test/stencils/jacobi3d_512x512x512.json | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 test/stencils/jacobi3d_512x512x512.json diff --git a/test/stencils/jacobi3d_512x512x512.json b/test/stencils/jacobi3d_512x512x512.json new file mode 100644 index 0000000..82db32d --- /dev/null +++ b/test/stencils/jacobi3d_512x512x512.json @@ -0,0 +1,24 @@ +{ + "inputs": { + "a": { + "data": "data/zeros_32x32x32_fp32.dat", + "data_type": "float32" + } + }, + "outputs": ["b"], + "dimensions": [512, 512, 512], + "program": { + "b": { + "computation_string": + "b = 0.16666666 * (a[i-1,j,k] + a[i+1,j,k] + a[i,j-1,k] + a[i,j+1,k] + a[i,j,k-1] + a[i,j,k+1])", + "boundary_conditions": { + "a": { + "type": "constant", + "value": 1.0 + } + }, + "data_type": + "float32" + } + } +} From b1bac07114135dd328e158c417f10fe11ab9885f Mon Sep 17 00:00:00 2001 From: andreaskuster Date: Sat, 9 Jan 2021 17:54:42 +0100 Subject: [PATCH 06/27] Add temporary fix. 
--- stencilflow/kernel_chain_graph.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/stencilflow/kernel_chain_graph.py b/stencilflow/kernel_chain_graph.py index 301827f..2763b65 100644 --- a/stencilflow/kernel_chain_graph.py +++ b/stencilflow/kernel_chain_graph.py @@ -85,6 +85,29 @@ def __init__(self, if self.log_level >= LogLevel.MODERATE: print("Compute delay buffer sizes.") self.compute_delay_buffer() # compute the delay buffer sizes + + for node in self.graph.nodes(): + if node.name == "__tmp_T" or node.name == "__tmp_T_sqr_s_1351": + name = "u_tmp" + max_size = self.dimensions[0]*self.dimensions[1] + node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) + node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) + if node.name == "__tmp_S" or node.name == "__tmp_S_sqr_uv_1352": + name = "v_tmp" + max_size = self.dimensions[0] * self.dimensions[1] + node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) + node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) + if node.name == "__tmp_T_sqr_s_1351": + name = "ms_sdfg_1330___local_frac_1_dx_1660" + max_size = self.dimensions[0]*self.dimensions[1] + node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) + node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) + if node.name == "__tmp_S_sqr_uv_1352": + name = "ms_sdfg_1330___local_frac_1_dx_1660" + max_size = self.dimensions[0] * self.dimensions[1] + node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) + node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) + if self.log_level >= LogLevel.MODERATE: print("Add channels to the graph edges.") # plot kernel graphs if flag set to true From e83e4e3d76c0048ddd8c16511e41cd8dc3e14ad1 Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Sat, 9 Jan 2021 19:19:57 +0100 Subject: [PATCH 07/27] Increase problem size. 
--- bug_min.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bug_min.json b/bug_min.json index 981e180..47815e5 100644 --- a/bug_min.json +++ b/bug_min.json @@ -12,9 +12,9 @@ "out" ], "dimensions": [ - 8, - 8, - 8 + 256, + 256, + 256 ], "vectorization": 1, "program": { From 9da97a3d9c30fbcd02996e0adfa90f3031d62835 Mon Sep 17 00:00:00 2001 From: andreaskuster Date: Mon, 11 Jan 2021 00:18:47 +0100 Subject: [PATCH 08/27] Add more complex example. --- bug_min_ext.json | 94 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 bug_min_ext.json diff --git a/bug_min_ext.json b/bug_min_ext.json new file mode 100644 index 0000000..adc7d4f --- /dev/null +++ b/bug_min_ext.json @@ -0,0 +1,94 @@ +{ + "inputs": { + "inA": { + "data": "inA_float32.dat", + "data_type": "float32", + "input_dims": [ + "i" + ] + } + }, + "outputs": [ + "out" + ], + "dimensions": [ + 8, + 8, + 8 + ], + "vectorization": 1, + "program": { + "k0": { + "data_type": "float32", + "computation_string": "k0 = inA[i]", + "boundary_conditions": { + "inA": { + "type": "constant", + "value": 0.0 + } + } + }, + "k1": { + "data_type": "float32", + "computation_string": "k1 = inA[i]", + "boundary_conditions": { + "inA": { + "type": "constant", + "value": 0.0 + } + } + }, + "k2": { + "data_type": "float32", + "computation_string": "k2 = k1[i, j, k] + k0[i + 1, j, k] + k0[i, j, k]", + "boundary_conditions": { + "k1": { + "type": "constant", + "value": 0.0 + }, + "k0": { + "type": "constant", + "value": 0.0 + } + } + }, + "k3": { + "data_type": "float32", + "computation_string": "k3 = k0[i, j, k] + k4[i + 1, j, k] + k4[i, j, k]", + "boundary_conditions": { + "k0": { + "type": "constant", + "value": 0.0 + }, + "k4": { + "type": "constant", + "value": 0.0 + } + } + }, + "k4": { + "data_type": "float32", + "computation_string": "k4 = k1[i, j, k] + k1[i+1, j, k]", + "boundary_conditions": { + "k1": { + "type": "constant", + "value": 0.0 + } + } + }, 
+ "out": { + "data_type": "float32", + "computation_string": "out = k2[i,j,k] + k3[i,j,k]", + "boundary_conditions": { + "k2":{ + "type": "constant", + "value": 0.0 + }, + "k3": { + "type": "constant", + "value": 0.0 + } + } + } + } +} From 4000eca386f2e0beb51b4d30d57aef5f7b748f1a Mon Sep 17 00:00:00 2001 From: andreaskuster Date: Fri, 8 Jan 2021 16:26:37 +0100 Subject: [PATCH 09/27] Add minimal bug example. --- bug_min.json | 174 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 bug_min.json diff --git a/bug_min.json b/bug_min.json new file mode 100644 index 0000000..1c8278a --- /dev/null +++ b/bug_min.json @@ -0,0 +1,174 @@ +{ + "inputs": { + "crlato": { + "data": "crlato_128_float32.dat", + "data_type": "float32", + "input_dims": [ + "i" + ] + } + }, + "outputs": [ + "out" + ], + "dimensions": [ + 128, + 80, + 128 + ], + "vectorization": 1, + "program": { + "v_tmp": { + "data_type": "float32", + "computation_string": "\nv_tmp = crlato[i]\n", + "boundary_conditions": { + "crlato": { + "btype": "shrink", + "halo": [ + "halo-2", + "halo-1", + 0, + 0, + "halo-1", + "halo-2" + ] + } + } + }, + "u_tmp": { + "data_type": "float32", + "computation_string": "\nu_tmp = crlato[i]\n", + "boundary_conditions": { + "crlato": { + "btype": "shrink", + "halo": [ + "halo-1", + "halo-2", + 0, + 0, + "halo-2", + "halo-1" + ] + } + } + }, + "__tmp_T": { + "data_type": "float32", + "computation_string": "\n__tmp_T = u_tmp[(i, j, k)] + v_tmp[((i + 1), j, k)] + v_tmp[(i, j, k)]\n", + "boundary_conditions": { + "u_tmp": { + "btype": "shrink", + "halo": [ + "halo-1", + "halo-1", + 0, + 0, + "halo-1", + "halo-1" + ] + }, + "v_tmp": { + "btype": "shrink", + "halo": [ + "halo-1", + "halo-1", + 0, + 0, + "halo-1", + "halo-1" + ] + } + } + }, + "__tmp_S": { + "data_type": "float32", + "computation_string": "\n__tmp_S = v_tmp[(i, j, k)] + u_tmp[((i + 1), j, k)] + u_tmp[(i, j, k)]\n", + "boundary_conditions": { + "v_tmp": { + "btype": 
"shrink", + "halo": [ + "halo-1", + "halo-1", + 0, + 0, + "halo-1", + "halo-1" + ] + }, + "u_tmp": { + "btype": "shrink", + "halo": [ + "halo-1", + "halo-1", + 0, + 0, + "halo-1", + "halo-1" + ] + } + } + }, + "out": { + "data_type": "float32", + "computation_string": "\nout = __tmp_S[(i,j,k)] + __tmp_T[(i,j,k)]\n", + "boundary_conditions": { + "__tmp_S":{ + "btype": "shrink", + "halo": [ + "halo", + "halo", + 0, + 0, + "halo", + "halo" + ] + }, + "__tmp_T": { + "btype": "shrink", + "halo": [ + "halo", + "halo", + 0, + 0, + "halo", + "halo" + ] + } + } + } + }, + "constants": { + "eddlat": { + "value": "5729.58", + "data_type": "float32" + }, + "eddlon": { + "value": "5729.58", + "data_type": "float32" + }, + "tau_smag": { + "value": "0.3", + "data_type": "float32" + }, + "weight_smag": { + "value": "0.5", + "data_type": "float32" + }, + "I": { + "value": "128", + "data_type": "int32" + }, + "J": { + "value": "128", + "data_type": "int32" + }, + "K": { + "value": "80", + "data_type": "int32" + }, + "halo": { + "value": "1", + "data_type": "int32" + } + } +} From f0e2e3b8abbc09558f895565f33b30fb0a9f6b4c Mon Sep 17 00:00:00 2001 From: andreaskuster Date: Fri, 8 Jan 2021 19:58:40 +0100 Subject: [PATCH 10/27] Further reduce minimal example. 
--- bug_min.json | 170 ++++++++++++--------------------------------------- 1 file changed, 40 insertions(+), 130 deletions(-) diff --git a/bug_min.json b/bug_min.json index 1c8278a..981e180 100644 --- a/bug_min.json +++ b/bug_min.json @@ -1,7 +1,7 @@ { "inputs": { - "crlato": { - "data": "crlato_128_float32.dat", + "inA": { + "data": "inA_float32.dat", "data_type": "float32", "input_dims": [ "i" @@ -12,163 +12,73 @@ "out" ], "dimensions": [ - 128, - 80, - 128 + 8, + 8, + 8 ], "vectorization": 1, "program": { - "v_tmp": { + "k0": { "data_type": "float32", - "computation_string": "\nv_tmp = crlato[i]\n", + "computation_string": "k0 = inA[i]", "boundary_conditions": { - "crlato": { - "btype": "shrink", - "halo": [ - "halo-2", - "halo-1", - 0, - 0, - "halo-1", - "halo-2" - ] - } + "inA": { + "type": "constant", + "value": 0.0 + } } }, - "u_tmp": { + "k1": { "data_type": "float32", - "computation_string": "\nu_tmp = crlato[i]\n", + "computation_string": "k1 = inA[i]", "boundary_conditions": { - "crlato": { - "btype": "shrink", - "halo": [ - "halo-1", - "halo-2", - 0, - 0, - "halo-2", - "halo-1" - ] - } + "inA": { + "type": "constant", + "value": 0.0 + } } }, - "__tmp_T": { + "k2": { "data_type": "float32", - "computation_string": "\n__tmp_T = u_tmp[(i, j, k)] + v_tmp[((i + 1), j, k)] + v_tmp[(i, j, k)]\n", + "computation_string": "k2 = k1[i, j, k] + k0[i + 1, j, k] + k0[i, j, k]", "boundary_conditions": { - "u_tmp": { - "btype": "shrink", - "halo": [ - "halo-1", - "halo-1", - 0, - 0, - "halo-1", - "halo-1" - ] + "k1": { + "type": "constant", + "value": 0.0 }, - "v_tmp": { - "btype": "shrink", - "halo": [ - "halo-1", - "halo-1", - 0, - 0, - "halo-1", - "halo-1" - ] + "k0": { + "type": "constant", + "value": 0.0 } } }, - "__tmp_S": { + "k3": { "data_type": "float32", - "computation_string": "\n__tmp_S = v_tmp[(i, j, k)] + u_tmp[((i + 1), j, k)] + u_tmp[(i, j, k)]\n", + "computation_string": "k3 = k0[i, j, k] + k1[i + 1, j, k] + k1[i, j, k]", "boundary_conditions": { - 
"v_tmp": { - "btype": "shrink", - "halo": [ - "halo-1", - "halo-1", - 0, - 0, - "halo-1", - "halo-1" - ] + "k0": { + "type": "constant", + "value": 0.0 }, - "u_tmp": { - "btype": "shrink", - "halo": [ - "halo-1", - "halo-1", - 0, - 0, - "halo-1", - "halo-1" - ] + "k1": { + "type": "constant", + "value": 0.0 } } }, "out": { "data_type": "float32", - "computation_string": "\nout = __tmp_S[(i,j,k)] + __tmp_T[(i,j,k)]\n", + "computation_string": "out = k2[i,j,k] + k3[i,j,k]", "boundary_conditions": { - "__tmp_S":{ - "btype": "shrink", - "halo": [ - "halo", - "halo", - 0, - 0, - "halo", - "halo" - ] + "k2":{ + "type": "constant", + "value": 0.0 }, - "__tmp_T": { - "btype": "shrink", - "halo": [ - "halo", - "halo", - 0, - 0, - "halo", - "halo" - ] + "k3": { + "type": "constant", + "value": 0.0 } } } - }, - "constants": { - "eddlat": { - "value": "5729.58", - "data_type": "float32" - }, - "eddlon": { - "value": "5729.58", - "data_type": "float32" - }, - "tau_smag": { - "value": "0.3", - "data_type": "float32" - }, - "weight_smag": { - "value": "0.5", - "data_type": "float32" - }, - "I": { - "value": "128", - "data_type": "int32" - }, - "J": { - "value": "128", - "data_type": "int32" - }, - "K": { - "value": "80", - "data_type": "int32" - }, - "halo": { - "value": "1", - "data_type": "int32" - } } } From 8023480d5d49a3e67bc46bc9a1c65459da6ec8ad Mon Sep 17 00:00:00 2001 From: andreaskuster Date: Fri, 8 Jan 2021 19:59:07 +0100 Subject: [PATCH 11/27] Account for offset to center. 
--- stencilflow/kernel_chain_graph.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/stencilflow/kernel_chain_graph.py b/stencilflow/kernel_chain_graph.py index ff54023..301827f 100644 --- a/stencilflow/kernel_chain_graph.py +++ b/stencilflow/kernel_chain_graph.py @@ -507,6 +507,11 @@ def compute_delay_buffer(self) -> None: dimensions=self.dimensions, index=stencilflow.list_subtract_cwise( max_delay[:-1], entry[:-1])) + + if not isinstance(node, Output): + max_offset = node.dist_to_center[max(node.dist_to_center, key=lambda x: node.dist_to_center[x])] + max_size = max_offset - node.dist_to_center[entry[-1]] + node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) node.delay_buffer[name].import_data( From 0afabe0b7403aed827b47d9a2fbee07782abc76e Mon Sep 17 00:00:00 2001 From: andreaskuster Date: Sat, 9 Jan 2021 17:54:42 +0100 Subject: [PATCH 12/27] Add temporary fix. --- stencilflow/kernel_chain_graph.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/stencilflow/kernel_chain_graph.py b/stencilflow/kernel_chain_graph.py index 301827f..2763b65 100644 --- a/stencilflow/kernel_chain_graph.py +++ b/stencilflow/kernel_chain_graph.py @@ -85,6 +85,29 @@ def __init__(self, if self.log_level >= LogLevel.MODERATE: print("Compute delay buffer sizes.") self.compute_delay_buffer() # compute the delay buffer sizes + + for node in self.graph.nodes(): + if node.name == "__tmp_T" or node.name == "__tmp_T_sqr_s_1351": + name = "u_tmp" + max_size = self.dimensions[0]*self.dimensions[1] + node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) + node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) + if node.name == "__tmp_S" or node.name == "__tmp_S_sqr_uv_1352": + name = "v_tmp" + max_size = self.dimensions[0] * self.dimensions[1] + node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) + node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) + if node.name == 
"__tmp_T_sqr_s_1351": + name = "ms_sdfg_1330___local_frac_1_dx_1660" + max_size = self.dimensions[0]*self.dimensions[1] + node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) + node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) + if node.name == "__tmp_S_sqr_uv_1352": + name = "ms_sdfg_1330___local_frac_1_dx_1660" + max_size = self.dimensions[0] * self.dimensions[1] + node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) + node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) + if self.log_level >= LogLevel.MODERATE: print("Add channels to the graph edges.") # plot kernel graphs if flag set to true From c4b83c5e5a8cfdb41ef94b895850eb750e21418f Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Sat, 9 Jan 2021 15:18:23 +0100 Subject: [PATCH 13/27] Add fpga0 sdk env vars script --- vars.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 vars.sh diff --git a/vars.sh b/vars.sh new file mode 100644 index 0000000..80989aa --- /dev/null +++ b/vars.sh @@ -0,0 +1,16 @@ +# intel fpga +export INTELFPGAOCLSDKROOT=/opt/intelFPGA_pro/19.1/hld +export PATH=$INTELFPGAOCLSDKROOT/bin/:$PATH +export AOCL_BOARD_PACKAGE_ROOT=$INTELFPGAOCLSDKROOT/board/bittware_pcie/s10 +# /opt/intelFPGA_pro/19.4/hld/board/bittware_pcie/s10/board_env.xml +# /opt/intelFPGA_pro/19.4/hld/board/bittware_pcie/s10_hpc_default/board_env.xml +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$AOCL_BOARD_PACKAGE_ROOT/linux64/lib + +# xilinx fpga +export PATH=/opt/Xilinx/Vitis/2019.2/bin:/opt/Xilinx/Vitis_HLS/2019.2/bin:/opt/Xilinx/Vivado/2019.2/bin:$PATH +export XILINX_XRT=/opt/xilinx/xrt +export PATH=$XILINX_XRT/bin:$PATH +export LD_LIBRARY_PATH=$XILINX_XRT/lib:$LD_LIBRARY_PATH +export XILINXD_LICENSE_FILE=2100@sgv-license-01 +export LIBRARY_PATH=/usr/lib/x86_64-linux-gnu + From ab8c555961403a03fa7609030bbc08e5fe4765d4 Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Sat, 9 Jan 2021 16:27:03 +0100 Subject: [PATCH 
14/27] Add larger jacobi3d example --- test/stencils/jacobi3d_512x512x512.json | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 test/stencils/jacobi3d_512x512x512.json diff --git a/test/stencils/jacobi3d_512x512x512.json b/test/stencils/jacobi3d_512x512x512.json new file mode 100644 index 0000000..82db32d --- /dev/null +++ b/test/stencils/jacobi3d_512x512x512.json @@ -0,0 +1,24 @@ +{ + "inputs": { + "a": { + "data": "data/zeros_32x32x32_fp32.dat", + "data_type": "float32" + } + }, + "outputs": ["b"], + "dimensions": [512, 512, 512], + "program": { + "b": { + "computation_string": + "b = 0.16666666 * (a[i-1,j,k] + a[i+1,j,k] + a[i,j-1,k] + a[i,j+1,k] + a[i,j,k-1] + a[i,j,k+1])", + "boundary_conditions": { + "a": { + "type": "constant", + "value": 1.0 + } + }, + "data_type": + "float32" + } + } +} From 47cc6666b27c857e64ae2fb881ba33140008d318 Mon Sep 17 00:00:00 2001 From: andreaskuster Date: Mon, 11 Jan 2021 00:18:47 +0100 Subject: [PATCH 15/27] Add more complex example. 
--- bug_min_ext.json | 94 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 bug_min_ext.json diff --git a/bug_min_ext.json b/bug_min_ext.json new file mode 100644 index 0000000..adc7d4f --- /dev/null +++ b/bug_min_ext.json @@ -0,0 +1,94 @@ +{ + "inputs": { + "inA": { + "data": "inA_float32.dat", + "data_type": "float32", + "input_dims": [ + "i" + ] + } + }, + "outputs": [ + "out" + ], + "dimensions": [ + 8, + 8, + 8 + ], + "vectorization": 1, + "program": { + "k0": { + "data_type": "float32", + "computation_string": "k0 = inA[i]", + "boundary_conditions": { + "inA": { + "type": "constant", + "value": 0.0 + } + } + }, + "k1": { + "data_type": "float32", + "computation_string": "k1 = inA[i]", + "boundary_conditions": { + "inA": { + "type": "constant", + "value": 0.0 + } + } + }, + "k2": { + "data_type": "float32", + "computation_string": "k2 = k1[i, j, k] + k0[i + 1, j, k] + k0[i, j, k]", + "boundary_conditions": { + "k1": { + "type": "constant", + "value": 0.0 + }, + "k0": { + "type": "constant", + "value": 0.0 + } + } + }, + "k3": { + "data_type": "float32", + "computation_string": "k3 = k0[i, j, k] + k4[i + 1, j, k] + k4[i, j, k]", + "boundary_conditions": { + "k0": { + "type": "constant", + "value": 0.0 + }, + "k4": { + "type": "constant", + "value": 0.0 + } + } + }, + "k4": { + "data_type": "float32", + "computation_string": "k4 = k1[i, j, k] + k1[i+1, j, k]", + "boundary_conditions": { + "k1": { + "type": "constant", + "value": 0.0 + } + } + }, + "out": { + "data_type": "float32", + "computation_string": "out = k2[i,j,k] + k3[i,j,k]", + "boundary_conditions": { + "k2":{ + "type": "constant", + "value": 0.0 + }, + "k3": { + "type": "constant", + "value": 0.0 + } + } + } + } +} From ed1dcb8fc6e9953292bda2f4b45be161e429a1de Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Sat, 9 Jan 2021 19:19:57 +0100 Subject: [PATCH 16/27] Increase problem size. 
--- bug_min.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bug_min.json b/bug_min.json index 981e180..47815e5 100644 --- a/bug_min.json +++ b/bug_min.json @@ -12,9 +12,9 @@ "out" ], "dimensions": [ - 8, - 8, - 8 + 256, + 256, + 256 ], "vectorization": 1, "program": { From ff683e158a8718a74ee52bf6f9392af6d22ec715 Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Sat, 11 Sep 2021 21:45:31 +0200 Subject: [PATCH 17/27] Make example more distinct to test function correctness. --- bug_min.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bug_min.json b/bug_min.json index 47815e5..cf2679b 100644 --- a/bug_min.json +++ b/bug_min.json @@ -12,9 +12,9 @@ "out" ], "dimensions": [ - 256, - 256, - 256 + 10, + 10, + 10 ], "vectorization": 1, "program": { @@ -40,7 +40,7 @@ }, "k2": { "data_type": "float32", - "computation_string": "k2 = k1[i, j, k] + k0[i + 1, j, k] + k0[i, j, k]", + "computation_string": "k2 = k1[i, j, k] + k0[i+1, j, k] + k0[i, j, k]", "boundary_conditions": { "k1": { "type": "constant", @@ -54,7 +54,7 @@ }, "k3": { "data_type": "float32", - "computation_string": "k3 = k0[i, j, k] + k1[i + 1, j, k] + k1[i, j, k]", + "computation_string": "k3 = k0[i, j, k] + k1[i+1, j+1, k+1] + k1[i, j, k]", "boundary_conditions": { "k0": { "type": "constant", @@ -68,7 +68,7 @@ }, "out": { "data_type": "float32", - "computation_string": "out = k2[i,j,k] + k3[i,j,k]", + "computation_string": "out = k2[i, j, k] + k3[i, j, k]", "boundary_conditions": { "k2":{ "type": "constant", From 0e35d03e928ab1fd82faaf60cf7a15aeb6d09707 Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Sat, 11 Sep 2021 21:46:16 +0200 Subject: [PATCH 18/27] Add path inclusion for direct file execution. Extend optimization functionality. 
--- stencilflow/kernel_chain_graph.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/stencilflow/kernel_chain_graph.py b/stencilflow/kernel_chain_graph.py index 2763b65..52c73b6 100644 --- a/stencilflow/kernel_chain_graph.py +++ b/stencilflow/kernel_chain_graph.py @@ -15,6 +15,8 @@ import operator import re import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(__file__))) from typing import Any, List, Dict, Tuple @@ -817,6 +819,14 @@ def runtime_lower_bound(self): type=int) parser.add_argument("-report", action="store_true") parser.add_argument("-simulate", action="store_true") + parser.add_argument("-opt", action="store_true") + parser.add_argument("-opt_goal", default=["min_fast_mem", 12000], nargs="+") + """ + choices: + - min_com_vol, FAST_MEM_BOUND, SLOW_MEM_BOUND + - min_fast_mem, COM_VOL_BOUND + - opt_ratio, RATIO + """ args = parser.parse_args() args.log_level = stencilflow.log_level.LogLevel(args.log_level) program_description = stencilflow.parse_json(args.stencil_file) @@ -837,6 +847,17 @@ def runtime_lower_bound(self): log_level=LogLevel(args.log_level)) sim.simulate() + # choose optimization goal + if args.opt: + from stencilflow import Optimizer + opt = Optimizer(self.kernel_nodes, self.dimensions) + if args.opt_goal[0] == "min_com_vol": + opt.minimize_comm_vol(fast_memory_bound=args.opt_goal[1], slow_memory_bound=args.opt_goal[2]) + if args.opt_goal[0] == "min_fast_mem": + opt.minimize_fast_mem(communication_volume_bound=args.opt_goal[1]) + if args.opt_goal[0] == "opt_ratio": + opt.optimize_to_ratio(ratio=args.opt_goal[1]) + # output a report if argument -report is true if args.report: chain.report(args.stencil_file) From fb9966c163cd4bab69935a85a027b07f0bbb8712 Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Sat, 11 Sep 2021 21:46:44 +0200 Subject: [PATCH 19/27] Several readme extension --- README.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/README.md b/README.md index 
7f9989c..08d3765 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ To run the code, the following software must be available: - Python 3.6.x or newer. - The `virtualenv` module (installed with `pip install virtualenv`). - A C++17-capable compiler (e.g., GCC 7.x or Clang 6.x). +- graphviz (for graph plotting support) - One or both FPGA compilers: - Intel FPGA OpenCL SDK (tested with 18.1.1 and 19.1) - Xilinx Vitis (tested with 2020.2) @@ -47,6 +48,13 @@ kernel source files themselves in: .dacecache//src/intel_fpga/device ``` +To run a low-level analysis of the buffer sizes and visualize the stencil program, you can invoke the executable `stencilflow/kernel_chain_graph.py`. +Example usage: + +```bash +stencilflow/kernel_chain_graph.py -stencil_file test/stencils/jacobi3d_32x32x32_8itr_8vec.json -plot -simulate -report -opt +``` + Verification ------------ @@ -81,3 +89,16 @@ It is a known issue that launching multiple Intel FPGA kernels in quick succession (such as is done in the tests) can sometimes fail sporadically, seemingly due to file I/O issues. Running individual programs should never fail.
+Publication +----------- + +If you use StencilFlow, cite us: +```bibtex +@inproceedings{dace, + author = {Johannes de Fine Licht, Andreas Kuster, Tiziano De Matteis, Tal Ben-Nun, Dominic Hofer, Torsten Hoefler}, + title = {StencilFlow: Mapping Large Stencil Programs to Distributed Spatial Computing Systems}, + year = {2021}, + booktitle = {Proceedings of the IEEE/ACM International Symposium on Code Generation and Optimization (CGO)}, + series = {CGO '21} +} +``` \ No newline at end of file From 67a6b93a87e1163470f2317645c287eaebc22ace Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Sat, 11 Sep 2021 22:40:02 +0200 Subject: [PATCH 20/27] Remove horidiff hotfix --- stencilflow/kernel_chain_graph.py | 73 +++++++++++-------------------- 1 file changed, 25 insertions(+), 48 deletions(-) diff --git a/stencilflow/kernel_chain_graph.py b/stencilflow/kernel_chain_graph.py index 52c73b6..8723851 100644 --- a/stencilflow/kernel_chain_graph.py +++ b/stencilflow/kernel_chain_graph.py @@ -16,6 +16,7 @@ import re import os import sys + sys.path.append(os.path.dirname(os.path.dirname(__file__))) from typing import Any, List, Dict, Tuple @@ -87,29 +88,6 @@ def __init__(self, if self.log_level >= LogLevel.MODERATE: print("Compute delay buffer sizes.") self.compute_delay_buffer() # compute the delay buffer sizes - - for node in self.graph.nodes(): - if node.name == "__tmp_T" or node.name == "__tmp_T_sqr_s_1351": - name = "u_tmp" - max_size = self.dimensions[0]*self.dimensions[1] - node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) - node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) - if node.name == "__tmp_S" or node.name == "__tmp_S_sqr_uv_1352": - name = "v_tmp" - max_size = self.dimensions[0] * self.dimensions[1] - node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) - node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) - if node.name == "__tmp_T_sqr_s_1351": - name = 
"ms_sdfg_1330___local_frac_1_dx_1660" - max_size = self.dimensions[0]*self.dimensions[1] - node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) - node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) - if node.name == "__tmp_S_sqr_uv_1352": - name = "ms_sdfg_1330___local_frac_1_dx_1660" - max_size = self.dimensions[0] * self.dimensions[1] - node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) - node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) - if self.log_level >= LogLevel.MODERATE: print("Add channels to the graph edges.") # plot kernel graphs if flag set to true @@ -314,14 +292,14 @@ def add_channels(self) -> None: name = src.name + "_" + dest.name channel = { "name": - name, + name, "delay_buffer": - self.kernel_nodes[dest.name].delay_buffer[ - src.name], + self.kernel_nodes[dest.name].delay_buffer[ + src.name], "internal_buffer": - dest.internal_buffer[src.name], + dest.internal_buffer[src.name], "data_type": - src.data_type + src.data_type } # add channel reference to global channel dictionary self.channels[name] = channel @@ -339,18 +317,18 @@ def add_channels(self) -> None: name = src.name + "_" + dest.name channel = { "name": - name, + name, "delay_buffer": - self.kernel_nodes[dest.name].delay_buffer[ - src.name], + self.kernel_nodes[dest.name].delay_buffer[ + src.name], "internal_buffer": - dest.internal_buffer[src.name], + dest.internal_buffer[src.name], "data_type": - src.data_type, + src.data_type, "input_dims": - self.inputs[src.name]["input_dims"] - if "input_dims" in self.inputs[src.name] - else None + self.inputs[src.name]["input_dims"] + if "input_dims" in self.inputs[src.name] + else None } # add channel reference to global channel dictionary self.channels[name] = channel @@ -367,13 +345,13 @@ def add_channels(self) -> None: name = src.name + "_" + dest.name channel = { "name": - name, + name, "delay_buffer": - self.output_nodes[dest.name].delay_buffer[ - 
src.name], + self.output_nodes[dest.name].delay_buffer[ + src.name], "internal_buffer": {}, "data_type": - src.data_type + src.data_type } # add channel reference to global channel dictionary self.channels[name] = channel @@ -411,7 +389,7 @@ def import_input(self) -> None: else: i["input_dims"] = stencilflow.ITERATORS[len(stencilflow. ITERATORS) - - self.kernel_dimensions:] + self.kernel_dimensions:] self.outputs = inp["outputs"] # handle stencil program output dimensions if self.kernel_dimensions == 1: # 1D @@ -419,8 +397,8 @@ def import_input(self) -> None: self.program[entry]["computation_string"] = \ self.program[entry]["computation_string"].replace("[", "[i, j,") # add two extra indices self.dimensions = [ - 1, 1 - ] + inp["dimensions"] # add two extra dimensions + 1, 1 + ] + inp["dimensions"] # add two extra dimensions elif self.kernel_dimensions == 2: # 2D for entry in self.program: self.program[entry]["computation_string"] = self.program[entry]["computation_string"] \ @@ -514,16 +492,14 @@ def compute_delay_buffer(self) -> None: order = list(nx.topological_sort(self.graph)) except nx.exception.NetworkXUnfeasible: cycle = next(nx.algorithms.cycles.simple_cycles(self.graph)) - raise ValueError("Cycle detected: {}".format( - [c.name for c in cycle])) + raise ValueError("Cycle detected: {}".format([c.name for c in cycle])) # go through all nodes for node in order: # process delay buffer (no additional delay buffer will appear because of the topological order) for inp in node.input_paths: # compute maximum delay size per input max_delay = max(node.input_paths[inp]) - max_delay[ - 2] += 1 # add an extra delay cycle for the processing in the kernel node + max_delay[2] += 1 # add an extra delay cycle for the processing in the kernel node # loop over all inputs and set their size relative to the max size to have data ready at the exact # same time for entry in node.input_paths[inp]: @@ -746,7 +722,7 @@ def report(self, name): u.name, v.name, entry.name, 
entry.maxsize)) total_fast += entry.maxsize print("buffer size slow memory: {} \nbuffer size fast memory: {}".format( - total_slow, total_fast)) + total_slow, total_fast)) def operation_count(self): """For each operation type found in the ASTs, return a tuple of @@ -850,6 +826,7 @@ def runtime_lower_bound(self): # choose optimization goal if args.opt: from stencilflow import Optimizer + opt = Optimizer(self.kernel_nodes, self.dimensions) if args.opt_goal[0] == "min_com_vol": opt.minimize_comm_vol(fast_memory_bound=args.opt_goal[1], slow_memory_bound=args.opt_goal[2]) From 47dfc5874ac45f0758e6429bd9e453a99dd6a18e Mon Sep 17 00:00:00 2001 From: Andreas Kuster <20418060+andreaskuster@users.noreply.github.com> Date: Mon, 13 Sep 2021 20:34:47 +0200 Subject: [PATCH 21/27] Update README.md Co-authored-by: definelicht --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 08d3765..6f4c9d2 100644 --- a/README.md +++ b/README.md @@ -98,7 +98,7 @@ If you use StencilFlow, cite us: author = {Johannes de Fine Licht, Andreas Kuster, Tiziano De Matteis, Tal Ben-Nun, Dominic Hofer, Torsten Hoefler}, title = {StencilFlow: Mapping Large Stencil Programs to Distributed Spatial Computing Systems}, year = {2021}, - booktitle = {Proceedings of the IEEE/ACM International Symposium on Code Generation and Optimization (CGO)}, + booktitle = {Proceedings of the IEEE/ACM International Symposium on Code Generation and Optimization (CGO'21)}, series = {CGO '21} } ``` \ No newline at end of file From 6b737df8ad9700b000e69b458f8da60b42214397 Mon Sep 17 00:00:00 2001 From: Andreas Kuster <20418060+andreaskuster@users.noreply.github.com> Date: Mon, 13 Sep 2021 20:35:00 +0200 Subject: [PATCH 22/27] Update README.md Co-authored-by: definelicht --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6f4c9d2..f86405d 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ Publication If you use 
StencilFlow, cite us: ```bibtex @inproceedings{dace, - author = {Johannes de Fine Licht, Andreas Kuster, Tiziano De Matteis, Tal Ben-Nun, Dominic Hofer, Torsten Hoefler}, + author = {Johannes de~Fine~Licht and Andreas Kuster and Tiziano De~Matteis and Tal Ben-Nun and Dominic Hofer and Torsten Hoefler}, title = {StencilFlow: Mapping Large Stencil Programs to Distributed Spatial Computing Systems}, year = {2021}, booktitle = {Proceedings of the IEEE/ACM International Symposium on Code Generation and Optimization (CGO'21)}, From 6a27a5f1ae7bb292202a8276bebde933bd1d6e75 Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Mon, 13 Sep 2021 21:22:44 +0200 Subject: [PATCH 23/27] Move test config to default location --- bug_min_ext.json | 94 ------------------- .../stencils/horidiff_min.json | 0 2 files changed, 94 deletions(-) delete mode 100644 bug_min_ext.json rename bug_min.json => test/stencils/horidiff_min.json (100%) diff --git a/bug_min_ext.json b/bug_min_ext.json deleted file mode 100644 index adc7d4f..0000000 --- a/bug_min_ext.json +++ /dev/null @@ -1,94 +0,0 @@ -{ - "inputs": { - "inA": { - "data": "inA_float32.dat", - "data_type": "float32", - "input_dims": [ - "i" - ] - } - }, - "outputs": [ - "out" - ], - "dimensions": [ - 8, - 8, - 8 - ], - "vectorization": 1, - "program": { - "k0": { - "data_type": "float32", - "computation_string": "k0 = inA[i]", - "boundary_conditions": { - "inA": { - "type": "constant", - "value": 0.0 - } - } - }, - "k1": { - "data_type": "float32", - "computation_string": "k1 = inA[i]", - "boundary_conditions": { - "inA": { - "type": "constant", - "value": 0.0 - } - } - }, - "k2": { - "data_type": "float32", - "computation_string": "k2 = k1[i, j, k] + k0[i + 1, j, k] + k0[i, j, k]", - "boundary_conditions": { - "k1": { - "type": "constant", - "value": 0.0 - }, - "k0": { - "type": "constant", - "value": 0.0 - } - } - }, - "k3": { - "data_type": "float32", - "computation_string": "k3 = k0[i, j, k] + k4[i + 1, j, k] + k4[i, j, k]", -
"boundary_conditions": { - "k0": { - "type": "constant", - "value": 0.0 - }, - "k4": { - "type": "constant", - "value": 0.0 - } - } - }, - "k4": { - "data_type": "float32", - "computation_string": "k4 = k1[i, j, k] + k1[i+1, j, k]", - "boundary_conditions": { - "k1": { - "type": "constant", - "value": 0.0 - } - } - }, - "out": { - "data_type": "float32", - "computation_string": "out = k2[i,j,k] + k3[i,j,k]", - "boundary_conditions": { - "k2":{ - "type": "constant", - "value": 0.0 - }, - "k3": { - "type": "constant", - "value": 0.0 - } - } - } - } -} diff --git a/bug_min.json b/test/stencils/horidiff_min.json similarity index 100% rename from bug_min.json rename to test/stencils/horidiff_min.json From b9ae43b6083c5623f1aedae96c27ea1d926ffb74 Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Mon, 13 Sep 2021 21:23:00 +0200 Subject: [PATCH 24/27] Remove local env setup --- vars.sh | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 vars.sh diff --git a/vars.sh b/vars.sh deleted file mode 100644 index 80989aa..0000000 --- a/vars.sh +++ /dev/null @@ -1,16 +0,0 @@ -# intel fpga -export INTELFPGAOCLSDKROOT=/opt/intelFPGA_pro/19.1/hld -export PATH=$INTELFPGAOCLSDKROOT/bin/:$PATH -export AOCL_BOARD_PACKAGE_ROOT=$INTELFPGAOCLSDKROOT/board/bittware_pcie/s10 -# /opt/intelFPGA_pro/19.4/hld/board/bittware_pcie/s10/board_env.xml -# /opt/intelFPGA_pro/19.4/hld/board/bittware_pcie/s10_hpc_default/board_env.xml -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$AOCL_BOARD_PACKAGE_ROOT/linux64/lib - -# xilinx fpga -export PATH=/opt/Xilinx/Vitis/2019.2/bin:/opt/Xilinx/Vitis_HLS/2019.2/bin:/opt/Xilinx/Vivado/2019.2/bin:$PATH -export XILINX_XRT=/opt/xilinx/xrt -export PATH=$XILINX_XRT/bin:$PATH -export LD_LIBRARY_PATH=$XILINX_XRT/lib:$LD_LIBRARY_PATH -export XILINXD_LICENSE_FILE=2100@sgv-license-01 -export LIBRARY_PATH=/usr/lib/x86_64-linux-gnu - From 86ab704c8d114a105a72a96a108a933c7558c627 Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Tue, 14 Sep 2021 13:25:39 
+0200 Subject: [PATCH 25/27] Add extended horidiff example. Adjust delay buffer computation. --- stencilflow/kernel_chain_graph.py | 24 +++++--- test/stencils/horidiff_min_ext.json | 94 +++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 9 deletions(-) create mode 100644 test/stencils/horidiff_min_ext.json diff --git a/stencilflow/kernel_chain_graph.py b/stencilflow/kernel_chain_graph.py index 8723851..8aad222 100644 --- a/stencilflow/kernel_chain_graph.py +++ b/stencilflow/kernel_chain_graph.py @@ -497,6 +497,13 @@ def compute_delay_buffer(self) -> None: for node in order: # process delay buffer (no additional delay buffer will appear because of the topological order) for inp in node.input_paths: + + # add internal buffer latency for internal computation + if not isinstance(node, Output): + for entry in node.input_paths[inp]: + name = entry[-1] + entry[2] += node.dist_to_center[name] + # compute maximum delay size per input max_delay = max(node.input_paths[inp]) max_delay[2] += 1 # add an extra delay cycle for the processing in the kernel node @@ -506,17 +513,16 @@ def compute_delay_buffer(self) -> None: name = entry[-1] max_size = stencilflow.convert_3d_to_1d( dimensions=self.dimensions, - index=stencilflow.list_subtract_cwise( - max_delay[:-1], entry[:-1])) + index=stencilflow.list_subtract_cwise(max_delay[:-1], entry[:-1])) + node.delay_buffer[name] = BoundedQueue(name=name, maxsize=max_size) + node.delay_buffer[name].import_data([None] * node.delay_buffer[name].maxsize) - if not isinstance(node, Output): - max_offset = node.dist_to_center[max(node.dist_to_center, key=lambda x: node.dist_to_center[x])] - max_size = max_offset - node.dist_to_center[entry[-1]] + # remove internal buffer latency for internal computation + if not isinstance(node, Output): + for entry in node.input_paths[inp]: + name = entry[-1] + entry[2] -= node.dist_to_center[name] - node.delay_buffer[name] = BoundedQueue(name=name, - maxsize=max_size) - 
node.delay_buffer[name].import_data( - [None] * node.delay_buffer[name].maxsize) # set input node delay buffers to 1 if isinstance(node, Input): node.delay_buffer = BoundedQueue(name=node.name, diff --git a/test/stencils/horidiff_min_ext.json b/test/stencils/horidiff_min_ext.json new file mode 100644 index 0000000..607ff61 --- /dev/null +++ b/test/stencils/horidiff_min_ext.json @@ -0,0 +1,94 @@ +{ + "inputs": { + "inA": { + "data": "inA_float32.dat", + "data_type": "float32", + "input_dims": [ + "i" + ] + } + }, + "outputs": [ + "out" + ], + "dimensions": [ + 10, + 10, + 10 + ], + "vectorization": 1, + "program": { + "k0": { + "data_type": "float32", + "computation_string": "k0 = inA[i]", + "boundary_conditions": { + "inA": { + "type": "constant", + "value": 0.0 + } + } + }, + "k1": { + "data_type": "float32", + "computation_string": "k1 = inA[i]", + "boundary_conditions": { + "inA": { + "type": "constant", + "value": 0.0 + } + } + }, + "k2": { + "data_type": "float32", + "computation_string": "k2 = k1[i, j, k] + k0[i + 1, j, k] + k0[i, j, k]", + "boundary_conditions": { + "k1": { + "type": "constant", + "value": 0.0 + }, + "k0": { + "type": "constant", + "value": 0.0 + } + } + }, + "k3": { + "data_type": "float32", + "computation_string": "k3 = k0[i, j, k] + k4[i+1, j+1, k+1] + k4[i, j, k]", + "boundary_conditions": { + "k0": { + "type": "constant", + "value": 0.0 + }, + "k4": { + "type": "constant", + "value": 0.0 + } + } + }, + "k4": { + "data_type": "float32", + "computation_string": "k4 = k1[i, j, k] + k1[i+1, j+1, k+1]", + "boundary_conditions": { + "k1": { + "type": "constant", + "value": 0.0 + } + } + }, + "out": { + "data_type": "float32", + "computation_string": "out = k2[i,j,k] + k3[i,j,k]", + "boundary_conditions": { + "k2":{ + "type": "constant", + "value": 0.0 + }, + "k3": { + "type": "constant", + "value": 0.0 + } + } + } + } +} From d442f68c9d28a43d0e415ef75ab34376947b836e Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Wed, 15 Sep 2021 14:34:00 
+0200 Subject: [PATCH 26/27] Reduce min channel depth to 1024 --- stencilflow/sdfg_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stencilflow/sdfg_generator.py b/stencilflow/sdfg_generator.py index 22629b4..87e54e5 100644 --- a/stencilflow/sdfg_generator.py +++ b/stencilflow/sdfg_generator.py @@ -28,7 +28,7 @@ import networkx as nx -MINIMUM_CHANNEL_DEPTH = 2048 +MINIMUM_CHANNEL_DEPTH = 1024 NUM_BANKS = 4 From 6a0cf1b806622f7a2c62fb967b610c459286e94b Mon Sep 17 00:00:00 2001 From: Andreas Kuster Date: Wed, 15 Sep 2021 23:13:29 +0200 Subject: [PATCH 27/27] Update dace version --- dace | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dace b/dace index 1fc6ddd..e732b1d 160000 --- a/dace +++ b/dace @@ -1 +1 @@ -Subproject commit 1fc6dddd94ee7fd467f1802398f4dad778c9a68a +Subproject commit e732b1d7ff83debeac9c7075f9ec78f4d5facc05