From 7583b420236d0108364e6414a4ec385a589ce0fb Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Sat, 27 Jan 2024 18:40:55 +0000 Subject: [PATCH 01/49] [SETUPTOOLS] add all install requirements Signed-off-by: aziz bahri --- setup.cfg | 66 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/setup.cfg b/setup.cfg index 4834011dea..26158cc4b1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,6 +32,7 @@ [metadata] name = finn +version = 0.9.0 description = A Framework for Fast, Scalable Quantized Neural Network Inference author = Yaman Umuroglu author_email = yamanu@xilinx.com @@ -60,6 +61,60 @@ package_dir = # tests_require = pytest; pytest-cov # Require a specific Python version, e.g. Python 2.7 or >= 3.4 # python_requires = >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.* +install_requires = + qonnx @ git+https://github.com/fastmachinelearning/qonnx.git@47e4357faf66b5b0d1bf77bf908bb47752421e5b + pyverilator @ git+https://github.com/maltanar/pyverilator.git@766e457465f5c0dd315490d7b9cc5d74f9a76f4f + brevitas @ git+https://github.com/Xilinx/brevitas.git@84f42259ec869eb151af4cb8a8b23ad925f493db + finn-experimental @ git+https://github.com/Xilinx/finn-experimental.git@de99347e936d51715f5356a1b6c64e37b91c23c2 + dataset_loading @ git+https://github.com/fbcotter/dataset_loading.git@0.0.4#egg=dataset_loading + bitstring==3.1.7 + clize==5.0.1 + dataclasses-json==0.5.7 + gspread==3.6.0 + importlib-resources==6.1.0 + ipython==8.12.2 + numpy==1.24.1 + onnx==1.13.0 + onnxoptimizer + onnxruntime==1.15.0 + pre-commit==3.3.2 + protobuf==3.20.3 + psutil==5.9.4 + pyscaffold==4.4 + scipy==1.10.1 + setupext-janitor>=1.1.2 + setuptools==68.2.2 + sigtools==4.0.1 + toposort==1.7.0 + vcdvcd==1.0.5 + wget==3.2 + torch==1.13.1 + torchvision==0.14.1 + torchaudio==0.13.1 + pygments==2.14.0 + ipykernel==6.21.2 + jupyter==1.0.0 + markupsafe==2.0.1 + matplotlib==3.7.0 + pytest-dependency==0.5.1 + pytest-xdist[setproctitle]==3.2.0 + pytest-parallel==0.1.1 + netron>=5.0.0 + pandas==1.5.3 + scikit-learn==1.2.1 + tqdm==4.64.1 + pytest==6.2.5 + pytest-metadata==1.7.0 + pytest-html==3.0.0 + pytest-html-merger==0.0.8 + pytest-cov==4.1.0 + deap==1.3.1 + mip==1.13.0 + networkx==2.8 + future-annotations==1.0.0 + dependencies==2.0.1 + tokenize-rt==4.2.1 + tclwrapper==0.0.1 [options.packages.find] where = src @@ -164,14 +219,3 @@ exclude = dist .eggs docs/conf.py - -[pyscaffold] -# PyScaffold's parameters when the project was created. -# This will be used when updating. Do not change! 
-version = 3.2.1 -package = finn -extensions = - travis - pre_commit - namespace -namespace = finn From 8ae17bc795f29088ff1bf78c1dc6dc2799839cee Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Sun, 28 Jan 2024 16:33:39 +0000 Subject: [PATCH 02/49] [SETUPTOOLS] Deps directory cleanup Signed-off-by: aziz bahri --- fetch-repos.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index 1275ccf31c..9869495fc1 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -115,10 +115,10 @@ fetch_board_files() { cd $OLD_PWD } -fetch_repo $QONNX_URL $QONNX_COMMIT $QONNX_DIR -fetch_repo $FINN_EXP_URL $FINN_EXP_COMMIT $FINN_EXP_DIR -fetch_repo $BREVITAS_URL $BREVITAS_COMMIT $BREVITAS_DIR -fetch_repo $PYVERILATOR_URL $PYVERILATOR_COMMIT $PYVERILATOR_DIR +# fetch_repo $QONNX_URL $QONNX_COMMIT $QONNX_DIR +# fetch_repo $FINN_EXP_URL $FINN_EXP_COMMIT $FINN_EXP_DIR +# fetch_repo $BREVITAS_URL $BREVITAS_COMMIT $BREVITAS_DIR +# fetch_repo $PYVERILATOR_URL $PYVERILATOR_COMMIT $PYVERILATOR_DIR fetch_repo $CNPY_URL $CNPY_COMMIT $CNPY_DIR fetch_repo $HLSLIB_URL $HLSLIB_COMMIT $HLSLIB_DIR fetch_repo $OMX_URL $OMX_COMMIT $OMX_DIR From 3e0b04f019fd41ba8aec49a169443a39f61ae168 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Thu, 7 Mar 2024 16:43:39 +0000 Subject: [PATCH 03/49] [QONNX] update to latest main fd61cfe Signed-off-by: aziz bahri --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 26158cc4b1..6168bec5e3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -62,7 +62,7 @@ package_dir = # Require a specific Python version, e.g. Python 2.7 or >= 3.4 # python_requires = >=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.* install_requires = - qonnx @ git+https://github.com/fastmachinelearning/qonnx.git@47e4357faf66b5b0d1bf77bf908bb47752421e5b + qonnx @ git+https://github.com/fastmachinelearning/qonnx.git@fd61cfeebbdaba351abf7e9d54cd785d7776fa4f pyverilator @ git+https://github.com/maltanar/pyverilator.git@766e457465f5c0dd315490d7b9cc5d74f9a76f4f brevitas @ git+https://github.com/Xilinx/brevitas.git@84f42259ec869eb151af4cb8a8b23ad925f493db finn-experimental @ git+https://github.com/Xilinx/finn-experimental.git@de99347e936d51715f5356a1b6c64e37b91c23c2 From b066882f6a912c6eb9cd19f4c6487a7d254dd7db Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 19 Jul 2024 14:39:47 +0100 Subject: [PATCH 04/49] [RTL Thresh] Enable workaround for unsigned narrow quantization --- .../fpgadataflow/rtl/thresholding_rtl.py | 31 +++++++++++++++---- .../test_fpgadataflow_thresholding.py | 8 ++--- .../test_fpgadataflow_thresholding_runtime.py | 26 ++++++++++------ 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py index c31f90af0b..230d2879f5 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py @@ -186,9 +186,19 @@ def prepare_codegen_rtl_values(self, model): n_thres_steps = self.get_nodeattr("numSteps") wdt = self.get_weight_datatype() if expected_thresholds != n_thres_steps: - min_val = wdt.min() - thresholds = np.insert(thresholds, 0, min_val, axis=1) - bias = bias - 1 + if DataType[output_data_type].signed(): + min_val = wdt.min() + thresholds = np.insert(thresholds, 0, min_val, axis=1) + bias = bias - 1 + # TODO: temporary fix for unsigned narrow quantization + else: + max_val = wdt.max() + if max_val > DataType[input_data_type].max(): + thresholds = 
np.insert(thresholds, len(thresholds[0]), max_val, axis=1) + else: + max_val = max_val + 1 + wdt = DataType.get_smallest_possible(max_val) + thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1) n_thres_steps += 1 # add dummy dimension as final dimension (that's what gets packed with next call) @@ -528,8 +538,18 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): n_thres_steps = self.get_nodeattr("numSteps") wdt = self.get_weight_datatype() if expected_thresholds != n_thres_steps: - min_val = wdt.min() - thresholds = np.insert(thresholds, 0, min_val, axis=1) + if DataType[output_data_type].signed(): + min_val = wdt.min() + thresholds = np.insert(thresholds, 0, min_val, axis=1) + # TODO: temporary fix for unsigned narrow quantization + else: + max_val = wdt.max() + if max_val > self.get_input_datatype().max(): + thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1) + else: + max_val = max_val + 1 + wdt = DataType.get_smallest_possible(max_val) + thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1) n_thres_steps += 1 # If a single threshold value is found, broadcast the value @@ -541,7 +561,6 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): thresh_padded = np.zeros((thresholds.shape[0], width_padded)) thresh_padded[: thresholds.shape[0], :n_thres_steps] = thresholds thresh_stream = [] - wdt = self.get_weight_datatype() bw_hexdigit = roundup_to_integer_multiple(wdt.bitwidth(), 32) padding = np.zeros(width_padded, dtype=np.int32) diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding.py b/tests/fpgadataflow/test_fpgadataflow_thresholding.py index e4dd49fc7f..fe7ba3d9fb 100644 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding.py @@ -129,14 +129,14 @@ def make_single_multithresholding_modelwrapper( [1, 2, 2], ], ) -@pytest.mark.parametrize("activation", [DataType["INT4"], DataType["BIPOLAR"]]) +@pytest.mark.parametrize("activation", [DataType["UINT4"], DataType["INT4"], DataType["BIPOLAR"]]) @pytest.mark.parametrize( "idt_tdt_cfg", [ (DataType["INT8"], DataType["INT8"]), (DataType["INT8"], DataType["INT9"]), - (DataType["UINT8"], DataType["UINT8"]), - (DataType["UINT8"], DataType["UINT9"]), + (DataType["UINT5"], DataType["UINT5"]), + (DataType["UINT5"], DataType["UINT6"]), ], ) @pytest.mark.parametrize("fold", [-1, 1, 2]) @@ -184,7 +184,7 @@ def test_fpgadataflow_thresholding( activation_bias = 0 else: activation_bias = activation.min() - if narrow: + if narrow and activation.signed(): activation_bias += 1 # Generate random thresholds and sort in ascending order diff --git a/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py b/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py index 1ad695bb94..e6175ac58b 100644 --- a/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py +++ b/tests/fpgadataflow/test_fpgadataflow_thresholding_runtime.py @@ -122,13 +122,16 @@ def make_single_thresholding_modelwrapper(impl_style, T, idt, odt, actval, n_inp @pytest.mark.parametrize("impl_style", ["rtl", "hls"]) +@pytest.mark.parametrize( + "idt_act_cfg", [(DataType["INT16"], DataType["INT4"]), (DataType["UINT8"], DataType["UINT4"])] +) # configuration (ch, pe) -@pytest.mark.parametrize("cfg", [(1, 1), (6, 2), (6, 3)]) +@pytest.mark.parametrize("cfg", [(1, 1), (6, 2), (6, 6)]) @pytest.mark.parametrize("narrow", [True, False]) @pytest.mark.parametrize("per_tensor", [True, False]) @pytest.mark.fpgadataflow 
@pytest.mark.vivado -def test_runtime_thresholds_read(impl_style, cfg, narrow, per_tensor): +def test_runtime_thresholds_read(impl_style, idt_act_cfg, cfg, narrow, per_tensor): """Read back threshold weights during runtime 1. Create random initial weights T @@ -140,8 +143,8 @@ def test_runtime_thresholds_read(impl_style, cfg, narrow, per_tensor): pe = cfg[1] n_inp_vecs = [1, 2, 2] hls_mem_mode = "internal_decoupled" - act = DataType["INT4"] - idt = DataType["INT16"] + act = idt_act_cfg[1] + idt = idt_act_cfg[0] odt = act n_steps = act.get_num_possible_values() - 1 # Generate random thresholds and sort in ascending order @@ -151,7 +154,7 @@ def test_runtime_thresholds_read(impl_style, cfg, narrow, per_tensor): T = sort_thresholds_increasing(T) actval = act.min() - if narrow: + if narrow and act.signed(): actval += 1 model = make_single_thresholding_modelwrapper(impl_style, T, idt, odt, actval, n_inp_vecs, ch) @@ -219,13 +222,16 @@ def read_weights(sim): @pytest.mark.parametrize("impl_style", ["rtl", "hls"]) +@pytest.mark.parametrize( + "idt_act_cfg", [(DataType["INT16"], DataType["INT4"]), (DataType["UINT8"], DataType["UINT4"])] +) # configuration (ch, pe) -@pytest.mark.parametrize("cfg", [(1, 1), (6, 2), (6, 3)]) +@pytest.mark.parametrize("cfg", [(1, 1), (6, 2), (6, 6)]) @pytest.mark.parametrize("narrow", [True, False]) @pytest.mark.parametrize("per_tensor", [True, False]) @pytest.mark.fpgadataflow @pytest.mark.vivado -def test_runtime_thresholds_write(impl_style, cfg, narrow, per_tensor): +def test_runtime_thresholds_write(impl_style, idt_act_cfg, cfg, narrow, per_tensor): """Write threshold weights during runtime 1. Create random initial weights T_init @@ -241,8 +247,8 @@ def test_runtime_thresholds_write(impl_style, cfg, narrow, per_tensor): n_inp_vecs = [1, 2, 2] hls_mem_mode = "internal_decoupled" - act = DataType["INT4"] - idt = DataType["INT16"] + act = idt_act_cfg[1] + idt = idt_act_cfg[0] odt = act n_steps = act.get_num_possible_values() - 1 @@ -253,7 +259,7 @@ def test_runtime_thresholds_write(impl_style, cfg, narrow, per_tensor): T_init = sort_thresholds_increasing(T_init) actval = act.min() - if narrow: + if narrow and act.signed(): actval += 1 model = make_single_thresholding_modelwrapper( From 9d95b1b3c34bfabcf4160e4a39f7cc9bc26a363e Mon Sep 17 00:00:00 2001 From: auphelia Date: Mon, 22 Jul 2024 11:30:31 +0100 Subject: [PATCH 05/49] [RTL thresh] Fix datatype extension for unsigned narrow quantization --- .../custom_op/fpgadataflow/rtl/thresholding_rtl.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py b/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py index 230d2879f5..d1e9387b1b 100644 --- a/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py +++ b/src/finn/custom_op/fpgadataflow/rtl/thresholding_rtl.py @@ -197,7 +197,11 @@ def prepare_codegen_rtl_values(self, model): thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1) else: max_val = max_val + 1 - wdt = DataType.get_smallest_possible(max_val) + # increase wdt + if not wdt.signed(): + wdt = DataType.get_smallest_possible(max_val) + else: + wdt = DataType.get_smallest_possible(-max_val - 1) thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1) n_thres_steps += 1 @@ -548,7 +552,11 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1) else: max_val = max_val + 1 - wdt = 
DataType.get_smallest_possible(max_val) + # increase wdt + if not wdt.signed(): + wdt = DataType.get_smallest_possible(max_val) + else: + wdt = DataType.get_smallest_possible(-max_val - 1) thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1) n_thres_steps += 1 From ec120d54f86d27546301b552cc780d87b859c6e7 Mon Sep 17 00:00:00 2001 From: Hannah Yan Date: Mon, 22 Jul 2024 17:48:32 +0100 Subject: [PATCH 06/49] Updated run-docker.sh to include values needed for verification --- run-docker.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/run-docker.sh b/run-docker.sh index 88fabff2fa..9dd6796782 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -265,6 +265,30 @@ if [ ! -z "$FINN_XILINX_PATH" ];then DOCKER_EXEC+="-e ALVEO_TARGET_DIR=$ALVEO_TARGET_DIR " fi fi + +# If using build verification, set up the necessary Docker variables +if [ "$VERIFICATION_EN" = 1 ]; then + if [ -z "$FINN_EXAMPLES_ROOT" ]; then + recho "FINN_EXAMPLES_ROOT path has not been set." + recho "Please set FINN_EXAMPLES_ROOT path to enable verification." + exit -1 + elif [ ! -d "${FINN_EXAMPLES_ROOT}/ci" ]; then + recho "ci folder not found in ${FINN_EXAMPLES_ROOT}." + recho "Please ensure the FINN-examples repo has been set up correctly, and FINN_EXAMPLES_ROOT path is set correctly, to enable verification." + exit -1 + elif [ -z "$VERIFICATION_IO" ]; then + recho "VERIFICATION_IO paths has not been set." + recho "Please ensure the path to the input and expected output files has been set correctly to eneable verification." + exit -1 + else + DOCKER_EXEC+="-e VERIFICATION_EN=$VERIFICATION_EN " + DOCKER_EXEC+="-e FINN_EXAMPLES_ROOT=$FINN_EXAMPLES_ROOT " + DOCKER_EXEC+="-e VERIFICATION_IO=$VERIFICATION_IO " + FINN_DOCKER_EXTRA+="-v $FINN_EXAMPLES_ROOT/ci:$FINN_EXAMPLES_ROOT/ci " + FINN_DOCKER_EXTRA+="-v $VERIFICATION_IO:$VERIFICATION_IO " + fi +fi + DOCKER_EXEC+="$FINN_DOCKER_EXTRA " if [ -z "$FINN_SINGULARITY" ];then From 6a4406d2d14da298648f0733ec0f744918b98806 Mon Sep 17 00:00:00 2001 From: auphelia Date: Tue, 23 Jul 2024 14:06:19 +0100 Subject: [PATCH 07/49] [Docker] Add additional comment to clarify that additions to bash script are for internal ci --- run-docker.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/run-docker.sh b/run-docker.sh index 9dd6796782..0b45638bda 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -266,7 +266,8 @@ if [ ! -z "$FINN_XILINX_PATH" ];then fi fi -# If using build verification, set up the necessary Docker variables +# This part is used for internal ci for finn-examples +# if using build verification for finn-examples ci, set up the necessary Docker variables if [ "$VERIFICATION_EN" = 1 ]; then if [ -z "$FINN_EXAMPLES_ROOT" ]; then recho "FINN_EXAMPLES_ROOT path has not been set." @@ -289,6 +290,7 @@ if [ "$VERIFICATION_EN" = 1 ]; then fi fi + DOCKER_EXEC+="$FINN_DOCKER_EXTRA " if [ -z "$FINN_SINGULARITY" ];then From 65a356a08f73cb42d750106ffa84fcc0c401826a Mon Sep 17 00:00:00 2001 From: Hannah Yan Date: Thu, 25 Jul 2024 10:13:04 +0100 Subject: [PATCH 08/49] Updated run-docker.sh to check VERIFICATION_IO path --- run-docker.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/run-docker.sh b/run-docker.sh index 0b45638bda..b1fe44eb0c 100755 --- a/run-docker.sh +++ b/run-docker.sh @@ -281,6 +281,10 @@ if [ "$VERIFICATION_EN" = 1 ]; then recho "VERIFICATION_IO paths has not been set." recho "Please ensure the path to the input and expected output files has been set correctly to eneable verification." exit -1 + elif [ ! 
-d "$VERIFICATION_IO" ]; then + recho "${VERIFICATION_IO} is not a directory." + recho "Please ensure the VERIFICATION_IO path has been set to the directory containing the input and expected output files for verification." + exit -1 else DOCKER_EXEC+="-e VERIFICATION_EN=$VERIFICATION_EN " DOCKER_EXEC+="-e FINN_EXAMPLES_ROOT=$FINN_EXAMPLES_ROOT " From 43dad3bf55b1a7da7e1d246b6c969e1d80f46480 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Fri, 26 Jul 2024 23:58:52 +0100 Subject: [PATCH 09/49] setup: upgrade onnxruntime --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 6168bec5e3..511ce451dd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -76,7 +76,7 @@ install_requires = numpy==1.24.1 onnx==1.13.0 onnxoptimizer - onnxruntime==1.15.0 + onnxruntime==1.16.1 pre-commit==3.3.2 protobuf==3.20.3 psutil==5.9.4 From 0256f043a527f721aed3464d21c0ef8f708715e7 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Fri, 26 Jul 2024 23:56:46 +0100 Subject: [PATCH 10/49] softmax: add initial test harness --- .../fpgadataflow/test_fpgadataflow_softmax.py | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 tests/fpgadataflow/test_fpgadataflow_softmax.py diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py new file mode 100644 index 0000000000..6eb424f441 --- /dev/null +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -0,0 +1,165 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
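
A short note on the verification hooks wired into run-docker.sh in the patches
above: the container only receives VERIFICATION_EN, FINN_EXAMPLES_ROOT and
VERIFICATION_IO; how they are consumed is left to the finn-examples ci scripts.
A rough sketch of that consumption, with assumed file names and comparison
logic (not part of these patches):

    import os
    import numpy as np

    if os.environ.get("VERIFICATION_EN", "0") == "1":
        io_dir = os.environ["VERIFICATION_IO"]            # directory mounted by run-docker.sh
        examples_root = os.environ["FINN_EXAMPLES_ROOT"]  # provides the ci/ folder
        inp = np.load(os.path.join(io_dir, "input.npy"))                 # assumed file name
        expected = np.load(os.path.join(io_dir, "expected_output.npy"))  # assumed file name
        # ... feed `inp` through the build step under test and compare against `expected`
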
+ +import pytest +import torch +import onnx +from onnx import helper, numpy_helper +import numpy as np +import os +from brevitas.export import export_qonnx +from qonnx.util.cleanup import cleanup as qonnx_cleanup +from onnx import TensorProto, helper +from qonnx.core.datatype import DataType +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.custom_op.registry import getCustomOp +from qonnx.transformation.general import GiveUniqueNodeNames +from qonnx.transformation.infer_shapes import InferShapes +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model +from qonnx.transformation.infer_datatypes import InferDataTypes +from qonnx.transformation.fold_constants import FoldConstants +from finn.transformation.streamline.absorb import ( + AbsorbAddIntoMultiThreshold, + AbsorbMulIntoMultiThreshold, + FactorOutMulSignMagnitude, + Absorb1BitMulIntoConv, +) +import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw +from brevitas.quant.scaled_int import Int8ActPerTensorFloat, Int8WeightPerTensorFloat +import finn.core.onnx_exec as oxe +from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN +from finn.util.basic import pynq_part_map +from finn.transformation.streamline.reorder import ( + MakeMaxPoolNHWC, + MoveScalarLinearPastInvariants, +) +from finn.transformation.streamline import Streamline +import finn.transformation.streamline.absorb as absorb +import onnx +from onnx import helper +import onnxruntime +import torch +import torch.nn as nn +import brevitas.nn as qnn +test_fpga_part = "xczu3eg-sbva484-1-e" +target_clk_ns = 5 +export_onnx_path = "softmax_dut.onnx" + +### Make model wrapper +# 1. make node, + + +### Test +## 1. Compiler integration +# 1. check all transforms can be applied to a model with a softmax layer +# 2. Check that IP stitching produces valid HLS package + +## 2. Functionality test +# 1. Check that we can run cpp/rtl sims +# 2. check values are correct + +def create_model(): + ''' + Create a quantized softmax model. + Input and output are quantized to Int8ActPerTensorFloat, this is to make sure + that the softmax layer is followed by a Quant node. 
+ ''' + io_shape = (1, 64) + class QuantSoftMaxSimple(nn.Module): + def __init__(self): + super(QuantSoftMaxSimple, self).__init__() + # self.input_identity = qnn.QuantIdentity(act_quant=Int8ActPerTensorFloat) + self.output_identity = qnn.QuantIdentity() + self.softmax = nn.Softmax(dim=1) + + def forward(self, x): + # x = self.input_identity(x) + x = self.softmax(x) + x = self.output_identity(x) + return x + + dut = QuantSoftMaxSimple() + input = torch.randn(io_shape) + export_qonnx(dut, input, export_onnx_path, opset_version=11) + qonnx_cleanup(export_onnx_path, out_file=export_onnx_path) + # set the model input to INT8 + model = ModelWrapper(export_onnx_path) + model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"]) + # import pdb; pdb.set_trace() + return model + +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.fpgadataflow +def test_convert_to_hw_softmax_layer(exec_mode): + ''' + Test that all transofrmations can be applied to a model with a softmax layer. + ''' + # Create the qonnx model + # modelproto = create_softmax_graph() + + model = create_model() + try: + model = model.transform(ConvertQONNXtoFINN()) + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model.save("qonnx_softmax_dut.onnx") + model = model.transform(to_hw.InferQuantSoftmax()) + model = model.transform(SpecializeLayers(test_fpga_part)) + if exec_mode == "cppsim": + model = model.transform(PrepareCppSim()) + model = model.transform(CompileCppSim()) + model = model.transform(SetExecMode("cppsim")) + elif exec_mode == "rtlsim": + model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) + model = model.transform(SetExecMode("rtlsim")) + model = model.transform(HLSSynthIP()) + model = model.transform(PrepareRTLSim()) + except Exception as e: + pytest.fail(f"Failed to transform the model: {str(e)}") + +def test_fpgadataflow_quantsoftmax(): + # Create the qonnx model + # create_model() + model = create_model() + try: + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + model = model.transform(FoldConstants()) + model = model.transform(to_hw.InferQuantSoftmax()) + model = model.transform(SpecializeLayers(test_fpga_part)) + + except Exception as e: + pytest.fail(f"Failed to transform the model: {str(e)}") \ No newline at end of file From 58da0f67ef1e8b5cbc14962270bc8f67a7b171e7 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Fri, 26 Jul 2024 23:57:57 +0100 Subject: [PATCH 11/49] softmax: stub class an infersoftmax --- src/finn/custom_op/fpgadataflow/__init__.py | 3 ++ .../custom_op/fpgadataflow/quantsoftmax.py | 22 +++++++++ .../fpgadataflow/convert_to_hw_layers.py | 47 +++++++++++++++++++ 3 files changed, 72 insertions(+) create mode 100644 src/finn/custom_op/fpgadataflow/quantsoftmax.py diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index aed2ab7fe1..9bcbb1e860 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -54,6 +54,8 @@ from finn.custom_op.fpgadataflow.thresholding import Thresholding from finn.custom_op.fpgadataflow.upsampler import UpsampleNearestNeighbour from finn.custom_op.fpgadataflow.vectorvectoractivation import VVAU +from finn.custom_op.fpgadataflow.quantsoftmax import QuantSoftmax + custom_op = dict() @@ -81,3 +83,4 @@ custom_op["StreamingEltwise"] = StreamingEltwise custom_op["StreamingMaxPool"] = StreamingMaxPool custom_op["UpsampleNearestNeighbour"] = 
UpsampleNearestNeighbour +custom_op["QuantSoftmax"] = QuantSoftmax diff --git a/src/finn/custom_op/fpgadataflow/quantsoftmax.py b/src/finn/custom_op/fpgadataflow/quantsoftmax.py new file mode 100644 index 0000000000..16f54cc2af --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/quantsoftmax.py @@ -0,0 +1,22 @@ + +from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp +from finn.util.data_packing import numpy_to_hls_code, pack_innermost_dim_as_hex_string + +class QuantSoftmax(HWCustomOp): + """Abstraction layer for HW implementation of VectorVectorActivation layers.""" + + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = { + "simd": ("i", False, 1), + "channels": ("i", True, 0), + # FINN DataTypes for inputs, weights, outputs + "data_type": ("s", True, ""), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_number_output_values(self): + raise NotImplementedError("This function is not yet implemented.") \ No newline at end of file diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index e14181b140..c93bf48393 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -1697,3 +1697,50 @@ def apply(self, model): model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) return (model, graph_modified) + + +class InferQuantSoftmax(Transformation): + ''' + Find softmax layers that are followed by a MultiThreshold layer and replace them with QuantizedSoftmax + ''' + def __init__(self): + super().__init__() + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for n in graph.node: + node_ind += 1 + # check that an optype of Softmax is present followed by a MultiThreshold + consumer = model.find_consumer(n.output[0]) + if consumer is not None and consumer.op_type == "MultiThreshold": + print("Found Softmax followed by MultiThreshold") + # get the shape of the input tensor + input_shape = model.get_tensor_shape(n.input[0]) + # get the shape of the output tensor + output_shape = model.get_tensor_shape(n.output[0]) + idt0 = model.get_tensor_datatype(n.input[0]) + num_channels = int(input_shape[-1]) + # create node with no parallelization first + simd = 1 + # create and insert new node + new_node = helper.make_node( + "QuantSoftmax", + [n.input[0]], # input tensor(s) + [n.output[0]], # output tensor(s) + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + channels=num_channels, + data_type = idt0.name, + name=n.name, + simd=simd + ) + graph.node.insert(node_ind, new_node) + graph.node.remove(n) + graph_modified = True + + if graph_modified: + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + return (model, graph_modified) From bbbdc4aefc6c60e1ea446573d8117e9b7ebf1f23 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Mon, 29 Jul 2024 14:02:18 +0100 Subject: [PATCH 12/49] softmax: stub abstract methods --- .../custom_op/fpgadataflow/quantsoftmax.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/quantsoftmax.py b/src/finn/custom_op/fpgadataflow/quantsoftmax.py index 16f54cc2af..e6f258bde6 100644 --- a/src/finn/custom_op/fpgadataflow/quantsoftmax.py +++ b/src/finn/custom_op/fpgadataflow/quantsoftmax.py @@ -19,4 +19,22 @@ def 
get_nodeattr_types(self): return my_attrs def get_number_output_values(self): - raise NotImplementedError("This function is not yet implemented.") \ No newline at end of file + raise NotImplementedError("This function is not yet implemented.") + + def execute_node(self, context, graph): + raise NotImplementedError + + def get_number_output_values(self): + raise NotImplementedError + + def get_nodeattr_types(self): + raise NotImplementedError + + def make_shape_compatible_op(self, model): + raise NotImplementedError + + def infer_node_datatype(self, model): + raise NotImplementedError + + def verify_node(self): + raise NotImplementedError \ No newline at end of file From cbda331ed543e5b6dc8a48579b037776b561f9d1 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Mon, 29 Jul 2024 14:02:47 +0100 Subject: [PATCH 13/49] softmax: input image dimension attribute --- .../transformation/fpgadataflow/convert_to_hw_layers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index c93bf48393..52999c4c1a 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -1716,9 +1716,10 @@ def apply(self, model): consumer = model.find_consumer(n.output[0]) if consumer is not None and consumer.op_type == "MultiThreshold": print("Found Softmax followed by MultiThreshold") - # get the shape of the input tensor + # get the shape of the input/output tensor input_shape = model.get_tensor_shape(n.input[0]) - # get the shape of the output tensor + dim_h = int(input_shape[1]) + dim_w = int(input_shape[2]) output_shape = model.get_tensor_shape(n.output[0]) idt0 = model.get_tensor_datatype(n.input[0]) num_channels = int(input_shape[-1]) @@ -1731,6 +1732,7 @@ def apply(self, model): [n.output[0]], # output tensor(s) domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", + img_dim=[dim_h, dim_w], channels=num_channels, data_type = idt0.name, name=n.name, From 48738f63cefce35074952a247bd28dc9bbea744d Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Mon, 29 Jul 2024 14:03:07 +0100 Subject: [PATCH 14/49] softmax: use input img dimension to build input shape --- src/finn/custom_op/fpgadataflow/quantsoftmax.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/quantsoftmax.py b/src/finn/custom_op/fpgadataflow/quantsoftmax.py index e6f258bde6..4654feb6cc 100644 --- a/src/finn/custom_op/fpgadataflow/quantsoftmax.py +++ b/src/finn/custom_op/fpgadataflow/quantsoftmax.py @@ -10,6 +10,7 @@ def __init__(self, onnx_node, **kwargs): def get_nodeattr_types(self): my_attrs = { + "img_dim": ("i", True, 0), "simd": ("i", False, 1), "channels": ("i", True, 0), # FINN DataTypes for inputs, weights, outputs @@ -18,6 +19,15 @@ def get_nodeattr_types(self): my_attrs.update(super().get_nodeattr_types()) return my_attrs + def get_normal_input_shape(self, ind=0): + idim_h, idim_w = self.get_nodeattr("img_dim") + num_ch = self.get_nodeattr("channels") + ishape = (1, idim_h, idim_w, num_ch) + return ishape + + def get_normal_output_shape(self, ind=0): + return self.get_normal_input_shape() + def get_number_output_values(self): raise NotImplementedError("This function is not yet implemented.") From ec33c6aa6b89f2e886af1e9487f9d5eefb2c02e0 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Mon, 29 Jul 2024 16:18:29 +0100 Subject: [PATCH 15/49] softmax: softmax on inner dim --- 
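
Note: a minimal usage sketch of the conversion flow these patches build up; the
file names and FPGA part are illustrative, and InferQuantSoftmax only fires on a
Softmax node whose output feeds a MultiThreshold (i.e. a quantized softmax):

    from qonnx.core.modelwrapper import ModelWrapper
    import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
    from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers

    model = ModelWrapper("softmax_streamlined.onnx")    # assumed: already cleaned up / streamlined
    model = model.transform(to_hw.InferQuantSoftmax())  # Softmax + MultiThreshold -> QuantSoftmax
    model = model.transform(SpecializeLayers("xczu3eg-sbva484-1-e"))  # pick a backend variant for the part
    model.save("softmax_hw.onnx")
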
tests/fpgadataflow/test_fpgadataflow_softmax.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index 6eb424f441..e1242b7283 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -76,7 +76,7 @@ import brevitas.nn as qnn test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 -export_onnx_path = "softmax_dut.onnx" +export_onnx_path = "softmax_dut_qonnx.onnx" ### Make model wrapper # 1. make node, @@ -97,13 +97,13 @@ def create_model(): Input and output are quantized to Int8ActPerTensorFloat, this is to make sure that the softmax layer is followed by a Quant node. ''' - io_shape = (1, 64) + io_shape = (1, 8, 8, 2) class QuantSoftMaxSimple(nn.Module): def __init__(self): super(QuantSoftMaxSimple, self).__init__() # self.input_identity = qnn.QuantIdentity(act_quant=Int8ActPerTensorFloat) self.output_identity = qnn.QuantIdentity() - self.softmax = nn.Softmax(dim=1) + self.softmax = nn.Softmax(dim=3) # softmax along the last dimension def forward(self, x): # x = self.input_identity(x) From 507b7981731601cb48536fcd1af51c3177a02b5e Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Mon, 29 Jul 2024 16:19:16 +0100 Subject: [PATCH 16/49] softmax: infer softmax helper implementation --- .../custom_op/fpgadataflow/quantsoftmax.py | 43 ++++++++++++++----- .../fpgadataflow/convert_to_hw_layers.py | 16 ++++--- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/quantsoftmax.py b/src/finn/custom_op/fpgadataflow/quantsoftmax.py index 4654feb6cc..c37f791270 100644 --- a/src/finn/custom_op/fpgadataflow/quantsoftmax.py +++ b/src/finn/custom_op/fpgadataflow/quantsoftmax.py @@ -1,6 +1,9 @@ from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp from finn.util.data_packing import numpy_to_hls_code, pack_innermost_dim_as_hex_string +from onnx.helper import make_node +import warnings +from qonnx.core.datatype import DataType class QuantSoftmax(HWCustomOp): """Abstraction layer for HW implementation of VectorVectorActivation layers.""" @@ -10,7 +13,7 @@ def __init__(self, onnx_node, **kwargs): def get_nodeattr_types(self): my_attrs = { - "img_dim": ("i", True, 0), + "ifm_dim": ("ints", True, []), "simd": ("i", False, 1), "channels": ("i", True, 0), # FINN DataTypes for inputs, weights, outputs @@ -20,10 +23,9 @@ def get_nodeattr_types(self): return my_attrs def get_normal_input_shape(self, ind=0): - idim_h, idim_w = self.get_nodeattr("img_dim") - num_ch = self.get_nodeattr("channels") - ishape = (1, idim_h, idim_w, num_ch) - return ishape + h, w = self.get_nodeattr("ifm_dim") + c = self.get_nodeattr("channels") + return (1, h, w, c) def get_normal_output_shape(self, ind=0): return self.get_normal_input_shape() @@ -37,14 +39,35 @@ def execute_node(self, context, graph): def get_number_output_values(self): raise NotImplementedError - def get_nodeattr_types(self): - raise NotImplementedError - def make_shape_compatible_op(self, model): - raise NotImplementedError + def get_input_datatype(self, ind=0): + """Returns FINN DataType of input.""" + data_type = DataType[self.get_nodeattr("data_type")] + # the hlslib op always pads with zeros, so ensure that the DataType + # is able to represent zeros + assert data_type.allowed(0), "DataType must support zero" + return data_type + def make_shape_compatible_op(self, model): + shape = self.get_normal_input_shape() + # create an ONNX Softmax node 
with the same shape as this one + return make_node("Softmax", + inputs=[self.onnx_node.input[0]], + outputs=[self.onnx_node.output[0]], + shape=list(shape) + ) def infer_node_datatype(self, model): - raise NotImplementedError + node = self.onnx_node + idt = model.get_tensor_datatype(node.input[0]) + if idt != self.get_input_datatype(): + warn_str = "data_type changing for %s: %s -> %s " % ( + node.name, + str(self.get_input_datatype()), + str(idt), + ) + warnings.warn(warn_str) + self.set_nodeattr("data_type", idt.name) + model.set_tensor_datatype(node.output[0], idt) def verify_node(self): raise NotImplementedError \ No newline at end of file diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index 52999c4c1a..2966bf1cc0 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -1714,15 +1714,17 @@ def apply(self, model): node_ind += 1 # check that an optype of Softmax is present followed by a MultiThreshold consumer = model.find_consumer(n.output[0]) - if consumer is not None and consumer.op_type == "MultiThreshold": + if n.op_type == "Softmax" and consumer is not None and consumer.op_type == "MultiThreshold": print("Found Softmax followed by MultiThreshold") # get the shape of the input/output tensor input_shape = model.get_tensor_shape(n.input[0]) - dim_h = int(input_shape[1]) - dim_w = int(input_shape[2]) - output_shape = model.get_tensor_shape(n.output[0]) + assert input_shape == model.get_tensor_shape(consumer.input[0]), ( + "Softmax and MultiThreshold input shapes do not match" + ) + h = int(input_shape[1]) + w = int(input_shape[2]) + c = int(input_shape[3]) idt0 = model.get_tensor_datatype(n.input[0]) - num_channels = int(input_shape[-1]) # create node with no parallelization first simd = 1 # create and insert new node @@ -1732,8 +1734,8 @@ def apply(self, model): [n.output[0]], # output tensor(s) domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - img_dim=[dim_h, dim_w], - channels=num_channels, + ifm_dim=[h, w], + channels=c, data_type = idt0.name, name=n.name, simd=simd From 7727a3f91cc241d7a7cc85019a82a65190ce024c Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Mon, 29 Jul 2024 16:26:59 +0100 Subject: [PATCH 17/49] softmax: hls class stub --- .../custom_op/fpgadataflow/hls/__init__.py | 2 + .../fpgadataflow/hls/quantsoftmax_hls.py | 53 +++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py diff --git a/src/finn/custom_op/fpgadataflow/hls/__init__.py b/src/finn/custom_op/fpgadataflow/hls/__init__.py index 405c47a08d..8f5a0a7cc7 100644 --- a/src/finn/custom_op/fpgadataflow/hls/__init__.py +++ b/src/finn/custom_op/fpgadataflow/hls/__init__.py @@ -52,6 +52,7 @@ from finn.custom_op.fpgadataflow.hls.tlastmarker_hls import TLastMarker_hls from finn.custom_op.fpgadataflow.hls.upsampler_hls import UpsampleNearestNeighbour_hls from finn.custom_op.fpgadataflow.hls.vectorvectoractivation_hls import VVAU_hls +from finn.custom_op.fpgadataflow.hls.quantsoftmax_hls import QuantSoftmax_hls custom_op = dict() @@ -79,3 +80,4 @@ custom_op["UpsampleNearestNeighbour_hls"] = UpsampleNearestNeighbour_hls custom_op["MVAU_hls"] = MVAU_hls custom_op["VVAU_hls"] = VVAU_hls +custom_op["QuantSoftmax_hls"] = QuantSoftmax_hls diff --git a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py new 
file mode 100644 index 0000000000..804fe35ab9 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py @@ -0,0 +1,53 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from finn.custom_op.fpgadataflow.quantsoftmax import QuantSoftmax +from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend + +class QuantSoftmax_hls(QuantSoftmax, HLSBackend): + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = {} + my_attrs.update(QuantSoftmax.get_nodeattr_types(self)) + my_attrs.update(HLSBackend.get_nodeattr_types(self)) + return my_attrs + + def global_includes(self): + # not implemented + raise NotImplementedError + + def defines(self, var): + raise NotImplementedError + + def docompute(self): + raise NotImplementedError + + def blackboxfunction(self): + raise NotImplementedError \ No newline at end of file From 26899d2649be953faf7a6a4a5e273e9db00c2745 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Tue, 30 Jul 2024 08:56:47 +0100 Subject: [PATCH 18/49] softmax: extend test to apply folding config --- .../fpgadataflow/test_fpgadataflow_softmax.py | 38 ++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index e1242b7283..06e091fc9d 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -32,13 +32,13 @@ from onnx import helper, numpy_helper import numpy as np import os +import finn.core.onnx_exec as oxe from brevitas.export import export_qonnx from qonnx.util.cleanup import cleanup as qonnx_cleanup from onnx import TensorProto, helper from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp -from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_shapes import InferShapes from qonnx.util.basic import 
gen_finn_dt_tensor, qonnx_make_model from qonnx.transformation.infer_datatypes import InferDataTypes @@ -66,6 +66,13 @@ MakeMaxPoolNHWC, MoveScalarLinearPastInvariants, ) +from qonnx.transformation.general import ( + ApplyConfig, + GiveReadableTensorNames, + GiveUniqueNodeNames, + RemoveStaticGraphInputs, + RemoveUnusedTensors, +) from finn.transformation.streamline import Streamline import finn.transformation.streamline.absorb as absorb import onnx @@ -79,7 +86,7 @@ export_onnx_path = "softmax_dut_qonnx.onnx" ### Make model wrapper -# 1. make node, +# 1. make node, ### Test @@ -91,13 +98,14 @@ # 1. Check that we can run cpp/rtl sims # 2. check values are correct + def create_model(): ''' Create a quantized softmax model. - Input and output are quantized to Int8ActPerTensorFloat, this is to make sure + Input and output are quantized to Int8ActPerTensorFloat, this is to make sure that the softmax layer is followed by a Quant node. ''' - io_shape = (1, 8, 8, 2) + io_shape = (1, 12, 128, 128) class QuantSoftMaxSimple(nn.Module): def __init__(self): super(QuantSoftMaxSimple, self).__init__() @@ -122,34 +130,46 @@ def forward(self, x): return model @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.parametrize("simd", ["simd1", "simd2", "simd3", "simd4"]) @pytest.mark.fpgadataflow -def test_convert_to_hw_softmax_layer(exec_mode): +def test_convert_to_hw_softmax_layer(exec_mode, simd): ''' Test that all transofrmations can be applied to a model with a softmax layer. ''' # Create the qonnx model # modelproto = create_softmax_graph() - + model = create_model() + simd = int(simd[-1]) + folding_config = { + "Defaults": {}, + "QuantSoftmax_0": { + "simd": simd + } + } try: model = model.transform(ConvertQONNXtoFINN()) model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) - model.save("qonnx_softmax_dut.onnx") model = model.transform(to_hw.InferQuantSoftmax()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(ApplyConfig(folding_config)) model = model.transform(SpecializeLayers(test_fpga_part)) if exec_mode == "cppsim": model = model.transform(PrepareCppSim()) model = model.transform(CompileCppSim()) model = model.transform(SetExecMode("cppsim")) elif exec_mode == "rtlsim": - model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) model = model.transform(SetExecMode("rtlsim")) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) model = model.transform(HLSSynthIP()) model = model.transform(PrepareRTLSim()) except Exception as e: pytest.fail(f"Failed to transform the model: {str(e)}") + # oxe.execute_onnx() + def test_fpgadataflow_quantsoftmax(): # Create the qonnx model # create_model() @@ -160,6 +180,6 @@ def test_fpgadataflow_quantsoftmax(): model = model.transform(FoldConstants()) model = model.transform(to_hw.InferQuantSoftmax()) model = model.transform(SpecializeLayers(test_fpga_part)) - + except Exception as e: pytest.fail(f"Failed to transform the model: {str(e)}") \ No newline at end of file From f917bfc33400f3961b958af538d0da29147be581 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Tue, 30 Jul 2024 08:57:16 +0100 Subject: [PATCH 19/49] softmax: add Quant to node name --- src/finn/transformation/fpgadataflow/convert_to_hw_layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index 2966bf1cc0..aef5f6a64c 
100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -1737,7 +1737,7 @@ def apply(self, model): ifm_dim=[h, w], channels=c, data_type = idt0.name, - name=n.name, + name="Quant"+n.name, simd=simd ) graph.node.insert(node_ind, new_node) From 74c338ccd8bc1d104bb8eaf31493918118fe947e Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Tue, 30 Jul 2024 08:57:58 +0100 Subject: [PATCH 20/49] softmax: generate hls code --- .../fpgadataflow/hls/quantsoftmax_hls.py | 61 ++++++++++++++++--- .../custom_op/fpgadataflow/quantsoftmax.py | 25 +++++++- 2 files changed, 75 insertions(+), 11 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py index 804fe35ab9..b4ada72a32 100644 --- a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py @@ -32,22 +32,63 @@ class QuantSoftmax_hls(QuantSoftmax, HLSBackend): def __init__(self, onnx_node, **kwargs): super().__init__(onnx_node, **kwargs) - + def get_nodeattr_types(self): my_attrs = {} my_attrs.update(QuantSoftmax.get_nodeattr_types(self)) my_attrs.update(HLSBackend.get_nodeattr_types(self)) return my_attrs - + def global_includes(self): - # not implemented - raise NotImplementedError - + self.code_gen_dict["$GLOBALS$"] = [ + '#include "softmax.hpp"', + '#include "utils.hpp"' + ] + def defines(self, var): - raise NotImplementedError - + simd = self.get_nodeattr("simd") + ibits = self.get_input_datatype().bitwidth() + channels = self.get_nodeattr("channels") + self.code_gen_dict["$DEFINES$"] = [ + f""" + constexpr unsigned SIMD = {simd}; + constexpr unsigned W = {channels}; + using T = ap_uint<{ibits}>; + using F = float; + """ + ] + def docompute(self): - raise NotImplementedError - + self.code_gen_dict["$DOCOMPUTE$"] = [ + f''' + static hls::stream> src0; + static hls::stream> dst0; + + move(src, src0); + smaxquant(src0, dst0); + move(dst0, dst); + ''' + ] + def blackboxfunction(self): - raise NotImplementedError \ No newline at end of file + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ + f''' + void {self.onnx_node.name}( + hls::stream> &src, + hls::stream> &dst + ) + ''' + ] + + def pragmas(self): + self.code_gen_dict["$PRAGMAS$"] = [ + f''' + #pragma HLS interface AXIS port=src + #pragma HLS interface AXIS port=dst + #pragma HLS aggregate variable=src compact=bit + #pragma HLS aggregate variable=dst compact=bit + + #pragma HLS interface ap_ctrl_none port=return + #pragma HLS dataflow disable_start_propagation + ''' + ] \ No newline at end of file diff --git a/src/finn/custom_op/fpgadataflow/quantsoftmax.py b/src/finn/custom_op/fpgadataflow/quantsoftmax.py index c37f791270..cac2c1a327 100644 --- a/src/finn/custom_op/fpgadataflow/quantsoftmax.py +++ b/src/finn/custom_op/fpgadataflow/quantsoftmax.py @@ -70,4 +70,27 @@ def infer_node_datatype(self, model): model.set_tensor_datatype(node.output[0], idt) def verify_node(self): - raise NotImplementedError \ No newline at end of file + raise NotImplementedError + + def get_instream_width(self, ind=0): + ibits = self.get_input_datatype().bitwidth() + simd = self.get_nodeattr("simd") + return ibits * simd + + def get_outstream_width(self, ind=0): + obits = self.get_output_datatype().bitwidth() + simd = self.get_nodeattr("simd") + return obits * simd + + def get_output_datatype(self, ind=0): + """Returns FINN DataType of output. 
(Same as input datatype)""" + return self.get_input_datatype() + + def get_folded_output_shape(self, ind=0): + normal_oshape = list(self.get_normal_output_shape()) + ifm_ch = self.get_nodeattr("channels") + simd = self.get_nodeattr("simd") + assert ifm_ch % simd == 0, "SIMD must divide input channels" + fold = int(normal_oshape[-1] / simd) + folded_oshape = normal_oshape[:-1] + [fold, simd] + return tuple(folded_oshape) \ No newline at end of file From 41d0f06354721962d8ac4118d4a5dbe3b5824a3c Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Tue, 30 Jul 2024 12:38:13 +0100 Subject: [PATCH 21/49] softmax: use sname for input name --- .../fpgadataflow/hls/quantsoftmax_hls.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py index b4ada72a32..e78c1a3473 100644 --- a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py @@ -64,9 +64,9 @@ def docompute(self): static hls::stream> src0; static hls::stream> dst0; - move(src, src0); + move(in0_{self.hls_sname()}, src0); smaxquant(src0, dst0); - move(dst0, dst); + move(dst0, out_{self.hls_sname()}); ''' ] @@ -74,8 +74,8 @@ def blackboxfunction(self): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ f''' void {self.onnx_node.name}( - hls::stream> &src, - hls::stream> &dst + hls::stream> &in0_{self.hls_sname()}, + hls::stream> &out_{self.hls_sname()} ) ''' ] @@ -83,10 +83,10 @@ def blackboxfunction(self): def pragmas(self): self.code_gen_dict["$PRAGMAS$"] = [ f''' - #pragma HLS interface AXIS port=src - #pragma HLS interface AXIS port=dst - #pragma HLS aggregate variable=src compact=bit - #pragma HLS aggregate variable=dst compact=bit + #pragma HLS interface AXIS port=in0_{self.hls_sname()} + #pragma HLS interface AXIS port=out_{self.hls_sname()} + #pragma HLS aggregate variable=in0_{self.hls_sname()} compact=bit + #pragma HLS aggregate variable=out_{self.hls_sname()} compact=bit #pragma HLS interface ap_ctrl_none port=return #pragma HLS dataflow disable_start_propagation From 4fc3e2c39a42cb8931f725726b9e258d92c67d67 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Tue, 30 Jul 2024 12:38:30 +0100 Subject: [PATCH 22/49] softmax: quantsoftmax replaces softmax+multithreshold --- src/finn/transformation/fpgadataflow/convert_to_hw_layers.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index aef5f6a64c..257db2c79a 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -1731,7 +1731,7 @@ def apply(self, model): new_node = helper.make_node( "QuantSoftmax", [n.input[0]], # input tensor(s) - [n.output[0]], # output tensor(s) + [consumer.output[0]], # output tensor(s) domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", ifm_dim=[h, w], @@ -1742,6 +1742,8 @@ def apply(self, model): ) graph.node.insert(node_ind, new_node) graph.node.remove(n) + # remove multithreshold too + graph.node.remove(consumer) graph_modified = True if graph_modified: From e26cc5f535a0ae4b53d1d5c22ee542a0d7d4877b Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Wed, 31 Jul 2024 10:37:00 +0100 Subject: [PATCH 23/49] softmax: run stitchedip --- tests/fpgadataflow/test_fpgadataflow_softmax.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) 
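
Note: a small worked example of how the QuantSoftmax shape attributes defined
above translate into tensor shapes and stream widths (the numbers are
illustrative, not taken from the patches):

    ifm_dim = [8, 8]   # (h, w) of the feature map feeding the softmax
    channels = 12      # innermost dimension; softmax is computed along it
    simd = 4           # elements handled per cycle; must divide channels

    normal_shape = (1, ifm_dim[0], ifm_dim[1], channels)   # (1, 8, 8, 12), NHWC
    fold = channels // simd                                 # 3
    folded_shape = normal_shape[:-1] + (fold, simd)         # (1, 8, 8, 3, 4)
    # stream width = input bitwidth * simd, e.g. 8-bit data at simd=4 -> 32-bit AXI stream
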
diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index 06e091fc9d..a1ba6ef5bb 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -153,6 +153,11 @@ def test_convert_to_hw_softmax_layer(exec_mode, simd): model = model.transform(InferDataTypes()) model = model.transform(to_hw.InferQuantSoftmax()) model = model.transform(GiveUniqueNodeNames()) + # isolate fpga dataflow layers + parent_model = model.transform(CreateDataflowPartition()) + sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0] + sdp_node_path = getCustomOp(sdp_node).get_nodeattr("model") + model = ModelWrapper(sdp_node_path) model = model.transform(ApplyConfig(folding_config)) model = model.transform(SpecializeLayers(test_fpga_part)) if exec_mode == "cppsim": @@ -164,7 +169,8 @@ def test_convert_to_hw_softmax_layer(exec_mode, simd): model = model.transform(GiveUniqueNodeNames()) model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) model = model.transform(HLSSynthIP()) - model = model.transform(PrepareRTLSim()) + model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) + # model = model.transform(PrepareRTLSim()) except Exception as e: pytest.fail(f"Failed to transform the model: {str(e)}") From 85b97a6fccb91a64b03a370873e950d79a53fd1e Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Wed, 31 Jul 2024 10:37:19 +0100 Subject: [PATCH 24/49] softmax: hls execute softmax bin --- .../fpgadataflow/hls/quantsoftmax_hls.py | 27 ++++++++++++++++++- .../custom_op/fpgadataflow/quantsoftmax.py | 20 ++++++++++++-- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py index e78c1a3473..a3980b0749 100644 --- a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py @@ -26,6 +26,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import os +import numpy as np from finn.custom_op.fpgadataflow.quantsoftmax import QuantSoftmax from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend @@ -91,4 +93,27 @@ def pragmas(self): #pragma HLS interface ap_ctrl_none port=return #pragma HLS dataflow disable_start_propagation ''' - ] \ No newline at end of file + ] + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + exp_ishape = self.get_normal_input_shape() + exp_oshape = self.get_normal_output_shape() + folded_ishape = self.get_folded_input_shape() + + + if mode == "cppsim": + print("Executing node with cppsim") + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + inp = context[node.input[0]] + export_idt = self.get_input_datatype() + inp = inp.reshape(folded_ishape) + np.save(os.path.join(code_gen_dir, "input_0.npy"), inp) + # # execute the precompiled model + super().exec_precompiled_singlenode_model() + # # load output npy file + super().npy_to_dynamic_output(context) + else: + raise Exception(f"Unsupported execution mode: {mode}") + diff --git a/src/finn/custom_op/fpgadataflow/quantsoftmax.py b/src/finn/custom_op/fpgadataflow/quantsoftmax.py index cac2c1a327..47167cbc3c 100644 --- a/src/finn/custom_op/fpgadataflow/quantsoftmax.py +++ b/src/finn/custom_op/fpgadataflow/quantsoftmax.py @@ -4,6 +4,10 @@ from onnx.helper import make_node import warnings from qonnx.core.datatype import DataType +import onnx +from onnx.helper import make_node, make_tensor_value_info +import numpy as np +import torch class QuantSoftmax(HWCustomOp): """Abstraction layer for HW implementation of VectorVectorActivation layers.""" @@ -34,7 +38,10 @@ def get_number_output_values(self): raise NotImplementedError("This function is not yet implemented.") def execute_node(self, context, graph): - raise NotImplementedError + node = self.onnx_node + input_data = context[node.input[0]] + output_data = torch.softmax(input_data, dim=3) + context[node.output[0]] = output_data def get_number_output_values(self): raise NotImplementedError @@ -93,4 +100,13 @@ def get_folded_output_shape(self, ind=0): assert ifm_ch % simd == 0, "SIMD must divide input channels" fold = int(normal_oshape[-1] / simd) folded_oshape = normal_oshape[:-1] + [fold, simd] - return tuple(folded_oshape) \ No newline at end of file + return tuple(folded_oshape) + + def get_folded_input_shape(self, ind=0): + normal_ishape = list(self.get_normal_input_shape()) + ifm_ch = self.get_nodeattr("channels") + simd = self.get_nodeattr("simd") + assert ifm_ch % simd == 0, "SIMD must divide input channels" + fold = int(normal_ishape[-1] / simd) + folded_ishape = normal_ishape[:-1] + [fold, simd] + return tuple(folded_ishape) \ No newline at end of file From f53a8386501cccc09f3be7aa5ec665cf9b2aaef5 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Wed, 31 Jul 2024 13:52:05 +0100 Subject: [PATCH 25/49] softmax: include correct imports --- tests/fpgadataflow/test_fpgadataflow_softmax.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index a1ba6ef5bb..8e118d6178 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -61,11 +61,15 @@ from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN +from 
finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP from finn.util.basic import pynq_part_map from finn.transformation.streamline.reorder import ( MakeMaxPoolNHWC, MoveScalarLinearPastInvariants, ) +from finn.transformation.fpgadataflow.create_dataflow_partition import ( + CreateDataflowPartition, +) from qonnx.transformation.general import ( ApplyConfig, GiveReadableTensorNames, From 9dbcc13e1acb6472af79fb49158e4351980d2916 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Wed, 31 Jul 2024 13:52:33 +0100 Subject: [PATCH 26/49] softmax: set preferred impl style --- tests/fpgadataflow/test_fpgadataflow_softmax.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index 8e118d6178..ff621dd026 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -148,7 +148,8 @@ def test_convert_to_hw_softmax_layer(exec_mode, simd): folding_config = { "Defaults": {}, "QuantSoftmax_0": { - "simd": simd + "simd": simd, + "preferred_impl_style": "hls" } } try: From 843b7b142e0e14a4bf6df1cb293ce25010b224e3 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Wed, 31 Jul 2024 17:46:32 +0100 Subject: [PATCH 27/49] finn: add hls library paths --- docker/finn_entrypoint.sh | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/docker/finn_entrypoint.sh b/docker/finn_entrypoint.sh index c7500bcaa6..4004523bad 100644 --- a/docker/finn_entrypoint.sh +++ b/docker/finn_entrypoint.sh @@ -81,14 +81,14 @@ if [ -f "$VITIS_PATH/settings64.sh" ];then export XILINX_XRT=/opt/xilinx/xrt source $VITIS_PATH/settings64.sh gecho "Found Vitis at $VITIS_PATH" - if [ -f "$XILINX_XRT/setup.sh" ];then - # source XRT - source $XILINX_XRT/setup.sh - gecho "Found XRT at $XILINX_XRT" - else - recho "XRT not found on $XILINX_XRT, did you skip the download or did the installation fail?" - exit -1 - fi + # if [ -f "$XILINX_XRT/setup.sh" ];then + # # source XRT + # source $XILINX_XRT/setup.sh + # gecho "Found XRT at $XILINX_XRT" + # else + # recho "XRT not found on $XILINX_XRT, did you skip the download or did the installation fail?" + # exit -1 + # fi else yecho "Unable to find $VITIS_PATH/settings64.sh" yecho "Functionality dependent on Vitis will not be available." 
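The hunk below extends LD_LIBRARY_PATH so that cppsim executables linked against the Vitis HLS floating-point models and csim support libraries can resolve them at run time. A quick sanity check of that environment from Python might look like the following sketch; it only confirms that the directories named in the exports exist under HLS_PATH and is not part of the patch itself:

    import os

    def hls_csim_libs_visible():
        # relies on the HLS_PATH variable the entrypoint script already expects
        hls = os.environ.get("HLS_PATH", "")
        candidates = [
            os.path.join(hls, "lnx64", "lib", "csim"),
            os.path.join(hls, "lnx64", "tools", "fpo_v7_1"),
        ]
        return all(os.path.isdir(p) for p in candidates)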
@@ -137,6 +137,15 @@ else echo "See https://docs.xilinx.com/r/en-US/ug835-vivado-tcl-commands/Tcl-Initialization-Scripts" fi +# add hls library path to LD_LIBRARY_PATH +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HLS_PATH/lnx64/tools/fpo_v7_1" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HLS_PATH/lnx64/tools/fft_v9_1" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HLS_PATH/lnx64/tools/fir_v7_0" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HLS_PATH/lnx64/tools/dds_v6_0" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HLS_PATH/tps/lnx64/gcc-8.3.0/lib" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HLS_PATH/lib/lnx64.o/Rhel" +export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$HLS_PATH/lnx64/lib/csim" + export PATH=$PATH:$HOME/.local/bin # execute the provided command(s) as root exec "$@" From 705294797fb4567bca21d34f7c12752054c7dac7 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Thu, 1 Aug 2024 10:54:57 +0100 Subject: [PATCH 28/49] cpp compiler: raise exception if compilation fails --- src/finn/util/basic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/finn/util/basic.py b/src/finn/util/basic.py index 91c191962f..c2e2cbcd8a 100644 --- a/src/finn/util/basic.py +++ b/src/finn/util/basic.py @@ -192,8 +192,12 @@ def build(self, code_gen_dir): f.write("#!/bin/bash \n") f.write(bash_compile + "\n") bash_command = ["bash", self.compile_script] - process_compile = subprocess.Popen(bash_command, stdout=subprocess.PIPE) - process_compile.communicate() + + with open(str(self.code_gen_dir) + "/compile.log", "w") as f: + try: + subprocess.check_output(bash_command, stderr=f) + except subprocess.CalledProcessError: + raise Exception(f"Error in compiling the generated code. Check {f.name} for more details.") def launch_process_helper(args, proc_env=None, cwd=None): From ffeb69ca4d9ccbdd22c806b8622fe01aef77358a Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Thu, 1 Aug 2024 10:56:03 +0100 Subject: [PATCH 29/49] softmax: compile node for cpp sim --- .../fpgadataflow/hls/quantsoftmax_hls.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py index a3980b0749..5337f4561b 100644 --- a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py @@ -30,6 +30,8 @@ import numpy as np from finn.custom_op.fpgadataflow.quantsoftmax import QuantSoftmax from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend +import subprocess +from finn.util.basic import CppBuilder, get_rtlsim_trace_depth, make_build_dir class QuantSoftmax_hls(QuantSoftmax, HLSBackend): def __init__(self, onnx_node, **kwargs): @@ -117,3 +119,25 @@ def execute_node(self, context, graph): else: raise Exception(f"Unsupported execution mode: {mode}") + def compile_singlenode_code(self): + """Builds the bash script for compilation using the CppBuilder from + finn.util.basic and executes the script to produce the executable.""" + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + builder = CppBuilder() + # to enable additional debug features please uncommand the next line + # builder.append_includes("-DDEBUG") + builder.append_includes("-I$FINN_ROOT/src/finn/qnn-data/cpp") + builder.append_includes("-I$FINN_ROOT/deps/cnpy/") + builder.append_includes("-I$FINN_ROOT/deps/finn-hlslib") + builder.append_includes("-I$FINN_ROOT/custom_hls") + builder.append_includes("-I{}/include".format(os.environ["HLS_PATH"])) + builder.append_includes("--std=c++14") + 
builder.append_includes("-O3") + builder.append_sources(code_gen_dir + "/*.cpp") + builder.append_sources("$FINN_ROOT/deps/cnpy/cnpy.cpp") + builder.append_includes("-lz") + builder.append_includes("-fno-builtin -fno-inline -Wl,-rpath,\"$HLS_PATH/lnx64/lib/csim\" -L$HLS_PATH/lnx64/lib/csim -lhlsmc++-GCC46") + builder.append_includes("-L$HLS_PATH/lnx64/tools/fpo_v7_1 -lgmp -lmpfr -lIp_floating_point_v7_1_bitacc_cmodel") + builder.set_executable_path(code_gen_dir + "/node_model") + builder.build(code_gen_dir) + self.set_nodeattr("executable_path", builder.executable_path) \ No newline at end of file From ef80c8e412dc1b64e2a701c0e66201221a7405db Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Fri, 2 Aug 2024 02:36:40 +0100 Subject: [PATCH 30/49] softmax: generate cppsim code --- .../fpgadataflow/hls/quantsoftmax_hls.py | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py index 5337f4561b..41541274d2 100644 --- a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py @@ -30,6 +30,7 @@ import numpy as np from finn.custom_op.fpgadataflow.quantsoftmax import QuantSoftmax from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend +from finn.custom_op.fpgadataflow import templates import subprocess from finn.util.basic import CppBuilder, get_rtlsim_trace_depth, make_build_dir @@ -140,4 +141,48 @@ def compile_singlenode_code(self): builder.append_includes("-L$HLS_PATH/lnx64/tools/fpo_v7_1 -lgmp -lmpfr -lIp_floating_point_v7_1_bitacc_cmodel") builder.set_executable_path(code_gen_dir + "/node_model") builder.build(code_gen_dir) - self.set_nodeattr("executable_path", builder.executable_path) \ No newline at end of file + self.set_nodeattr("executable_path", builder.executable_path) + + def code_generation_cppsim(self, model): + """Generates c++ code for simulation (cppsim).""" + node = self.onnx_node + path = self.get_nodeattr("code_gen_dir_cppsim") + self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())] + self.generate_params(model, path) + self.global_includes() + self.defines("cppsim") + self.read_npy_data() + self.strm_decl() + self.pragmas() + + self.code_gen_dict["$DOCOMPUTE$"] = [ + f""" + static hls::stream> src0; + static hls::stream> dst0; + + hls::vector x; + for(unsigned i=0; i(src0, dst0); + + for(unsigned i=0; i Date: Fri, 2 Aug 2024 11:43:40 +0100 Subject: [PATCH 31/49] softmax: does not suppor rtlsim --- src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py index 41541274d2..3a3cb2b076 100644 --- a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py @@ -186,3 +186,6 @@ def code_generation_cppsim(self, model): code_gen_line = "\n".join(self.code_gen_dict[key]) template = template.replace(key, code_gen_line) f.write(template) + def prepare_rtlsim(self): + # this node currently does not support rtlsim + raise NotImplementedError("QuantSoftmax_hls does not support rtlsim") \ No newline at end of file From 1c150224e322748790a84cf8b8a3986e2e513775 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Fri, 2 Aug 2024 11:44:23 +0100 Subject: [PATCH 32/49] softmax: transformation test --- .../fpgadataflow/test_fpgadataflow_softmax.py | 35 
+++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index ff621dd026..851e86cddd 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -103,22 +103,19 @@ # 2. check values are correct -def create_model(): +def create_model(io_shape=(1, 12, 128, 128)): ''' Create a quantized softmax model. Input and output are quantized to Int8ActPerTensorFloat, this is to make sure that the softmax layer is followed by a Quant node. ''' - io_shape = (1, 12, 128, 128) class QuantSoftMaxSimple(nn.Module): def __init__(self): super(QuantSoftMaxSimple, self).__init__() - # self.input_identity = qnn.QuantIdentity(act_quant=Int8ActPerTensorFloat) self.output_identity = qnn.QuantIdentity() self.softmax = nn.Softmax(dim=3) # softmax along the last dimension def forward(self, x): - # x = self.input_identity(x) x = self.softmax(x) x = self.output_identity(x) return x @@ -130,20 +127,24 @@ def forward(self, x): # set the model input to INT8 model = ModelWrapper(export_onnx_path) model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"]) - # import pdb; pdb.set_trace() return model -@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim", "stitched_ip"]) @pytest.mark.parametrize("simd", ["simd1", "simd2", "simd3", "simd4"]) @pytest.mark.fpgadataflow def test_convert_to_hw_softmax_layer(exec_mode, simd): ''' - Test that all transofrmations can be applied to a model with a softmax layer. + This test checks that the softmax layer can be converted to a HW layer. ''' + if (exec_mode == "stitched_ip" or exec_mode == "rtlsim") and simd != "simd1": + pytest.skip("Skipping this test to avoid long test times") # Create the qonnx model - # modelproto = create_softmax_graph() + io_shape = (1, 12, 128, 128) + # input = torch.randn(io_shape) + input = gen_finn_dt_tensor(DataType["UINT8"], io_shape) + input_t = {"global_in": input} - model = create_model() + model = create_model(io_shape) simd = int(simd[-1]) folding_config = { "Defaults": {}, @@ -165,22 +166,28 @@ def test_convert_to_hw_softmax_layer(exec_mode, simd): model = ModelWrapper(sdp_node_path) model = model.transform(ApplyConfig(folding_config)) model = model.transform(SpecializeLayers(test_fpga_part)) + model = model.transform(GiveUniqueNodeNames()) if exec_mode == "cppsim": + model = model.transform(SetExecMode("cppsim")) model = model.transform(PrepareCppSim()) model = model.transform(CompileCppSim()) - model = model.transform(SetExecMode("cppsim")) elif exec_mode == "rtlsim": model = model.transform(SetExecMode("rtlsim")) - model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) + model = model.transform(HLSSynthIP()) + try: + model = model.transform(PrepareRTLSim()) + pytest.fail("PrepareRTLSim should have failed") + except Exception as e: + # expected to fail because this node do not support rtlsim + pass + elif exec_mode == "stitched_ip": model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) model = model.transform(HLSSynthIP()) model = model.transform(CreateStitchedIP(test_fpga_part, target_clk_ns)) - # model = model.transform(PrepareRTLSim()) except Exception as e: pytest.fail(f"Failed to transform the model: {str(e)}") - # oxe.execute_onnx() - def test_fpgadataflow_quantsoftmax(): # Create the qonnx model # create_model() From 
3a78f7953e6172dca7adaf96094588e6c8aaeeb4 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Tue, 6 Aug 2024 09:14:32 +0100 Subject: [PATCH 33/49] softmax: more generic testcase --- .../fpgadataflow/test_fpgadataflow_softmax.py | 66 ++++++++++++++++--- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index 851e86cddd..b4e0129a5c 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -129,6 +129,42 @@ def forward(self, x): model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"]) return model +def make_single_quantsoftmax_modelwrapper(impl_style="hls", simd=1, idt=DataType["UINT8"], ifm_dim=(128, 128), channels=12): + ''' + Create a single quantized softmax node with variable parameters. + this is before SpecializeLayers() transformation. + ''' + h = ifm_dim[0] + w = ifm_dim[1] + + inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, h, w, channels]) + outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, h, w, channels]) + new_node = helper.make_node( + "QuantSoftmax", + ["inp"], + ["outp"], + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + ifm_dim=[h, w], + channels=channels, + data_type = idt.name, + simd=simd, + preferred_impl_style=impl_style, + ) + graph = helper.make_graph( + [new_node], + "softmax_graph", + inputs=[inp], + outputs=[outp] + ) + model = qonnx_make_model(graph, producer_name="fmpadding-model") + model = ModelWrapper(model) + + model.set_tensor_datatype("inp", idt) + model.set_tensor_datatype("outp", idt) + + return model + @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim", "stitched_ip"]) @pytest.mark.parametrize("simd", ["simd1", "simd2", "simd3", "simd4"]) @pytest.mark.fpgadataflow @@ -188,16 +224,30 @@ def test_convert_to_hw_softmax_layer(exec_mode, simd): except Exception as e: pytest.fail(f"Failed to transform the model: {str(e)}") -def test_fpgadataflow_quantsoftmax(): + +@pytest.mark.parametrize("impl_style", ["hls","rtl"]) +@pytest.mark.parametrize("simd", ["simd1", "simd2", "simd3", "simd4"]) +@pytest.mark.parametrize("idt", [DataType["INT2"], DataType["INT4"]]) +@pytest.mark.parametrize("ifm_dim", [(12,128)]) +@pytest.mark.parametrize("channels", [128, 384]) +@pytest.mark.fpgadataflow +def test_fpga_dataflow_quantsoftmax(impl_style, simd, idt, ifm_dim, channels): + simd = int(simd[-1]) + model = make_single_quantsoftmax_modelwrapper(impl_style=impl_style, simd=simd, idt=idt, ifm_dim=ifm_dim, channels=channels) + # Create the qonnx model - # create_model() - model = create_model() + io_shape = (1, 12, 128, 128) + # input = torch.randn(io_shape) + input = gen_finn_dt_tensor(DataType["UINT8"], io_shape) + input_t = {"global_in": input} + try: - model = model.transform(InferShapes()) - model = model.transform(InferDataTypes()) - model = model.transform(FoldConstants()) - model = model.transform(to_hw.InferQuantSoftmax()) model = model.transform(SpecializeLayers(test_fpga_part)) - + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(SetExecMode("cppsim")) + model = model.transform(PrepareCppSim()) + model = model.transform(CompileCppSim()) + # run the model + oxe.execute_onnx(model, input_t) except Exception as e: pytest.fail(f"Failed to transform the model: {str(e)}") \ No newline at end of file From cd8d27080f32db3f1e360800ae29f5a36532133c Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Wed, 7 Aug 2024 
15:26:31 +0100 Subject: [PATCH 34/49] hlsbackend: handle subprocess exceptions and log them --- src/finn/custom_op/fpgadataflow/hlsbackend.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index d8210fd684..5436aa31af 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -307,16 +307,21 @@ def npy_to_dynamic_outputs(self, context, npy_list): def exec_precompiled_singlenode_model(self): """Executes precompiled executable.""" - executable_path = self.get_nodeattr("executable_path") - if executable_path == "": + executable = self.get_nodeattr("executable_path") + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + if executable == "": raise Exception( """ Found no executable for this node, did you run the codegen and compilation transformations? """ ) - process_execute = subprocess.Popen(executable_path, stdout=subprocess.PIPE) - process_execute.communicate() + with open(code_gen_dir + "/sim.log", "w") as f: + try: + subprocess.check_output(executable, stderr=f) + except subprocess.CalledProcessError: + raise Exception(f"Error running the generated code. Check {f.name} for more details.") + def hls_sname(self): """Get the naming convention used by Vitis HLS for stream signals From f6a7b8b5295808f5f634f0122e224fc857e2e2a8 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Wed, 7 Aug 2024 15:28:29 +0100 Subject: [PATCH 35/49] cpp template: try catch in cppsim templates --- src/finn/custom_op/fpgadataflow/templates.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 3d89a0ab23..920711909a 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -42,18 +42,22 @@ $DEFINES$ int main(){ -$PRAGMAS$ -$STREAMDECLARATIONS$ + $PRAGMAS$ -$READNPYDATA$ + try { + $STREAMDECLARATIONS$ -$DOCOMPUTE$ + $READNPYDATA$ -$DATAOUTSTREAM$ + $DOCOMPUTE$ -$SAVEASCNPY$ + $DATAOUTSTREAM$ + $SAVEASCNPY$ + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + } } """ From 618a529e9963e2739158aee689e3414e204b06d8 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Thu, 8 Aug 2024 09:27:41 +0100 Subject: [PATCH 36/49] softmax: generate cppsim with npyvector stream --- .../fpgadataflow/hls/quantsoftmax_hls.py | 51 ++++++++----------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py index 3a3cb2b076..ac9abd86c0 100644 --- a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py @@ -31,9 +31,7 @@ from finn.custom_op.fpgadataflow.quantsoftmax import QuantSoftmax from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.custom_op.fpgadataflow import templates -import subprocess -from finn.util.basic import CppBuilder, get_rtlsim_trace_depth, make_build_dir - +from finn.util.basic import CppBuilder class QuantSoftmax_hls(QuantSoftmax, HLSBackend): def __init__(self, onnx_node, **kwargs): super().__init__(onnx_node, **kwargs) @@ -46,19 +44,21 @@ def get_nodeattr_types(self): def global_includes(self): self.code_gen_dict["$GLOBALS$"] = [ + '#include "npy2vectorstream.hpp"', + '#include "debug_print.hpp"', '#include "softmax.hpp"', 
'#include "utils.hpp"' ] def defines(self, var): simd = self.get_nodeattr("simd") - ibits = self.get_input_datatype().bitwidth() + dtype = self.get_input_datatype() channels = self.get_nodeattr("channels") self.code_gen_dict["$DEFINES$"] = [ f""" constexpr unsigned SIMD = {simd}; constexpr unsigned W = {channels}; - using T = ap_uint<{ibits}>; + using T = {dtype.get_hls_datatype_str()}; using F = float; """ ] @@ -101,17 +101,13 @@ def pragmas(self): def execute_node(self, context, graph): mode = self.get_nodeattr("exec_mode") node = self.onnx_node - exp_ishape = self.get_normal_input_shape() - exp_oshape = self.get_normal_output_shape() folded_ishape = self.get_folded_input_shape() - if mode == "cppsim": - print("Executing node with cppsim") code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") inp = context[node.input[0]] export_idt = self.get_input_datatype() - inp = inp.reshape(folded_ishape) + # inp = inp.reshape(folded_ishape) np.save(os.path.join(code_gen_dir, "input_0.npy"), inp) # # execute the precompiled model super().exec_precompiled_singlenode_model() @@ -145,36 +141,32 @@ def compile_singlenode_code(self): def code_generation_cppsim(self, model): """Generates c++ code for simulation (cppsim).""" + self.code_gen_dict["$READNPYDATA$"] = [""] + self.code_gen_dict["$DATAOUTSTREAM$"] = [""] + self.code_gen_dict["$STREAMDECLARATIONS$"] = [""] node = self.onnx_node path = self.get_nodeattr("code_gen_dir_cppsim") self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())] self.generate_params(model, path) self.global_includes() self.defines("cppsim") - self.read_npy_data() - self.strm_decl() self.pragmas() - + oshape = self.get_normal_output_shape() + oshape_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DOCOMPUTE$"] = [ - f""" - static hls::stream> src0; - static hls::stream> dst0; + f''' + static hls::stream> in0_V; + static hls::stream> out_V; - hls::vector x; - for(unsigned i=0; i(src0, dst0); + npy2vectorstream("{path}/input_0.npy", in0_V); + + for (unsigned i = 0; i < 300; i++){{ + smaxquant(in0_V, out_V); + }} - for(unsigned i=0; i(out_V,{oshape_str}, "{path}/output.npy"); + ''' ] - self.dataoutstrm() self.save_as_npy() template = templates.docompute_template @@ -186,6 +178,7 @@ def code_generation_cppsim(self, model): code_gen_line = "\n".join(self.code_gen_dict[key]) template = template.replace(key, code_gen_line) f.write(template) + def prepare_rtlsim(self): # this node currently does not support rtlsim raise NotImplementedError("QuantSoftmax_hls does not support rtlsim") \ No newline at end of file From 89dfd56f796616e58c87e8f61f3e88dbe909a1cc Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Thu, 8 Aug 2024 09:29:00 +0100 Subject: [PATCH 37/49] softmax: functional model --- .../custom_op/fpgadataflow/quantsoftmax.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/quantsoftmax.py b/src/finn/custom_op/fpgadataflow/quantsoftmax.py index 47167cbc3c..ac9c17fb63 100644 --- a/src/finn/custom_op/fpgadataflow/quantsoftmax.py +++ b/src/finn/custom_op/fpgadataflow/quantsoftmax.py @@ -1,14 +1,11 @@ from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp -from finn.util.data_packing import numpy_to_hls_code, pack_innermost_dim_as_hex_string from onnx.helper import make_node import warnings from qonnx.core.datatype import DataType -import onnx -from onnx.helper import make_node, make_tensor_value_info +from onnx.helper import make_node import numpy as np -import torch - 
+from scipy.special import softmax class QuantSoftmax(HWCustomOp): """Abstraction layer for HW implementation of VectorVectorActivation layers.""" @@ -37,11 +34,22 @@ def get_normal_output_shape(self, ind=0): def get_number_output_values(self): raise NotImplementedError("This function is not yet implemented.") + def quantise_to_int(self, arr, dtype): + max_val = np.iinfo(dtype).max + output = np.zeros_like(arr, dtype=dtype) + frac_part = arr - np.floor(arr) + scaled_frac = frac_part * max_val + output = scaled_frac.astype(dtype) + output[arr >= 1.0] = max_val + return output + def execute_node(self, context, graph): node = self.onnx_node input_data = context[node.input[0]] - output_data = torch.softmax(input_data, dim=3) - context[node.output[0]] = output_data + output_data = softmax(input_data, axis=-1) + qsm_out = self.quantise_to_int(output_data, np.int8) + context[node.output[0]] = qsm_out + def get_number_output_values(self): raise NotImplementedError From d98561a86e7fad94537feb7f39ceb4b5d9765d01 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Thu, 8 Aug 2024 09:30:12 +0100 Subject: [PATCH 38/49] softmax: clean up prints --- .../fpgadataflow/convert_to_hw_layers.py | 1 - tests/fpgadataflow/test_fpgadataflow_softmax.py | 14 -------------- 2 files changed, 15 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index 257db2c79a..e400e4335f 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -1715,7 +1715,6 @@ def apply(self, model): # check that an optype of Softmax is present followed by a MultiThreshold consumer = model.find_consumer(n.output[0]) if n.op_type == "Softmax" and consumer is not None and consumer.op_type == "MultiThreshold": - print("Found Softmax followed by MultiThreshold") # get the shape of the input/output tensor input_shape = model.get_tensor_shape(n.input[0]) assert input_shape == model.get_tensor_shape(consumer.input[0]), ( diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index b4e0129a5c..db5f7fe053 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -89,20 +89,6 @@ target_clk_ns = 5 export_onnx_path = "softmax_dut_qonnx.onnx" -### Make model wrapper -# 1. make node, - - -### Test -## 1. Compiler integration -# 1. check all transforms can be applied to a model with a softmax layer -# 2. Check that IP stitching produces valid HLS package - -## 2. Functionality test -# 1. Check that we can run cpp/rtl sims -# 2. check values are correct - - def create_model(io_shape=(1, 12, 128, 128)): ''' Create a quantized softmax model. 
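For reference, the functional model introduced in PATCH 37 above reduces to a softmax over the channel axis followed by a simple fraction-to-integer mapping. A self-contained NumPy sketch that mirrors quantise_to_int and execute_node (function names chosen for illustration):

    import numpy as np
    from scipy.special import softmax

    def quantsoftmax_ref(x, out_dtype=np.int8):
        # softmax over the innermost (channel) axis, as in execute_node
        y = softmax(x, axis=-1)
        max_val = np.iinfo(out_dtype).max
        # keep only the fractional part and scale it into the integer range
        frac = y - np.floor(y)
        q = (frac * max_val).astype(out_dtype)
        q[y >= 1.0] = max_val  # saturate anything that reaches 1.0
        return q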
From 2eaec1e2a7f59539c2aeba86313e50e4dfcc61c0 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Thu, 8 Aug 2024 09:31:33 +0100 Subject: [PATCH 39/49] softmax: clear up functional test --- .../fpgadataflow/test_fpgadataflow_softmax.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index db5f7fe053..6824c02913 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -148,7 +148,7 @@ def make_single_quantsoftmax_modelwrapper(impl_style="hls", simd=1, idt=DataType model.set_tensor_datatype("inp", idt) model.set_tensor_datatype("outp", idt) - + return model @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim", "stitched_ip"]) @@ -213,19 +213,20 @@ def test_convert_to_hw_softmax_layer(exec_mode, simd): @pytest.mark.parametrize("impl_style", ["hls","rtl"]) @pytest.mark.parametrize("simd", ["simd1", "simd2", "simd3", "simd4"]) -@pytest.mark.parametrize("idt", [DataType["INT2"], DataType["INT4"]]) +@pytest.mark.parametrize("idt", [DataType["UINT8"],DataType["INT8"],DataType["INT4"],DataType["UINT4"]]) @pytest.mark.parametrize("ifm_dim", [(12,128)]) @pytest.mark.parametrize("channels", [128, 384]) @pytest.mark.fpgadataflow def test_fpga_dataflow_quantsoftmax(impl_style, simd, idt, ifm_dim, channels): simd = int(simd[-1]) model = make_single_quantsoftmax_modelwrapper(impl_style=impl_style, simd=simd, idt=idt, ifm_dim=ifm_dim, channels=channels) - + # Create the qonnx model - io_shape = (1, 12, 128, 128) - # input = torch.randn(io_shape) - input = gen_finn_dt_tensor(DataType["UINT8"], io_shape) - input_t = {"global_in": input} + io_shape = (1, ifm_dim[0], ifm_dim[1], channels) + input = gen_finn_dt_tensor(idt, io_shape) + input_t = {"inp": input} + + y_expected = oxe.execute_onnx(model, input_t)["outp"] try: model = model.transform(SpecializeLayers(test_fpga_part)) @@ -234,6 +235,7 @@ def test_fpga_dataflow_quantsoftmax(impl_style, simd, idt, ifm_dim, channels): model = model.transform(PrepareCppSim()) model = model.transform(CompileCppSim()) # run the model - oxe.execute_onnx(model, input_t) + y_hw = oxe.execute_onnx(model, input_t)["outp"] + assert (y_hw == y_expected).all(), "HW layer execution failed" except Exception as e: pytest.fail(f"Failed to transform the model: {str(e)}") \ No newline at end of file From 42c810feeb0764b4d23f8cbbe1b1086994d8eec3 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Thu, 8 Aug 2024 09:39:19 +0100 Subject: [PATCH 40/49] softmax: clean up unused modules in test --- .../fpgadataflow/test_fpgadataflow_softmax.py | 24 +------------------ 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index 6824c02913..9ba35e7d8d 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -28,9 +28,7 @@ import pytest import torch -import onnx -from onnx import helper, numpy_helper -import numpy as np +from onnx import helper import os import finn.core.onnx_exec as oxe from brevitas.export import export_qonnx @@ -42,17 +40,8 @@ from qonnx.transformation.infer_shapes import InferShapes from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model from qonnx.transformation.infer_datatypes import InferDataTypes -from qonnx.transformation.fold_constants import FoldConstants -from finn.transformation.streamline.absorb import 
( - AbsorbAddIntoMultiThreshold, - AbsorbMulIntoMultiThreshold, - FactorOutMulSignMagnitude, - Absorb1BitMulIntoConv, -) import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw -from brevitas.quant.scaled_int import Int8ActPerTensorFloat, Int8WeightPerTensorFloat import finn.core.onnx_exec as oxe -from finn.analysis.fpgadataflow.exp_cycles_per_layer import exp_cycles_per_layer from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim @@ -62,26 +51,15 @@ from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP -from finn.util.basic import pynq_part_map -from finn.transformation.streamline.reorder import ( - MakeMaxPoolNHWC, - MoveScalarLinearPastInvariants, -) from finn.transformation.fpgadataflow.create_dataflow_partition import ( CreateDataflowPartition, ) from qonnx.transformation.general import ( ApplyConfig, - GiveReadableTensorNames, GiveUniqueNodeNames, - RemoveStaticGraphInputs, - RemoveUnusedTensors, ) -from finn.transformation.streamline import Streamline import finn.transformation.streamline.absorb as absorb -import onnx from onnx import helper -import onnxruntime import torch import torch.nn as nn import brevitas.nn as qnn From 4b49c66569669862fb24909e3d90dd5fb7b6dd9e Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Thu, 8 Aug 2024 11:23:42 +0100 Subject: [PATCH 41/49] softmax: use folded output shape --- src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py index ac9abd86c0..71f1b30b40 100644 --- a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py @@ -107,7 +107,7 @@ def execute_node(self, context, graph): code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") inp = context[node.input[0]] export_idt = self.get_input_datatype() - # inp = inp.reshape(folded_ishape) + inp = inp.reshape(folded_ishape) np.save(os.path.join(code_gen_dir, "input_0.npy"), inp) # # execute the precompiled model super().exec_precompiled_singlenode_model() @@ -151,7 +151,7 @@ def code_generation_cppsim(self, model): self.global_includes() self.defines("cppsim") self.pragmas() - oshape = self.get_normal_output_shape() + oshape = self.get_folded_output_shape() oshape_str = str(oshape).replace("(", "{").replace(")", "}") self.code_gen_dict["$DOCOMPUTE$"] = [ f''' @@ -160,7 +160,7 @@ def code_generation_cppsim(self, model): npy2vectorstream("{path}/input_0.npy", in0_V); - for (unsigned i = 0; i < 300; i++){{ + for (unsigned i = 0; i < 900; i++){{ smaxquant(in0_V, out_V); }} From 1ff725fb82c02a2e1e8640b3656a0af82d78126a Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Thu, 8 Aug 2024 16:46:52 +0100 Subject: [PATCH 42/49] softmax: use onnx graph to validate the finn integration --- .../fpgadataflow/test_fpgadataflow_softmax.py | 82 +++++++++++-------- 1 file changed, 48 insertions(+), 34 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index 9ba35e7d8d..358a278bb9 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ 
b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -29,7 +29,6 @@ import pytest import torch from onnx import helper -import os import finn.core.onnx_exec as oxe from brevitas.export import export_qonnx from qonnx.util.cleanup import cleanup as qonnx_cleanup @@ -41,7 +40,6 @@ from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model from qonnx.transformation.infer_datatypes import InferDataTypes import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw -import finn.core.onnx_exec as oxe from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim @@ -63,35 +61,39 @@ import torch import torch.nn as nn import brevitas.nn as qnn +import numpy as np test_fpga_part = "xczu3eg-sbva484-1-e" target_clk_ns = 5 -export_onnx_path = "softmax_dut_qonnx.onnx" +export_onnx_path = "pytest_quantsoftmax_dut.onnx" + +class QuantSoftMaxSimple(nn.Module): + def __init__(self, bit_width=8, signed=True): + super(QuantSoftMaxSimple, self).__init__() + self.output_identity = qnn.QuantIdentity(bit_width=bit_width, scaling_per_tensor=True, bias=False, signed = signed) + self.softmax = nn.Softmax(dim=3) # softmax along the last dimension + + def get_quant_scale(self): + return self.output_identity.quant_act_scale() -def create_model(io_shape=(1, 12, 128, 128)): + def forward(self, x): + x = self.softmax(x) + x = self.output_identity(x) + return x + +def create_model(io_shape=(1, 12, 128, 128), idt=DataType["INT8"]): ''' Create a quantized softmax model. Input and output are quantized to Int8ActPerTensorFloat, this is to make sure that the softmax layer is followed by a Quant node. ''' - class QuantSoftMaxSimple(nn.Module): - def __init__(self): - super(QuantSoftMaxSimple, self).__init__() - self.output_identity = qnn.QuantIdentity() - self.softmax = nn.Softmax(dim=3) # softmax along the last dimension - - def forward(self, x): - x = self.softmax(x) - x = self.output_identity(x) - return x - - dut = QuantSoftMaxSimple() - input = torch.randn(io_shape) + dut = QuantSoftMaxSimple(idt.bitwidth(), idt.signed()) + input = torch.rand(io_shape) export_qonnx(dut, input, export_onnx_path, opset_version=11) qonnx_cleanup(export_onnx_path, out_file=export_onnx_path) - # set the model input to INT8 + # set the model input to UINT8 model = ModelWrapper(export_onnx_path) - model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"]) - return model + model.set_tensor_datatype(model.graph.input[0].name, idt) + return model, dut.get_quant_scale() def make_single_quantsoftmax_modelwrapper(impl_style="hls", simd=1, idt=DataType["UINT8"], ifm_dim=(128, 128), channels=12): ''' @@ -101,12 +103,12 @@ def make_single_quantsoftmax_modelwrapper(impl_style="hls", simd=1, idt=DataType h = ifm_dim[0] w = ifm_dim[1] - inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, [1, h, w, channels]) - outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, [1, h, w, channels]) + inp = helper.make_tensor_value_info("global_in", TensorProto.FLOAT, [1, h, w, channels]) + outp = helper.make_tensor_value_info("global_out", TensorProto.FLOAT, [1, h, w, channels]) new_node = helper.make_node( "QuantSoftmax", - ["inp"], - ["outp"], + ["global_in"], + ["global_out"], domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", ifm_dim=[h, w], @@ -121,11 +123,11 @@ def make_single_quantsoftmax_modelwrapper(impl_style="hls", simd=1, idt=DataType inputs=[inp], 
outputs=[outp] ) - model = qonnx_make_model(graph, producer_name="fmpadding-model") + model = qonnx_make_model(graph) model = ModelWrapper(model) - model.set_tensor_datatype("inp", idt) - model.set_tensor_datatype("outp", idt) + model.set_tensor_datatype("global_in", idt) + model.set_tensor_datatype("global_out", idt) return model @@ -144,7 +146,8 @@ def test_convert_to_hw_softmax_layer(exec_mode, simd): input = gen_finn_dt_tensor(DataType["UINT8"], io_shape) input_t = {"global_in": input} - model = create_model(io_shape) + model, _ = create_model(io_shape) + simd = int(simd[-1]) folding_config = { "Defaults": {}, @@ -192,19 +195,28 @@ def test_convert_to_hw_softmax_layer(exec_mode, simd): @pytest.mark.parametrize("impl_style", ["hls","rtl"]) @pytest.mark.parametrize("simd", ["simd1", "simd2", "simd3", "simd4"]) @pytest.mark.parametrize("idt", [DataType["UINT8"],DataType["INT8"],DataType["INT4"],DataType["UINT4"]]) -@pytest.mark.parametrize("ifm_dim", [(12,128)]) -@pytest.mark.parametrize("channels", [128, 384]) +@pytest.mark.parametrize("ifm_dim", [(12,12)]) +@pytest.mark.parametrize("channels", [12, 384]) @pytest.mark.fpgadataflow def test_fpga_dataflow_quantsoftmax(impl_style, simd, idt, ifm_dim, channels): simd = int(simd[-1]) + model = make_single_quantsoftmax_modelwrapper(impl_style=impl_style, simd=simd, idt=idt, ifm_dim=ifm_dim, channels=channels) # Create the qonnx model io_shape = (1, ifm_dim[0], ifm_dim[1], channels) + input = gen_finn_dt_tensor(idt, io_shape) - input_t = {"inp": input} + input_t = {"global_in": input} - y_expected = oxe.execute_onnx(model, input_t)["outp"] + # Create reference values using the qonnx model + ref_model, scale = create_model(io_shape, idt) + y_ref = oxe.execute_onnx(ref_model, input_t)["global_out"] + y_ref = y_ref / scale + y_ref = y_ref.numpy() + + y_out = oxe.execute_onnx(model, input_t)["global_out"] + assert np.allclose(y_ref, y_out, atol=5), "Model output does not match expected output" try: model = model.transform(SpecializeLayers(test_fpga_part)) @@ -213,7 +225,9 @@ def test_fpga_dataflow_quantsoftmax(impl_style, simd, idt, ifm_dim, channels): model = model.transform(PrepareCppSim()) model = model.transform(CompileCppSim()) # run the model - y_hw = oxe.execute_onnx(model, input_t)["outp"] - assert (y_hw == y_expected).all(), "HW layer execution failed" + y_hw = oxe.execute_onnx(model, input_t)["global_out"] + + assert np.allclose(y_ref, y_hw, atol=5), "Model output does not match expected output" + except Exception as e: pytest.fail(f"Failed to transform the model: {str(e)}") \ No newline at end of file From 949d1e8c8c4b8272d38d05c950ba964e7a77b098 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Thu, 8 Aug 2024 17:43:11 +0100 Subject: [PATCH 43/49] softmax: update latest hls implementation --- src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py | 8 ++++---- src/finn/custom_op/fpgadataflow/templates.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py index 71f1b30b40..697bd2cfa6 100644 --- a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py @@ -44,10 +44,10 @@ def get_nodeattr_types(self): def global_includes(self): self.code_gen_dict["$GLOBALS$"] = [ - '#include "npy2vectorstream.hpp"', - '#include "debug_print.hpp"', + '#include ', '#include "softmax.hpp"', - '#include "utils.hpp"' + '#include "utils.hpp"', + '#include "sm_utils.hpp"' ] 
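In the reworked test above, the Brevitas reference output is divided by the QuantIdentity scale before it is compared against the integer output of the hardware layer, so the accelerator is expected to produce roughly y_float / scale as integer codes. A compact sketch of that comparison (the helper name and the explicit rounding are illustrative; the test itself uses np.allclose with an absolute tolerance):

    import numpy as np

    def matches_reference(y_float, y_hw_int, scale, atol=2):
        # dequantised reference -> integer codes expected from the accelerator
        y_ref_int = np.round(y_float / scale)
        return np.allclose(y_ref_int, y_hw_int, atol=atol)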
def defines(self, var): @@ -70,7 +70,7 @@ def docompute(self): static hls::stream> dst0; move(in0_{self.hls_sname()}, src0); - smaxquant(src0, dst0); + smaxquant(src0, dst0); move(dst0, out_{self.hls_sname()}); ''' ] diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 920711909a..8c9e99a578 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -32,6 +32,7 @@ #define AP_INT_MAX_W $AP_INT_MAX_W$ #include "cnpy.h" #include "npy2apintstream.hpp" +#include "npy2vectorstream.hpp" #include #include "bnn-library.h" From 98ac566cb1b97df0ac35a6560881bf87748a045e Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Mon, 12 Aug 2024 07:31:38 +0100 Subject: [PATCH 44/49] softmax: cpp sim stream size check --- src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py index 697bd2cfa6..d759535da8 100644 --- a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py @@ -47,7 +47,6 @@ def global_includes(self): '#include ', '#include "softmax.hpp"', '#include "utils.hpp"', - '#include "sm_utils.hpp"' ] def defines(self, var): @@ -159,8 +158,9 @@ def code_generation_cppsim(self, model): static hls::stream> out_V; npy2vectorstream("{path}/input_0.npy", in0_V); + int stream_size = in0_V.size() - 1; - for (unsigned i = 0; i < 900; i++){{ + while(out_V.size() != stream_size){{ smaxquant(in0_V, out_V); }} From 43b15774d8d980e89d931f163ce761ae17fa8f47 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Mon, 12 Aug 2024 14:20:55 +0100 Subject: [PATCH 45/49] softmax: fix expected stream size --- src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py index d759535da8..19903866b3 100644 --- a/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/quantsoftmax_hls.py @@ -158,7 +158,7 @@ def code_generation_cppsim(self, model): static hls::stream> out_V; npy2vectorstream("{path}/input_0.npy", in0_V); - int stream_size = in0_V.size() - 1; + int stream_size = in0_V.size(); while(out_V.size() != stream_size){{ smaxquant(in0_V, out_V); From 8297bb44aef271ebda24121332d416e2f8c88420 Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Mon, 12 Aug 2024 14:21:15 +0100 Subject: [PATCH 46/49] softmax: add debug prints into testbench --- tests/fpgadataflow/test_fpgadataflow_softmax.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index 358a278bb9..7d5b1b7782 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -227,7 +227,17 @@ def test_fpga_dataflow_quantsoftmax(impl_style, simd, idt, ifm_dim, channels): # run the model y_hw = oxe.execute_onnx(model, input_t)["global_out"] - assert np.allclose(y_ref, y_hw, atol=5), "Model output does not match expected output" + # loop through the output tensor and compare the values + tollerance = 2 + + # Debug prints to help identify the failing values + for i in range(len(y_ref)): + for j in range(len(y_ref[i])): + for k in 
range(len(y_ref[i][j])): + for l in range(len(y_ref[i][j][k])): + if np.allclose(y_ref[i][j][k][l], y_hw[i][j][k][l], atol=tollerance) == False: + print(f"| {i},{j},{k},{l:<2} | {y_ref[i][j][k][l]:<4.0f} | {y_hw[i][j][k][l]:<4.0f} | {y_ref[i][j][k][l] - y_hw[i][j][k][l]:<4.0f} |") + except Exception as e: pytest.fail(f"Failed to transform the model: {str(e)}") \ No newline at end of file From abefce6cdf593269ee14f8e561b58fbf9853e1da Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Mon, 12 Aug 2024 15:05:24 +0100 Subject: [PATCH 47/49] softmax: simplify testcase --- .../fpgadataflow/test_fpgadataflow_softmax.py | 51 +++++++++---------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_softmax.py b/tests/fpgadataflow/test_fpgadataflow_softmax.py index 7d5b1b7782..c813bc3ff9 100644 --- a/tests/fpgadataflow/test_fpgadataflow_softmax.py +++ b/tests/fpgadataflow/test_fpgadataflow_softmax.py @@ -100,8 +100,8 @@ def make_single_quantsoftmax_modelwrapper(impl_style="hls", simd=1, idt=DataType Create a single quantized softmax node with variable parameters. this is before SpecializeLayers() transformation. ''' - h = ifm_dim[0] - w = ifm_dim[1] + h = ifm_dim[1] + w = ifm_dim[2] inp = helper.make_tensor_value_info("global_in", TensorProto.FLOAT, [1, h, w, channels]) outp = helper.make_tensor_value_info("global_out", TensorProto.FLOAT, [1, h, w, channels]) @@ -192,19 +192,20 @@ def test_convert_to_hw_softmax_layer(exec_mode, simd): pytest.fail(f"Failed to transform the model: {str(e)}") -@pytest.mark.parametrize("impl_style", ["hls","rtl"]) +@pytest.mark.parametrize("impl_style", ["hls"]) @pytest.mark.parametrize("simd", ["simd1", "simd2", "simd3", "simd4"]) -@pytest.mark.parametrize("idt", [DataType["UINT8"],DataType["INT8"],DataType["INT4"],DataType["UINT4"]]) -@pytest.mark.parametrize("ifm_dim", [(12,12)]) -@pytest.mark.parametrize("channels", [12, 384]) +@pytest.mark.parametrize("idt", ["INT8"]) +@pytest.mark.parametrize("ifm_dim", [(1, 12, 12, 12), (1, 128, 128, 384)]) @pytest.mark.fpgadataflow -def test_fpga_dataflow_quantsoftmax(impl_style, simd, idt, ifm_dim, channels): +def test_fpga_dataflow_quantsoftmax(impl_style, simd, idt, ifm_dim): + idt = DataType[idt] simd = int(simd[-1]) + io_shape = (ifm_dim[0], ifm_dim[1], ifm_dim[2], ifm_dim[3]) + tollerance = 2 + model = make_single_quantsoftmax_modelwrapper(impl_style=impl_style, simd=simd, idt=idt, ifm_dim=ifm_dim, channels=ifm_dim[3]) - model = make_single_quantsoftmax_modelwrapper(impl_style=impl_style, simd=simd, idt=idt, ifm_dim=ifm_dim, channels=channels) - - # Create the qonnx model - io_shape = (1, ifm_dim[0], ifm_dim[1], channels) + if(ifm_dim[3] % 3 != 0): + pytest.skip(f"Skipping this test because the number of channels is not a multiple of {simd}") input = gen_finn_dt_tensor(idt, io_shape) input_t = {"global_in": input} @@ -216,7 +217,7 @@ def test_fpga_dataflow_quantsoftmax(impl_style, simd, idt, ifm_dim, channels): y_ref = y_ref.numpy() y_out = oxe.execute_onnx(model, input_t)["global_out"] - assert np.allclose(y_ref, y_out, atol=5), "Model output does not match expected output" + assert np.allclose(y_ref, y_out, atol=tollerance), "Model output does not match expected output" try: model = model.transform(SpecializeLayers(test_fpga_part)) @@ -224,20 +225,18 @@ def test_fpga_dataflow_quantsoftmax(impl_style, simd, idt, ifm_dim, channels): model = model.transform(SetExecMode("cppsim")) model = model.transform(PrepareCppSim()) model = model.transform(CompileCppSim()) - # run the model - 
y_hw = oxe.execute_onnx(model, input_t)["global_out"] - - # loop through the output tensor and compare the values - tollerance = 2 + except Exception as e: + pytest.fail(f"Failed to transform the model: {str(e)}") - # Debug prints to help identify the failing values - for i in range(len(y_ref)): - for j in range(len(y_ref[i])): - for k in range(len(y_ref[i][j])): - for l in range(len(y_ref[i][j][k])): - if np.allclose(y_ref[i][j][k][l], y_hw[i][j][k][l], atol=tollerance) == False: - print(f"| {i},{j},{k},{l:<2} | {y_ref[i][j][k][l]:<4.0f} | {y_hw[i][j][k][l]:<4.0f} | {y_ref[i][j][k][l] - y_hw[i][j][k][l]:<4.0f} |") + # run the model + y_hw = oxe.execute_onnx(model, input_t)["global_out"] + # Debug prints to help identify the failing values + for i in range(len(y_ref)): + for j in range(len(y_ref[i])): + for k in range(len(y_ref[i][j])): + for l in range(len(y_ref[i][j][k])): + if np.allclose(y_ref[i][j][k][l], y_hw[i][j][k][l], atol=tollerance) == False: + print(f"| {i},{j},{k},{l:<2} | {y_ref[i][j][k][l]:<4.0f} | {y_hw[i][j][k][l]:<4.0f} | {y_ref[i][j][k][l] - y_hw[i][j][k][l]:<4.0f} |") - except Exception as e: - pytest.fail(f"Failed to transform the model: {str(e)}") \ No newline at end of file + assert np.allclose(y_ref, y_hw, atol=tollerance), "Model output does not match expected output" \ No newline at end of file From 2b026f87b4f634d06d6b487341d4069f8d7b9a7d Mon Sep 17 00:00:00 2001 From: aziz bahri Date: Wed, 14 Aug 2024 10:48:24 +0100 Subject: [PATCH 48/49] softmax: move hls source to custom hls directory --- custom_hls/sm_utils.hpp | 164 ++++++++++++++++++++++++ custom_hls/softmax.hpp | 275 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 439 insertions(+) create mode 100644 custom_hls/sm_utils.hpp create mode 100644 custom_hls/softmax.hpp diff --git a/custom_hls/sm_utils.hpp b/custom_hls/sm_utils.hpp new file mode 100644 index 0000000000..918f8879bf --- /dev/null +++ b/custom_hls/sm_utils.hpp @@ -0,0 +1,164 @@ +// Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. +// +// This file is subject to the Xilinx Design License Agreement located +// in the LICENSE.md file in the root directory of this repository. +// +// This file contains confidential and proprietary information of Xilinx, Inc. +// and is protected under U.S. and international copyright and other +// intellectual property laws. +// +// DISCLAIMER +// This disclaimer is not a license and does not grant any rights to the materials +// distributed herewith. Except as otherwise provided in a valid license issued to +// you by Xilinx, and to the maximum extent permitted by applicable law: (1) THESE +// MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX HEREBY +// DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, +// INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR +// FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether +// in contract or tort, including negligence, or under any other theory of +// liability) for any loss or damage of any kind or nature related to, arising +// under or in connection with these materials, including for any direct, or any +// indirect, special, incidental, or consequential loss or damage (including loss +// of data, profits, goodwill, or any type of loss or damage suffered as a result +// of any action brought by a third party) even if such damage or loss was +// reasonably foreseeable or Xilinx had been advised of the possibility of the +// same. 
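The nested debug loop above that prints mismatching elements could also be written with NumPy indexing; a possible equivalent, shown purely as an illustration:

    import numpy as np

    def report_mismatches(y_ref, y_hw, atol=2):
        diff = np.abs(y_ref.astype(float) - y_hw.astype(float))
        for idx in np.argwhere(diff > atol):
            i = tuple(idx)
            print(f"| {i} | {y_ref[i]:<4.0f} | {y_hw[i]:<4.0f} | {y_ref[i] - y_hw[i]:<4.0f} |")
        return not np.any(diff > atol)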
+// +// CRITICAL APPLICATIONS +// Xilinx products are not designed or intended to be fail-safe, or for use in +// any application requiring failsafe performance, such as life-support or safety +// devices or systems, Class III medical devices, nuclear facilities, applications +// related to the deployment of airbags, or any other applications that could lead +// to death, personal injury, or severe property or environmental damage +// (individually and collectively, "Critical Applications"). Customer assumes the +// sole risk and liability of any use of Xilinx products in Critical Applications, +// subject only to applicable laws and regulations governing limitations on product +// liability. +// +// THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT ALL TIMES. +#ifndef SM_UTIL_HPP +#define SM_UTIL_HPP +#include "hls_vector.h" + +//- Compile-Time Functions -------------------------------------------------- + +// ceil(log2(x)) +template +constexpr unsigned clog2(T x) { + return x<2? 0 : 1+clog2((x+1)/2); +} + +//- Streaming Flit with `last` Marking -------------------------------------- +template +struct flit_t { + bool last; + T data; + +public: + flit_t(bool last_, T const &data_) : last(last_), data(data_) {} + ~flit_t() {} +}; + +//- Streaming Copy ---------------------------------------------------------- +template +void move(hls::stream &src, hls::stream &dst) { +#pragma HLS pipeline II=1 style=flp + if(!src.empty()) dst.write(src.read()); +} + +//- Tree Reduce ------------------------------------------------------------- +template< unsigned long N, typename TA, typename TR = TA, typename F > +TR tree_reduce(hls::stream &v, F f) { +#pragma HLS inline +#pragma HLS function_instantiate variable=f + TR tree[2*N-1]; +#pragma HLS array_partition complete dim=1 variable=tree + for(unsigned i = N; i-- > 0;) { +#pragma HLS unroll + tree[N-1 + i] = v.read(); + } + for(unsigned i = N-1; i-- > 0;) { +#pragma HLS unroll + tree[i] = f(tree[2*i+1], tree[2*i+2]); + } + return tree[0]; +} + +// Recursive comparison and count (of max) +// Builds a tree to compute the max of a vector +template +struct MaxReduction { + + static T max(const hls::vector& input) { +#pragma HLS INLINE + constexpr unsigned M = (N + 1) / 2; + hls::vector res; + + for(unsigned i = 0; i < M; ++i) { +#pragma HLS unroll + if (2*i + 1 < N) + res[i] = input[2*i] > input[2*i + 1] ? input[2*i] : input[2*i + 1]; + else + res[i] = input[2*i]; // Handle the case where the input size is odd + } + + return MaxReduction::max(res); + } + +}; + +template +struct MaxReduction<2, T> { + static T max(const hls::vector& input) { +#pragma HLS INLINE + return (input[0] > input[1]) ? 
input[0] : input[1]; + } +}; + +template +struct MaxReduction<1, T> { + static T max(const hls::vector& input) { +#pragma HLS INLINE + return input[0]; + } +}; + +// Recursive reduction tree for the total summation +// Code for the Nth stage +template +struct TreeReduction { + static float reduce(const hls::vector& input) { +#pragma HLS INLINE + constexpr unsigned M = (N + 1) / 2; + hls::vector sum; + + for(unsigned i = 0; i < M; ++i) { +#pragma HLS unroll + if (2*i + 1 < N) + sum[i] = input[2*i] + input[2*i + 1]; + else + sum[i] = input[2*i]; // Handle the case where the input size is odd + } + + return TreeReduction::reduce(sum); + } +}; + +template<> +struct TreeReduction<2> { + static float reduce(const hls::vector& input) { +#pragma HLS INLINE + return input[0] + input[1]; + } +}; + +template<> +struct TreeReduction<1> { + static float reduce(const hls::vector& input) { +#pragma HLS INLINE + return input[0]; + } +}; + + +#endif \ No newline at end of file diff --git a/custom_hls/softmax.hpp b/custom_hls/softmax.hpp new file mode 100644 index 0000000000..61d8bab0e2 --- /dev/null +++ b/custom_hls/softmax.hpp @@ -0,0 +1,275 @@ +// Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. +// +// This file is subject to the Xilinx Design License Agreement located +// in the LICENSE.md file in the root directory of this repository. +// +// This file contains confidential and proprietary information of Xilinx, Inc. +// and is protected under U.S. and international copyright and other +// intellectual property laws. +// +// DISCLAIMER +// This disclaimer is not a license and does not grant any rights to the materials +// distributed herewith. Except as otherwise provided in a valid license issued to +// you by Xilinx, and to the maximum extent permitted by applicable law: (1) THESE +// MATERIALS ARE MADE AVAILABLE "AS IS" AND WITH ALL FAULTS, AND XILINX HEREBY +// DISCLAIMS ALL WARRANTIES AND CONDITIONS, EXPRESS, IMPLIED, OR STATUTORY, +// INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR +// FITNESS FOR ANY PARTICULAR PURPOSE; and (2) Xilinx shall not be liable (whether +// in contract or tort, including negligence, or under any other theory of +// liability) for any loss or damage of any kind or nature related to, arising +// under or in connection with these materials, including for any direct, or any +// indirect, special, incidental, or consequential loss or damage (including loss +// of data, profits, goodwill, or any type of loss or damage suffered as a result +// of any action brought by a third party) even if such damage or loss was +// reasonably foreseeable or Xilinx had been advised of the possibility of the +// same. +// +// CRITICAL APPLICATIONS +// Xilinx products are not designed or intended to be fail-safe, or for use in +// any application requiring failsafe performance, such as life-support or safety +// devices or systems, Class III medical devices, nuclear facilities, applications +// related to the deployment of airbags, or any other applications that could lead +// to death, personal injury, or severe property or environmental damage +// (individually and collectively, "Critical Applications"). Customer assumes the +// sole risk and liability of any use of Xilinx products in Critical Applications, +// subject only to applicable laws and regulations governing limitations on product +// liability. +// +// THIS COPYRIGHT NOTICE AND DISCLAIMER MUST BE RETAINED AS PART OF THIS FILE AT ALL TIMES. 
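The softmax.hpp file added below is organised as a three-stage stream pipeline (running max, exponentiate and accumulate, divide), followed by a separate quantisation stage. Functionally the stages compute the numerically stabilised softmax; a NumPy sketch of the reference behaviour the hardware stages are meant to match:

    import numpy as np

    def streaming_softmax_ref(x):
        # stage 1: per-row maximum, used for numerical stability
        m = x.max(axis=-1, keepdims=True)
        # stage 2: exponentiate relative to the max and accumulate the row sum
        e = np.exp(x - m)
        s = e.sum(axis=-1, keepdims=True)
        # stage 3: normalise every element by the row sum
        return e / s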
+
+#include <hls_stream.h>
+#include <hls_vector.h>
+#include <hls_math.h>
+#include <ap_int.h>
+#include <ap_fixed.h>
+#include <climits>
+#include <cmath>
+#include "sm_utils.hpp"
+
+// First stage of the pipeline:
+//
+// Trigger: When a vector of SIMD elements is present in the stream
+//
+// Desc: Pass over the input N items and calc the max value
+template<unsigned N, unsigned SIMD, typename T>
+void max_calc_stage(
+    hls::stream<hls::vector<T, SIMD>> &ins,
+    hls::stream<hls::vector<T, SIMD>> &outs,
+    hls::stream<T> &maxs
+) {
+#pragma HLS pipeline II=1 style=flp
+    static ap_uint<clog2(N/SIMD)+1> count = 0;
+    static T max = 0;
+#pragma HLS reset variable=count
+#pragma HLS reset variable=max
+
+    if(!ins.empty()){
+        hls::vector<T, SIMD> out;
+        hls::vector<T, SIMD> max_v;
+        hls::vector<T, SIMD> const in = ins.read();
+
+
+        for(unsigned i=0; i<SIMD; i++){
+#pragma HLS unroll
+            out[i] = in[i];
+            // fold the running max into every lane so the tree reduction
+            // below also covers the chunks seen so far
+            max_v[i] = (max < in[i]) ? in[i] : max;
+        }
+        outs.write(out);
+        max = MaxReduction<SIMD, T>::max(max_v);
+
+        count++;
+        if (count == (N/SIMD)-1) {
+            count = 0;
+            maxs.write(max);
+            max = 0;
+        }
+    }
+}
+
+
+// Second stage of the pipeline
+//
+// Trigger: When a max value is sent from the preceding stage
+//
+// Desc: For each item in an N item sequence calc the (exp - max) in float,
+//       tracking the sum while processing the N items.
+template<unsigned N, unsigned SIMD, typename T>
+void exp_sum_calc(
+    hls::stream<hls::vector<T, SIMD>> &ins,
+    hls::stream<T> &maxs,
+    hls::stream<hls::vector<float, SIMD>> &outs,
+    hls::stream<float> &sums
+){
+#pragma HLS pipeline II=1 style=flp
+    static ap_uint<clog2(N/SIMD)+1> count = 0;
+    static float sum = 0.0f;
+    static bool valid = false;
+    static float max = 0.0f;
+#pragma HLS reset variable=count
+#pragma HLS reset variable=sum
+#pragma HLS reset variable=valid
+#pragma HLS reset variable=max
+
+    if (count == (N/SIMD)) {
+        count = 0;
+        valid = false;
+        sums.write(sum);
+        sum = 0.0f;
+        return;
+    }
+
+    if(valid && !ins.empty()) {
+        hls::vector<T, SIMD> const in = ins.read();
+        hls::vector<float, SIMD> out;
+        for (unsigned i=0; i<SIMD; i++) {
+#pragma HLS unroll
+            out[i] = hls::exp(float(in[i]) - max);
+        }
+        sum += TreeReduction<SIMD>::reduce(out);
+        outs.write(out);
+
+        count++;
+    }
+
+    if (!maxs.empty() && !valid) {
+        max = maxs.read();
+        valid = true;
+    }
+
+}
+
+// Third stage of the pipeline
+//
+// Trigger: When a sum value is sent from the preceding stage
+//
+// Desc: For the N items take the input and divide it by the sum
+template<unsigned N, unsigned SIMD>
+void div_calc(
+    hls::stream<hls::vector<float, SIMD>> &ins,
+    hls::stream<float> &sums,
+    hls::stream<hls::vector<float, SIMD>> &outs
+){
+#pragma HLS pipeline II=1 style=flp
+    static ap_uint<clog2(N/SIMD)+1> count = 0;
+    static bool valid = false;
+    static float sum = 0.0f;
+#pragma HLS reset variable=count
+#pragma HLS reset variable=valid
+#pragma HLS reset variable=sum
+
+    if (count == (N/SIMD)) {
+        count = 0;
+        valid = false;
+        return;
+    }
+
+    if (valid && !ins.empty()) {
+        hls::vector<float, SIMD> const in = ins.read();
+        hls::vector<float, SIMD> out;
+        for(unsigned i=0; i<SIMD; i++){
+#pragma HLS unroll
+            out[i] = in[i] / sum;
+        }
+        outs.write(out);
+
+        count++;
+    }
+
+    if (!sums.empty() && !valid) {
+        sum = sums.read();
+        valid = true;
+    }
+
+}
+
+// Float softmax over a stream of N items, SIMD elements per transaction,
+// chaining the three stages above in a dataflow region.
+template<
+    unsigned N,
+    unsigned SIMD,
+    typename T
+>
+void smax(
+    hls::stream<hls::vector<T, SIMD>> &src,
+    hls::stream<hls::vector<float, SIMD>> &dst
+) {
+#pragma HLS dataflow disable_start_propagation
+    static_assert(N%SIMD == 0, "N must be a multiple of SIMD");
+
+    static hls::stream<hls::vector<T, SIMD>> max_data_s;
+#pragma HLS stream variable=max_data_s depth=N
+    static hls::stream<T> max_s;
+#pragma HLS stream variable=max_s depth=2
+
+    static hls::stream<hls::vector<float, SIMD>> exp_data_s;
+#pragma HLS stream variable=exp_data_s depth=N
+    static hls::stream<float> sum_s;
+#pragma HLS stream variable=sum_s depth=2
+
+    max_calc_stage<N, SIMD, T>(src, max_data_s, max_s);
+    exp_sum_calc<N, SIMD, T>(max_data_s, max_s, exp_data_s, sum_s);
+    div_calc<N, SIMD>(exp_data_s, sum_s, dst);
+
+} // smax()
+
+// Threshold/quantisation at the output of the softmax
+template<
+    typename T, // The quantised output type (Needs to be signed)
+    typename F // The float based input type
+>
+T quant_threshold(F val) {
+#pragma HLS INLINE
+    if(val>=1.0f)
+        return T((~unsigned(0)) >> 1);
+
+    constexpr unsigned N_fracbits = (sizeof(T)*CHAR_BIT);
+
+    ap_fixed<N_fracbits, 1> fixed_point_val = val;
+    T frac_val = fixed_point_val.range(N_fracbits - 2, 0);
+    return frac_val;
+}
+
+// Quantisation pipeline stage
+//
+// Trigger: When a SIMD vector is received from the preceding stage
+//
+// Desc: Apply quantisation to the SIMD elements and write them into the
+//       SIMD width output stream.
+template<
+    unsigned N,
+    unsigned SIMD,
+    typename T
+>
+void quant_stage(
+    hls::stream<hls::vector<float, SIMD>> &in,
+    hls::stream<hls::vector<T, SIMD>> &out
+) {
+#pragma HLS pipeline II=1 style=flp
+    if(!in.empty()) {
+        hls::vector<float, SIMD> const x = in.read();
+        hls::vector<T, SIMD> y;
+        for(unsigned i=0; i<SIMD; i++) {
+#pragma HLS unroll
+            y[i] = quant_threshold<T>(x[i]);
+        }
+        out.write(y);
+    }
+}
+
+// Quantised version of softmax
+// This is the same as the float softmax with an additional baked-in quantisation stage at the end
+template<
+    unsigned N,    // The width of the input dimension
+    unsigned SIMD, // Amount of parallelism (how many items consumed/produced at a time)
+    typename T
+    >
+void smaxquant(
+    hls::stream<hls::vector<T, SIMD>> &src,
+    hls::stream<hls::vector<T, SIMD>> &dst
+) {
+#pragma HLS DATAFLOW disable_start_propagation
+    hls::stream<hls::vector<float, SIMD>> smax_out;
+#pragma HLS stream variable=smax_out depth=2
+    static_assert(N%SIMD == 0, "SIMD must be a factor of N");
+
+    smax<N, SIMD, T>(src, smax_out);
+    quant_stage<N, SIMD, T>(smax_out, dst);
+
+} // smaxquant()

From d629093023f93c3ca42fc41788e31b48cfb90c23 Mon Sep 17 00:00:00 2001
From: aziz bahri
Date: Wed, 14 Aug 2024 15:37:56 +0100
Subject: [PATCH 49/49] softmax: quantization fix

---
 custom_hls/softmax.hpp | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/custom_hls/softmax.hpp b/custom_hls/softmax.hpp
index 61d8bab0e2..9452045a77 100644
--- a/custom_hls/softmax.hpp
+++ b/custom_hls/softmax.hpp
@@ -44,6 +44,7 @@
 #include <ap_fixed.h>
 #include <climits>
 #include <cmath>
+#include <type_traits>
 #include "sm_utils.hpp"
 
 // First stage of the pipeline:
@@ -211,20 +212,29 @@ void smax(
 
 // Threshold/quantisation at the output of the softmax
 template<
-    typename T, // The quantised output type (Needs to be signed)
-    typename F // The float based input type
+    typename T,  // The quantised output type (Needs to be signed)
+    typename TF  // The float based input type
 >
-T quant_threshold(F val) {
+T quant_threshold(TF val) {
 #pragma HLS INLINE
-    if(val>=1.0f)
-        return T((~unsigned(0)) >> 1);
+    constexpr unsigned numBits = sizeof(T)*CHAR_BIT;
+    if(val>=1.0f){
+        T frac_val = ~T(0);
+        if(std::is_signed<T>::value) {
+            return frac_val;
+        } else {
+            T mask = ~(T(1) << (numBits - 1));
+            return frac_val & mask;
+        }
+    }
+
+
+    ap_fixed<numBits, 1> fixed_point_val = val;
+    T frac_val = fixed_point_val.range(numBits - 2, 0);
+    return frac_val;
+}
-
-    constexpr unsigned N_fracbits = (sizeof(T)*CHAR_BIT);
-
-    ap_fixed<N_fracbits, 1> fixed_point_val = val;
-    T frac_val = fixed_point_val.range(N_fracbits - 2, 0);
-    return frac_val;
-}
 
 // Quantisation pipeline stage
 //
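For reference, the three streaming stages added above (max_calc_stage, exp_sum_calc, div_calc) together compute a numerically stabilised softmax, exp(x_i - max(x)) / sum_j exp(x_j - max(x)). The sketch below is a minimal host-side C++ reference model of that computation, under the assumption that a test bench would compare the HLS output against it; it is not part of the patch set and all names are illustrative. Patch 49 only changes the final quantisation step (quant_threshold), not this float pipeline.

    // Hypothetical host-side reference model of the smax() dataflow pipeline.
    // Comments map each loop to its HLS counterpart.
    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    std::vector<float> softmax_ref(const std::vector<float> &x) {
        // max_calc_stage equivalent: maximum over the whole frame
        float mx = *std::max_element(x.begin(), x.end());
        // exp_sum_calc equivalent: exp(x - max) per element plus their sum
        std::vector<float> e(x.size());
        float sum = 0.0f;
        for (std::size_t i = 0; i < x.size(); ++i) {
            e[i] = std::exp(x[i] - mx);
            sum += e[i];
        }
        // div_calc equivalent: normalise by the sum
        for (float &v : e) v /= sum;
        return e;
    }

    int main() {
        std::vector<float> p = softmax_ref({1.0f, 2.0f, 3.0f, 4.0f});
        for (float v : p) std::printf("%.4f ", v);  // 0.0321 0.0871 0.2369 0.6439
        std::printf("\n");
        return 0;
    }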