diff --git a/docs/user_guide/advanced_usage.rst b/docs/user_guide/advanced_usage.rst index d05a1921..792dc80f 100644 --- a/docs/user_guide/advanced_usage.rst +++ b/docs/user_guide/advanced_usage.rst @@ -598,3 +598,16 @@ These weights can be quantized to 8bit to reduce the size to a quarter, whereas For example, the top-1 accuracy of MobileNetV1 after quantization of weights is 68.2% on the ImageNet validation set. ``quantize_large_weights`` can be specified as 1 in the deployment file to save these weights in 8bit and actual inference in float. It can be used for both CPU and GPU. + +Build with rpcmem +------------------- +For Qualcomm-based devices, it's possible to build the models and the ``mace_run`` executable with ``rpcmem`` support: + +.. code-block:: sh + + python tools/converter.py convert --config=/path/to/model_deployment_file.yml --use_rpc_mem + python tools/converter.py run --config=/path/to/model_deployment_file.yml --use_rpc_mem + +For deployment, make sure the use of ``rpcmem`` is consistent between ``libmace.a`` and built models. Both have to be built +with or without ``rpcmem`` support. For details about how to build ``libmace.a`` with ``rpcmem`` support see +the ``-rpcmem`` option of ``tools/bazel_build_standalone_lib.sh`` in the basic usage guide. diff --git a/docs/user_guide/advanced_usage_cmake.rst b/docs/user_guide/advanced_usage_cmake.rst index 62e5f555..cbc15dd5 100644 --- a/docs/user_guide/advanced_usage_cmake.rst +++ b/docs/user_guide/advanced_usage_cmake.rst @@ -175,7 +175,7 @@ After that you can rebuild the engine. .. code-block:: bash - RUNTIME=GPU RUNMODE=code QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh + RUNTIME=GPU RUNMODE=code QUANTIZE=OFF RPCMEM=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh ``RUNMODE=code`` means you compile and link model library with MACE engine. 
diff --git a/docs/user_guide/basic_usage.rst b/docs/user_guide/basic_usage.rst index c9b19cdc..67807d11 100644 --- a/docs/user_guide/basic_usage.rst +++ b/docs/user_guide/basic_usage.rst @@ -45,7 +45,7 @@ Here we use the mobilenet-v2 model as an example. cd path/to/mace # Build library # output lib path: build/lib - bash tools/bazel_build_standalone_lib.sh [-abi=abi][-runtimes=rt1,rt2,...][-quantize][-static] + bash tools/bazel_build_standalone_lib.sh [-abi=abi][-runtimes=rt1,rt2,...][-quantize][-static][-rpcmem] .. note:: @@ -53,6 +53,7 @@ Here we use the mobilenet-v2 model as an example. - Use the `-abi` parameter to specify the ABI. Supported ABIs are armeabi-v7a, arm64-v8a, arm_linux_gnueabihf, aarch64_linux_gnu and host (for host machine, linux-x86-64). The default ABI is arm64-v8a. - For each ABI, several runtimes can be chosen by specifying the `-runtimes` parameter. Supported runtimes are CPU, GPU, DSP and APU. By default, the library is built to run on CPU. - Omit the `-static` option if a shared library is desired instead of a static one. By default, a shared library is built. + - Omit the `-rpcmem` option if your target device chipset is not manufactured by Qualcomm. - See 'bash tools/bazel_build_standalone_lib.sh -help' for detailed information. - DO respect the hyphens ('-') and the underscores ('_') in the ABI. @@ -189,7 +190,7 @@ Or use bazel to build MACE source code into a library. cd path/to/mace # Build library # output lib path: build/lib - bash tools/bazel_build_standalone_lib.sh [-abi=abi][-runtimes=rt1,rt2,...][-static] + bash tools/bazel_build_standalone_lib.sh [-abi=abi][-runtimes=rt1,rt2,...][-static][-rpcmem] The above command will generate static library ``build/lib/libmace.a`` dynamic library ``build/lib/libmace.so``. 
diff --git a/docs/user_guide/basic_usage_cmake.rst b/docs/user_guide/basic_usage_cmake.rst index 0f33a80e..314a82a2 100644 --- a/docs/user_guide/basic_usage_cmake.rst +++ b/docs/user_guide/basic_usage_cmake.rst @@ -20,13 +20,15 @@ Please make sure you have CMake installed. .. code-block:: sh - RUNTIME=GPU QUANTIZE=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh + RUNTIME=GPU QUANTIZE=OFF RPCMEM=OFF bash tools/cmake/cmake-build-armeabi-v7a.sh which generate libraries in ``build/cmake-build/armeabi-v7a``, you can use either static libraries or the ``libmace.so`` shared library. You can also build for other target abis: ``arm64-v8a``, ``arm-linux-gnueabihf``, ``aarch64-linux-gnu``, ``host``; and runtime: ``GPU``, ``HEXAGON``, ``HTA``, ``APU``. +For Qualcomm devices, it is possible to set ``RPCMEM=ON`` to enable the use of specific memory optimizations. + Model Conversion ------------------------------- diff --git a/tools/bazel_adb_run.py b/tools/bazel_adb_run.py index 857ab2cf..fba54321 100644 --- a/tools/bazel_adb_run.py +++ b/tools/bazel_adb_run.py @@ -108,7 +108,7 @@ def parse_args(): parser.add_argument( "--enable_rpcmem", type=str2bool, - default=True, + default=False, help="Whether to use rpcmem") parser.add_argument( "--enable_hta", diff --git a/tools/bazel_build_standalone_lib.sh b/tools/bazel_build_standalone_lib.sh index f397d819..7f3de374 100755 --- a/tools/bazel_build_standalone_lib.sh +++ b/tools/bazel_build_standalone_lib.sh @@ -42,7 +42,7 @@ enable_dsp=false enable_apu=false enable_quantize=false enable_bfloat16=false -enable_rpcmem=true +enable_rpcmem=false static_lib=false symbol_hidden= runtime_label="cpu" @@ -103,6 +103,9 @@ for opt in "${@}";do bfloat16|-bfloat16|--bfloat16) enable_bfloat16=true ;; + rpcmem|-rpcmem|--rpcmem) + enable_rpcmem=true + ;; help|-help|--help) helper ;; @@ -115,10 +118,6 @@ $(echo "$1" | cut -d '=' -f -1)" esac done -if [[ "${enable_apu}" == true || ("${abi}" != armeabi-v7a && "${abi}" != arm64-v8a) ]];then - 
enable_rpcmem=false -fi - if [[ "${static_lib}" == true ]];then lib_type=static lib_label=static diff --git a/tools/cmake/cmake-build-arm64-v8a.sh b/tools/cmake/cmake-build-arm64-v8a.sh index 78a6fe1d..ecb67d14 100755 --- a/tools/cmake/cmake-build-arm64-v8a.sh +++ b/tools/cmake/cmake-build-arm64-v8a.sh @@ -37,6 +37,11 @@ if [[ "$BFLOAT16" == "ON" ]]; then DMACE_ENABLE_BFLOAT16=ON fi +MACE_ENABLE_RPCMEM=OFF +if [[ "$RPCMEM" == "ON" ]]; then + MACE_ENABLE_RPCMEM=ON +fi + mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} cmake -DANDROID_ABI="arm64-v8a" \ -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake \ @@ -56,7 +61,7 @@ cmake -DANDROID_ABI="arm64-v8a" \ -DMACE_ENABLE_TESTS=ON \ -DMACE_ENABLE_BENCHMARKS=ON \ -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ - -DMACE_ENABLE_RPCMEM=ON \ + -DMACE_ENABLE_RPCMEM=${MACE_ENABLE_RPCMEM} \ -DCMAKE_INSTALL_PREFIX=install \ ../../.. make -j$(nproc) VERBOSE=1 && make install diff --git a/tools/cmake/cmake-build-armeabi-v7a.sh b/tools/cmake/cmake-build-armeabi-v7a.sh index 760901f7..df22a608 100755 --- a/tools/cmake/cmake-build-armeabi-v7a.sh +++ b/tools/cmake/cmake-build-armeabi-v7a.sh @@ -37,6 +37,11 @@ if [[ "$BFLOAT16" == "ON" ]]; then DMACE_ENABLE_BFLOAT16=ON fi +MACE_ENABLE_RPCMEM=OFF +if [[ "$RPCMEM" == "ON" ]]; then + MACE_ENABLE_RPCMEM=ON +fi + mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} cmake -DANDROID_ABI="armeabi-v7a" \ -DANDROID_ARM_NEON=ON \ @@ -56,7 +61,7 @@ cmake -DANDROID_ABI="armeabi-v7a" \ -DMACE_ENABLE_TESTS=ON \ -DMACE_ENABLE_BENCHMARKS=ON \ -DMACE_ENABLE_CODE_MODE=${MACE_ENABLE_CODE_MODE} \ - -DMACE_ENABLE_RPCMEM=ON \ + -DMACE_ENABLE_RPCMEM=${MACE_ENABLE_RPCMEM} \ -DCMAKE_INSTALL_PREFIX=install \ ../../.. 
make -j$(nproc) VERBOSE=1 && make install diff --git a/tools/converter.py b/tools/converter.py index 33107acb..7584007a 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -753,7 +753,7 @@ def print_configuration(configs): MaceLogger.summary(StringFormatter.table(header, data, title)) -def build_model_lib(configs, address_sanitizer, debug_mode): +def build_model_lib(configs, address_sanitizer, use_rpc_mem, debug_mode): MaceLogger.header(StringFormatter.block("Building model library")) # create model library dir @@ -776,6 +776,7 @@ def build_model_lib(configs, address_sanitizer, debug_mode): enable_quantize=quantize_enabled(configs), enable_bfloat16=bfloat16_enabled(configs), enable_fp16=fp16_enabled(configs), + enable_rpcmem=use_rpc_mem, address_sanitizer=address_sanitizer, symbol_hidden=get_symbol_hidden_mode(debug_mode), debug_mode=debug_mode @@ -901,7 +902,10 @@ def convert_func(flags): StringFormatter.block("Model %s converted" % model_name)) if model_graph_format == ModelFormat.code: - build_model_lib(configs, flags.address_sanitizer, flags.debug_mode) + build_model_lib(configs, + flags.address_sanitizer, + flags.use_rpc_mem, + flags.debug_mode) print_library_summary(configs) @@ -910,7 +914,7 @@ def convert_func(flags): # run ################################ def build_mace_run(configs, target_abi, toolchain, address_sanitizer, - mace_lib_type, debug_mode, device): + mace_lib_type, use_rpc_mem, debug_mode, device): library_name = configs[YAMLKeyword.library_name] build_tmp_binary_dir = get_build_binary_dir(library_name, target_abi) @@ -941,6 +945,7 @@ def build_mace_run(configs, target_abi, toolchain, address_sanitizer, enable_quantize=quantize_enabled(configs), enable_bfloat16=bfloat16_enabled(configs), enable_fp16=fp16_enabled(configs), + enable_rpcmem=use_rpc_mem, address_sanitizer=address_sanitizer, symbol_hidden=get_symbol_hidden_mode(debug_mode, mace_lib_type), debug_mode=debug_mode, @@ -987,6 +992,7 @@ def run_mace(flags): toolchain, 
flags.address_sanitizer, flags.mace_lib_type, + flags.use_rpc_mem, flags.debug_mode, device) # run @@ -1065,6 +1071,10 @@ def parse_args(): type=str, default="", help="Target SOCs, comma seperated list.") + all_type_parent_parser.add_argument( + "--use_rpc_mem", + action="store_true", + help="Enable rpc_mem optimizations.") all_type_parent_parser.add_argument( "--debug_mode", action="store_true", diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 79419202..20771bde 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -283,7 +283,7 @@ def bazel_build(target, enable_quantize=True, enable_bfloat16=False, enable_fp16=False, - enable_rpcmem=True, + enable_rpcmem=False, address_sanitizer=False, symbol_hidden=True, debug_mode=False,