diff --git a/CMakeLists.txt b/CMakeLists.txt index d4240cc753cc..2dd1695ba311 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,13 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -cmake_minimum_required(VERSION 3.16.3) - -# Do not set MSVC warning flags like /W3 by default (since 3.15): -# https://cmake.org/cmake/help/v3.15/policy/CMP0092.html -if(POLICY CMP0092) - cmake_policy(SET CMP0092 NEW) -endif() +cmake_minimum_required(VERSION 3.16.3...3.21) # LLVM requires CMP0116 for tblgen: https://reviews.llvm.org/D101083 # CMP0116: Ninja generators transform `DEPFILE`s from `add_custom_command()` @@ -37,6 +31,7 @@ endif() option(IREE_ENABLE_RUNTIME_TRACING "Enables instrumented runtime tracing." OFF) option(IREE_ENABLE_COMPILER_TRACING "Enables instrumented compiler tracing." OFF) +option(IREE_ENABLE_THREADING "Builds IREE in with thread library support." ON) option(IREE_BUILD_COMPILER "Builds the IREE compiler." ON) option(IREE_BUILD_TESTS "Builds IREE unit tests." ON) @@ -46,10 +41,9 @@ option(IREE_BUILD_SAMPLES "Builds IREE sample projects." ON) option(IREE_BUILD_TRACY "Builds tracy server tools." OFF) option(IREE_BUILD_TENSORFLOW_ALL "Builds all TensorFlow compiler frontends." OFF) -option(IREE_BUILD_TENSORFLOW_COMPILER "Builds TensorFlow compiler frontend." OFF) -option(IREE_BUILD_TFLITE_COMPILER "Builds the TFLite compiler frontend." OFF) -option(IREE_BUILD_XLA_COMPILER "Builds TensorFlow XLA compiler frontend." OFF) -option(IREE_ENABLE_THREADING "Builds IREE in with thread library support." ON) +option(IREE_BUILD_TENSORFLOW_COMPILER "Builds TensorFlow compiler frontend." "${IREE_BUILD_TENSORFLOW_ALL}") +option(IREE_BUILD_TFLITE_COMPILER "Builds the TFLite compiler frontend." "${IREE_BUILD_TENSORFLOW_ALL}") +option(IREE_BUILD_XLA_COMPILER "Builds TensorFlow XLA compiler frontend." 
"${IREE_BUILD_TENSORFLOW_ALL}") set(IREE_HAL_DRIVERS_TO_BUILD "all" CACHE STRING "Semicolon-separated list of HAL drivers to build, or \"all\".") @@ -72,15 +66,12 @@ if(${IREE_BUILD_TENSORFLOW_ALL} OR set(IREE_ENABLE_TENSORFLOW ON) endif() + option(IREE_BUILD_BINDINGS_TFLITE "Builds the IREE TFLite C API compatibility shim" ON) option(IREE_BUILD_BINDINGS_TFLITE_JAVA "Builds the IREE TFLite Java bindings with the C API compatibility shim" ON) # Default python bindings to enabled for some features. -if(${IREE_ENABLE_TENSORFLOW}) - option(IREE_BUILD_PYTHON_BINDINGS "Builds the IREE python bindings" ON) -else() - option(IREE_BUILD_PYTHON_BINDINGS "Builds the IREE python bindings" OFF) -endif() +option(IREE_BUILD_PYTHON_BINDINGS "Builds the IREE python bindings" "${IREE_ENABLE_TENSORFLOW}") #------------------------------------------------------------------------------- # Experimental project flags @@ -440,17 +431,17 @@ if(IREE_ENABLE_THREADING) add_subdirectory(third_party/cpuinfo EXCLUDE_FROM_ALL) endif() -iree_set_flatcc_cmake_options() add_subdirectory(build_tools/third_party/flatcc EXCLUDE_FROM_ALL) -add_subdirectory(third_party/flatcc EXCLUDE_FROM_ALL) add_subdirectory(third_party/vulkan_headers EXCLUDE_FROM_ALL) # TODO(scotttodd): Iterate some more and find a better place for this. 
if (NOT CMAKE_CROSSCOMPILING) - install(TARGETS iree-flatcc-cli - COMPONENT iree-flatcc-cli - RUNTIME DESTINATION bin) + install( + TARGETS iree-flatcc-cli + COMPONENT iree-flatcc-cli + RUNTIME DESTINATION bin + ) endif() if(IREE_BUILD_COMPILER) diff --git a/benchmarks/TFLite/CMakeLists.txt b/benchmarks/TFLite/CMakeLists.txt index e78feaff8524..bfa27c357932 100644 --- a/benchmarks/TFLite/CMakeLists.txt +++ b/benchmarks/TFLite/CMakeLists.txt @@ -7,37 +7,52 @@ ################################################################################ # # -# Benchmark models for Tosa # +# Benchmark models from TFLite # # # -# Each module specification should be a list that contains the following # -# fields: MODULE_NAME, MODULE_TAGS, MLIR_SOURCE, ENTRY_FUNCTION, # -# FUNCTION_INPUTS. See iree_mlir_benchmark_suite definition for details about # -# these fields. # +# Each module specification should be a list containing alternating keys and # +# values. The fields are: NAME, TAGS, MLIR_SOURCE, ENTRY_FUNCTION, and # +# FUNCTION_INPUTS. See the iree_mlir_benchmark_suite definition for details # +# about these fields. 
# # # ################################################################################ set(DEEPLABV3_FP32_MODULE - "DeepLabV3" # MODULE_NAME - "fp32" # MODULE_TAGS - "https://storage.googleapis.com/iree-model-artifacts/DeepLabV3-2bcafb1.tar.gz" # MLIR_SOURCE - "main" # ENTRY_FUNCTION - "1x257x257x3xf32" # FUNCTION_INPUTS + NAME + "DeepLabV3" + TAGS + "fp32" + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/DeepLabV3-2bcafb1.tar.gz" + ENTRY_FUNCTION + "main" + FUNCTION_INPUTS + "1x257x257x3xf32" ) set(MOBILESSD_FP32_MODULE - "MobileSSD" # MODULE_NAME - "fp32" # MODULE_TAGS - "https://storage.googleapis.com/iree-model-artifacts/MobileSSD-2bcafb1.tar.gz" # MLIR_SOURCE - "main" # ENTRY_FUNCTION - "1x320x320x3xf32" # FUNCTION_INPUTS + NAME + "MobileSSD" + TAGS + "fp32" + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/MobileSSD-2bcafb1.tar.gz" + ENTRY_FUNCTION + "main" + FUNCTION_INPUTS + "1x320x320x3xf32" ) set(POSENET_FP32_MODULE - "PoseNet" # MODULE_NAME - "fp32" # MODULE_TAGS - "https://storage.googleapis.com/iree-model-artifacts/PoseNet-2bcafb1.tar.gz" # MLIR_SOURCE - "main" # ENTRY_FUNCTION - "1x353x257x3xf32" # FUNCTION_INPUTS + NAME + "PoseNet" + TAGS + "fp32" + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/PoseNet-2bcafb1.tar.gz" + ENTRY_FUNCTION + "main" + FUNCTION_INPUTS + "1x353x257x3xf32" ) ################################################################################ @@ -53,9 +68,9 @@ set(POSENET_FP32_MODULE # CPU, Dylib-Sync, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${DEEPLABV3_FP32_MODULE} - ${MOBILESSD_FP32_MODULE} - ${POSENET_FP32_MODULE} + "${DEEPLABV3_FP32_MODULE}" + "${MOBILESSD_FP32_MODULE}" + "${POSENET_FP32_MODULE}" BENCHMARK_MODES "big-core,full-inference" @@ -77,9 +92,9 @@ iree_mlir_benchmark_suite( # CPU, Dylib, 1-thread, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${DEEPLABV3_FP32_MODULE} - ${MOBILESSD_FP32_MODULE} - 
${POSENET_FP32_MODULE} + "${DEEPLABV3_FP32_MODULE}" + "${MOBILESSD_FP32_MODULE}" + "${POSENET_FP32_MODULE}" BENCHMARK_MODES "1-thread,big-core,full-inference" @@ -102,9 +117,9 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Adreno, full-inference iree_mlir_benchmark_suite( MODULES - ${DEEPLABV3_FP32_MODULE} - ${MOBILESSD_FP32_MODULE} - ${POSENET_FP32_MODULE} + "${DEEPLABV3_FP32_MODULE}" + "${MOBILESSD_FP32_MODULE}" + "${POSENET_FP32_MODULE}" BENCHMARK_MODES "full-inference" @@ -124,9 +139,9 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, full-inference iree_mlir_benchmark_suite( MODULES - ${DEEPLABV3_FP32_MODULE} - ${MOBILESSD_FP32_MODULE} - ${POSENET_FP32_MODULE} + "${DEEPLABV3_FP32_MODULE}" + "${MOBILESSD_FP32_MODULE}" + "${POSENET_FP32_MODULE}" BENCHMARK_MODES "full-inference" @@ -146,9 +161,9 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, kernel-execution iree_mlir_benchmark_suite( MODULES - ${DEEPLABV3_FP32_MODULE} - ${MOBILESSD_FP32_MODULE} - ${POSENET_FP32_MODULE} + "${DEEPLABV3_FP32_MODULE}" + "${MOBILESSD_FP32_MODULE}" + "${POSENET_FP32_MODULE}" BENCHMARK_MODES "kernel-execution" diff --git a/benchmarks/TensorFlow/CMakeLists.txt b/benchmarks/TensorFlow/CMakeLists.txt index 48c79b3ebdb6..827b11b41474 100644 --- a/benchmarks/TensorFlow/CMakeLists.txt +++ b/benchmarks/TensorFlow/CMakeLists.txt @@ -7,49 +7,69 @@ ################################################################################ # # -# Benchmark models # +# Benchmark models from TensorFlow # # # -# Each module specification should be a list that contains the following # -# fields: MODULE_NAME, MODULE_TAGS, MLIR_SOURCE, ENTRY_FUNCTION, # -# FUNCTION_INPUTS. See iree_mlir_benchmark_suite definition for details about # -# these fields. # +# Each module specification should be a list containing alternating keys and # +# values. The fields are: NAME, TAGS, MLIR_SOURCE, ENTRY_FUNCTION, and # +# FUNCTION_INPUTS. See the iree_mlir_benchmark_suite definition for details # +# about these fields. 
# # # ################################################################################ set(MOBILEBERT_FP16_MODULE - "MobileBertSquad" # MODULE_NAME - "fp16" # MODULE_TAGS + NAME + "MobileBertSquad" + TAGS + "fp16" # This uses the same input MLIR source as fp32 to save download time. # It requires users to have "--iree-flow-demote-f32-to-f16". - "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-89edfa50d.tar.gz" # MLIR_SOURCE - "serving_default" # ENTRY_FUNCTION + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-89edfa50d.tar.gz" + ENTRY_FUNCTION + "serving_default" # The conversion done by "--iree-flow-demote-f32-to-f16" won't change the # original input signature. - "1x384xi32,1x384xi32,1x384xi32" # FUNCTION_INPUTS + FUNCTION_INPUTS + "1x384xi32,1x384xi32,1x384xi32" ) set(MOBILEBERT_FP32_MODULE - "MobileBertSquad" # MODULE_NAME - "fp32" # MODULE_TAGS - "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-89edfa50d.tar.gz" # MLIR_SOURCE - "serving_default" # ENTRY_FUNCTION - "1x384xi32,1x384xi32,1x384xi32" # FUNCTION_INPUTS + NAME + "MobileBertSquad" + TAGS + "fp32" + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-89edfa50d.tar.gz" + ENTRY_FUNCTION + "serving_default" + FUNCTION_INPUTS + "1x384xi32,1x384xi32,1x384xi32" ) set(MOBILENET_V2_MODULE - "MobileNetV2" # MODULE_NAME - "fp32,imagenet" # MODULE_TAGS - "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-89edfa50d.tar.gz" # MLIR_SOURCE - "call" # ENTRY_FUNCTION - "1x224x224x3xf32" # FUNCTION_INPUTS + NAME + "MobileNetV2" + TAGS + "fp32,imagenet" + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-89edfa50d.tar.gz" + ENTRY_FUNCTION + "call" + FUNCTION_INPUTS + "1x224x224x3xf32" ) set(MOBILENET_V3SMALL_MODULE - "MobileNetV3Small" # MODULE_NAME - "fp32,imagenet" # MODULE_TAGS - "https://storage.googleapis.com/iree-model-artifacts/MobileNetV3Small-89edfa50d.tar.gz" # 
MLIR_SOURCE - "call" # ENTRY_FUNCTION - "1x224x224x3xf32" # FUNCTION_INPUTS + NAME + "MobileNetV3Small" + TAGS + "fp32,imagenet" + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/MobileNetV3Small-89edfa50d.tar.gz" + ENTRY_FUNCTION + "call" + FUNCTION_INPUTS + "1x224x224x3xf32" ) ################################################################################ @@ -65,8 +85,8 @@ set(MOBILENET_V3SMALL_MODULE # CPU, VMVX, 3-thread, little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "3-thread,little-core,full-inference" @@ -86,8 +106,8 @@ iree_mlir_benchmark_suite( # CPU, Dylib-Sync, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "big-core,full-inference" @@ -109,8 +129,8 @@ iree_mlir_benchmark_suite( # CPU, Dylib, 1-thread, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "1-thread,big-core,full-inference" @@ -133,8 +153,8 @@ iree_mlir_benchmark_suite( # CPU, Dylib, 3-thread, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "3-thread,big-core,full-inference" @@ -157,9 +177,9 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Adreno, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP32_MODULE} - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILEBERT_FP32_MODULE}" + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "full-inference" @@ -179,8 +199,8 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Adreno, kernel-execution 
iree_mlir_benchmark_suite( MODULES - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "kernel-execution" @@ -203,9 +223,9 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP32_MODULE} - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILEBERT_FP32_MODULE}" + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "full-inference" @@ -225,8 +245,8 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, kernel-execution iree_mlir_benchmark_suite( MODULES - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "kernel-execution" @@ -249,7 +269,7 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, kernel-execution iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP16_MODULE} + "${MOBILEBERT_FP16_MODULE}" BENCHMARK_MODES "kernel-execution" @@ -273,7 +293,7 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP16_MODULE} + "${MOBILEBERT_FP16_MODULE}" BENCHMARK_MODES "full-inference" @@ -303,7 +323,7 @@ iree_mlir_benchmark_suite( # CPU, Dylib-Sync, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP32_MODULE} + "${MOBILEBERT_FP32_MODULE}" BENCHMARK_MODES "big-core,full-inference" @@ -325,7 +345,7 @@ iree_mlir_benchmark_suite( # CPU, Dylib, 1-thread, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP32_MODULE} + "${MOBILEBERT_FP32_MODULE}" BENCHMARK_MODES "1-thread,big-core,full-inference" @@ -348,7 +368,7 @@ iree_mlir_benchmark_suite( # CPU, Dylib, 3-thread, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP32_MODULE} + "${MOBILEBERT_FP32_MODULE}" BENCHMARK_MODES "3-thread,big-core,full-inference" diff --git a/build_tools/cmake/build_android.sh 
b/build_tools/cmake/build_android.sh index 12d1bd1175ae..dcf07f48862a 100755 --- a/build_tools/cmake/build_android.sh +++ b/build_tools/cmake/build_android.sh @@ -45,6 +45,7 @@ cd build-host # Configure, build, install. "${CMAKE_BIN?}" -G Ninja .. \ -DCMAKE_INSTALL_PREFIX=./install \ + -DIREE_ENABLE_ASSERTIONS=ON \ -DIREE_BUILD_COMPILER=ON \ -DIREE_BUILD_TESTS=OFF \ -DIREE_BUILD_BENCHMARKS=ON \ @@ -74,6 +75,7 @@ cd build-android -DANDROID_ABI="${ANDROID_ABI?}" \ -DANDROID_PLATFORM=android-29 \ -DIREE_HOST_BINARY_ROOT=$PWD/../build-host/install \ + -DIREE_ENABLE_ASSERTIONS=ON \ -DIREE_BUILD_COMPILER=OFF \ -DIREE_BUILD_TESTS=ON \ -DIREE_BUILD_SAMPLES=OFF diff --git a/build_tools/cmake/build_riscv.sh b/build_tools/cmake/build_riscv.sh index bf085acec339..4bc9ae4d5bc0 100755 --- a/build_tools/cmake/build_riscv.sh +++ b/build_tools/cmake/build_riscv.sh @@ -40,6 +40,7 @@ fi -DCMAKE_INSTALL_PREFIX="${BUILD_HOST_DIR?}/install" \ -DCMAKE_C_COMPILER=clang \ -DCMAKE_CXX_COMPILER=clang++ \ + -DIREE_ENABLE_ASSERTIONS=ON \ -DIREE_BUILD_COMPILER=ON \ -DIREE_BUILD_TESTS=OFF \ -DIREE_BUILD_SAMPLES=OFF \ @@ -66,6 +67,7 @@ args=( -DCMAKE_TOOLCHAIN_FILE="$(realpath ${ROOT_DIR?}/build_tools/cmake/riscv.toolchain.cmake)" -DIREE_HOST_BINARY_ROOT="$(realpath ${BUILD_HOST_DIR?}/install)" -DRISCV_CPU="${RISCV_CONFIG?}" + -DIREE_ENABLE_ASSERTIONS=ON -DIREE_BUILD_COMPILER=OFF -DIREE_BUILD_SAMPLES=ON ) diff --git a/build_tools/cmake/iree_macros.cmake b/build_tools/cmake/iree_macros.cmake index 3dccb5af0828..aebbf9f15c16 100644 --- a/build_tools/cmake/iree_macros.cmake +++ b/build_tools/cmake/iree_macros.cmake @@ -284,7 +284,7 @@ endfunction() # Adds test environment variable properties based on the current build options. # # Parameters: -# TEST_NAME: the test name, e.g. iree/base:math_test +# TEST_NAME: the test name, e.g. 
iree/base:math_test function(iree_add_test_environment_properties TEST_NAME) # IREE_*_DISABLE environment variables may used to skip test cases which # require both a compiler target backend and compatible runtime HAL driver. @@ -303,3 +303,51 @@ function(iree_add_test_environment_properties TEST_NAME) set_property(TEST ${TEST_NAME} APPEND PROPERTY ENVIRONMENT "IREE_LLVMAOT_DISABLE=1") endif() endfunction() + +# iree_check_defined +# +# A lightweight way to check that all the given variables are defined. Useful +# in cases like checking that a function has been passed all required arguments. +# Doesn't give usage-specific error messages, but still significantly better +# than no error checking. +# Variable names should be passed directly without quoting or dereferencing. +# Example: +# iree_check_defined(_SOME_VAR _AND_ANOTHER_VAR) +macro(iree_check_defined) + foreach(_VAR ${ARGN}) + if(NOT DEFINED "${_VAR}") + message(SEND_ERROR "${_VAR} is not defined") + endif() + endforeach() +endmacro() + +# iree_validate_required_arguments +# +# Validates that no arguments went unparsed or were given no values and that all +# required arguments have values. Expects to be called after +# cmake_parse_arguments and verifies that the variables it creates have been +# populated as appropriate. 
+function(iree_validate_required_arguments + PREFIX + REQUIRED_ONE_VALUE_KEYWORDS + REQUIRED_MULTI_VALUE_KEYWORDS) + if(DEFINED ${PREFIX}_UNPARSED_ARGUMENTS) + message(SEND_ERROR "Unparsed argument(s): '${${PREFIX}_UNPARSED_ARGUMENTS}'") + endif() + if(DEFINED ${PREFIX}_KEYWORDS_MISSING_VALUES) + message(SEND_ERROR + "No values for field(s) '${${PREFIX}_KEYWORDS_MISSING_VALUES}'") + endif() + + foreach(_ONE_VALUE_KEYWORD IN LISTS REQUIRED_ONE_VALUE_KEYWORDS) + if(NOT DEFINED ${PREFIX}_${_ONE_VALUE_KEYWORD}) + message(SEND_ERROR "Missing required argument ${_ONE_VALUE_KEYWORD}") + endif() + endforeach() + + foreach(_MULTI_VALUE_KEYWORD IN LISTS REQUIRED_MULTI_VALUE_KEYWORDS) + if(NOT DEFINED ${PREFIX}_${_MULTI_VALUE_KEYWORD}) + message(SEND_ERROR "Missing required argument ${_MULTI_VALUE_KEYWORD}") + endif() + endforeach() +endfunction() diff --git a/build_tools/cmake/iree_mlir_benchmark_suite.cmake b/build_tools/cmake/iree_mlir_benchmark_suite.cmake index 8028809c8ce0..f2971bd99fc1 100644 --- a/build_tools/cmake/iree_mlir_benchmark_suite.cmake +++ b/build_tools/cmake/iree_mlir_benchmark_suite.cmake @@ -71,62 +71,44 @@ function(iree_mlir_benchmark_suite) "BENCHMARK_MODES;MODULES;TRANSLATION_FLAGS;RUNTIME_FLAGS" ) - # All fields' names for each module. - set(_FIELD_NAMES "_MODULE_NAME" "_MODULE_TAGS" - "_MLIR_SOURCE" "_ENTRY_FUNCTION" "_FUNCTION_INPUTS") - list(LENGTH _FIELD_NAMES _FIELD_COUNT) - math(EXPR _MAX_FIELD_INDEX "${_FIELD_COUNT} - 1") - - # Make sure we have some multiple of six elements. 
- list(LENGTH _RULE_MODULES _MODULE_TOTAL_ELEMENT_COUNT) - math(EXPR _MODULE_COUNT - "${_MODULE_TOTAL_ELEMENT_COUNT} / ${_FIELD_COUNT}") - math(EXPR _MODULE_ELEMENT_REMAINDER - "${_MODULE_TOTAL_ELEMENT_COUNT} % ${_FIELD_COUNT}") - if(NOT ${_MODULE_ELEMENT_REMAINDER} EQUAL 0) - message(SEND_ERROR "MODULES expected to have some multiple of six " - "elements; some module has missing/redundant fields.") - endif() + iree_validate_required_arguments( + _RULE + "DRIVER;TARGET_BACKEND;TARGET_ARCHITECTURE" + "BENCHMARK_MODES;MODULES" + ) - # Loop over all modules to create targets. - math(EXPR _MAX_MODULE_INDEX "${_MODULE_COUNT} - 1") - foreach(_MODULE_INDEX RANGE 0 "${_MAX_MODULE_INDEX}") - # Loop over all elements for the current module and assign them to the - # corresponding field names for later use. - foreach(_FIELD_INDEX RANGE 0 "${_MAX_FIELD_INDEX}") - list(GET _FIELD_NAMES ${_FIELD_INDEX} _FIELD_NAME) - math(EXPR _INDEX "${_MODULE_INDEX} * ${_FIELD_COUNT} + ${_FIELD_INDEX}") - list(GET _RULE_MODULES ${_INDEX} ${_FIELD_NAME}) - endforeach() - - # Use the last directory's name as the category. - get_filename_component(_CATEGORY "${CMAKE_CURRENT_SOURCE_DIR}" NAME) + foreach(_MODULE IN LISTS _RULE_MODULES) + cmake_parse_arguments( + _MODULE + "" + "NAME;TAGS;MLIR_SOURCE;ENTRY_FUNCTION;FUNCTION_INPUTS" + "" + ${_MODULE} + ) + iree_validate_required_arguments( + _MODULE + "NAME;TAGS;MLIR_SOURCE;ENTRY_FUNCTION;FUNCTION_INPUTS" + "" + ) - # Generate all benchmarks to the root build directory. This helps for - # discovering them and execute them on devices. + get_filename_component(_CATEGORY "${CMAKE_CURRENT_SOURCE_DIR}" NAME) set(_ROOT_ARTIFACTS_DIR "${IREE_BINARY_DIR}/benchmark_suites/${_CATEGORY}") set(_VMFB_ARTIFACTS_DIR "${_ROOT_ARTIFACTS_DIR}/vmfb") - # The source file used to generate benchmark artifacts. - set(_SOURCE_FILE "${_MLIR_SOURCE}") # The CMake target's name if we need to download from the web. 
set(_DOWNLOAD_TARGET_NAME "") - # If the source file is from the web, create a custom command to download it. - # And wrap that with a custom target so later we can use for dependency. + # If the source file is from the web, create a custom command to download + # it and wrap that with a custom target so later we can use for dependency. # # Note: We actually should not do this; instead, we should directly compile # from the initial source (i.e., TensorFlow Python models). But that is - # tangled with the pending Python testing infrastructure revamp so we'd prefer - # to not do that right now. - if("${_MLIR_SOURCE}" MATCHES "^https?://") + # tangled with the pending Python testing infrastructure revamp so we'd + # prefer to not do that right now. + if("${_MODULE_MLIR_SOURCE}" MATCHES "^https?://") # Update the source file to the downloaded-to place. - string(REPLACE "/" ";" _SOURCE_URL_SEGMENTS "${_MLIR_SOURCE}") - # TODO: we can do `list(POP_BACK _SOURCE_URL_SEGMENTS _LAST_URL_SEGMENT)` - # after migrating to CMake 3.15. 
- list(LENGTH _SOURCE_URL_SEGMENTS _URL_SEGMENT_COUNT) - math(EXPR _SEGMENT_LAST_INDEX "${_URL_SEGMENT_COUNT} - 1") - list(GET _SOURCE_URL_SEGMENTS ${_SEGMENT_LAST_INDEX} _LAST_URL_SEGMENT) + string(REPLACE "/" ";" _SOURCE_URL_SEGMENTS "${_MODULE_MLIR_SOURCE}") + list(POP_BACK _SOURCE_URL_SEGMENTS _LAST_URL_SEGMENT) set(_DOWNLOAD_TARGET_NAME "iree-download-benchmark-source-${_LAST_URL_SEGMENT}") string(REPLACE "tar.gz" "mlir" _FILE_NAME "${_LAST_URL_SEGMENT}") @@ -137,10 +119,10 @@ function(iree_mlir_benchmark_suite) OUTPUT "${_SOURCE_FILE}" COMMAND "${Python3_EXECUTABLE}" "${IREE_ROOT_DIR}/scripts/download_file.py" - "${_MLIR_SOURCE}" -o "${_ROOT_ARTIFACTS_DIR}" + "${_MODULE_MLIR_SOURCE}" -o "${_ROOT_ARTIFACTS_DIR}" DEPENDS "${IREE_ROOT_DIR}/scripts/download_file.py" - COMMENT "Downloading ${_MLIR_SOURCE}" + COMMENT "Downloading ${_MODULE_MLIR_SOURCE}" ) add_custom_target("${_DOWNLOAD_TARGET_NAME}" DEPENDS "${_SOURCE_FILE}" @@ -160,8 +142,8 @@ function(iree_mlir_benchmark_suite) string(REPLACE "," "-" _TAGS "${_MODULE_TAGS}") string(REPLACE "," "-" _MODE "${_BENCHMARK_MODE}") list(APPEND _COMMON_NAME_SEGMENTS - "${_TAGS}" "${_MODE}" "${_RULE_TARGET_BACKEND}" - "${_RULE_TARGET_ARCHITECTURE}") + "${_TAGS}" "${_MODE}" "${_RULE_TARGET_BACKEND}" + "${_RULE_TARGET_ARCHITECTURE}") # The full list of translation flags. set(_TRANSLATION_ARGS "--iree-mlir-to-vm-bytecode-module") @@ -220,8 +202,8 @@ function(iree_mlir_benchmark_suite) "${Python3_EXECUTABLE}" "${IREE_ROOT_DIR}/scripts/generate_flagfile.py" --module_file="../../vmfb/compiled-${_VMFB_HASH}.vmfb" --driver=${_RULE_DRIVER} - --entry_function=${_ENTRY_FUNCTION} - --function_inputs=${_FUNCTION_INPUTS} + --entry_function=${_MODULE_ENTRY_FUNCTION} + --function_inputs=${_MODULE_FUNCTION_INPUTS} "${_ADDITIONAL_ARGS_CL}" -o "${_FLAG_FILE}" DEPENDS @@ -241,5 +223,6 @@ function(iree_mlir_benchmark_suite) # Mark dependency so that we have one target to drive them all. 
add_dependencies(iree-benchmark-suites "${_FLAGFILE_GEN_TARGET_NAME}") endforeach(_BENCHMARK_MODE IN LISTS _RULE_BENCHMARK_MODES) - endforeach(_MODULE_INDEX RANGE 0 "${_MAX_MODULE_INDEX}") -endfunction() + + endforeach(_MODULE IN LISTS _RULE_MODULES) +endfunction(iree_mlir_benchmark_suite) diff --git a/build_tools/cmake/iree_run_binary_test.cmake b/build_tools/cmake/iree_run_binary_test.cmake index c233760b1927..5d689004e749 100644 --- a/build_tools/cmake/iree_run_binary_test.cmake +++ b/build_tools/cmake/iree_run_binary_test.cmake @@ -74,8 +74,7 @@ function(iree_run_binary_test) endif() if(ANDROID) - set(_ANDROID_REL_DIR "${_PACKAGE_PATH}/${_RULE_NAME}") - set(_ANDROID_ABS_DIR "/data/local/tmp/${_ANDROID_REL_DIR}") + set(_ANDROID_ABS_DIR "/data/local/tmp/${_PACKAGE_PATH}/${_RULE_NAME}") endif() if (DEFINED _RULE_TEST_INPUT_FILE_ARG) @@ -92,9 +91,6 @@ function(iree_run_binary_test) string(REGEX REPLACE "^::" "${_PACKAGE_NS}::" _TEST_BINARY_TARGET ${_RULE_TEST_BINARY}) if(ANDROID) - set(_ANDROID_REL_DIR "${_PACKAGE_PATH}/${_RULE_NAME}") - set(_ANDROID_ABS_DIR "/data/local/tmp/${_ANDROID_REL_DIR}") - # Define a custom target for pushing and running the test on Android device. 
set(_TEST_NAME ${_TEST_NAME}_on_android_device) add_test( @@ -102,7 +98,7 @@ function(iree_run_binary_test) ${_TEST_NAME} COMMAND "${CMAKE_SOURCE_DIR}/build_tools/cmake/run_android_test.${IREE_HOST_SCRIPT_EXT}" - "${_ANDROID_REL_DIR}/$" + "${_ANDROID_ABS_DIR}/$" ${_RULE_ARGS} ) # Use environment variables to instruct the script to push artifacts diff --git a/build_tools/cmake/iree_third_party_cmake_options.cmake b/build_tools/cmake/iree_third_party_cmake_options.cmake index 2efba4e42db9..37fd9da2508b 100644 --- a/build_tools/cmake/iree_third_party_cmake_options.cmake +++ b/build_tools/cmake/iree_third_party_cmake_options.cmake @@ -17,19 +17,6 @@ macro(iree_set_cpuinfo_cmake_options) set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "" FORCE) endmacro() -macro(iree_set_flatcc_cmake_options) - set(FLATCC_TEST OFF CACHE BOOL "" FORCE) - set(FLATCC_CXX_TEST OFF CACHE BOOL "" FORCE) - set(FLATCC_REFLECTION OFF CACHE BOOL "" FORCE) - set(FLATCC_ALLOW_WERROR OFF CACHE BOOL "" FORCE) - - if(CMAKE_CROSSCOMPILING) - set(FLATCC_RTONLY ON CACHE BOOL "" FORCE) - else() - set(FLATCC_RTONLY OFF CACHE BOOL "" FORCE) - endif() -endmacro() - macro(iree_set_googletest_cmake_options) set(INSTALL_GTEST OFF CACHE BOOL "" FORCE) set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) diff --git a/build_tools/cmake/rebuild.sh b/build_tools/cmake/rebuild.sh index 2e202457b967..68e94200f089 100755 --- a/build_tools/cmake/rebuild.sh +++ b/build_tools/cmake/rebuild.sh @@ -41,6 +41,13 @@ CMAKE_ARGS=( # Enable building the python bindings on CI. Most heavy targets are gated on # IREE_ENABLE_TENSORFLOW, so what's left here should be fast. "-DIREE_BUILD_PYTHON_BINDINGS=ON" + + # Enable assertions. We wouldn't want to be testing *only* with assertions + # enabled, but at the moment only certain CI builds are using this script, + # e.g. 
ASan builds are not using this, so by enabling assertions here, we + # get a reasonable mix of {builds with asserts, builds with other features + # such as ASan but without asserts}. + "-DIREE_ENABLE_ASSERTIONS=ON" ) "$CMAKE_BIN" "${CMAKE_ARGS[@]?}" .. diff --git a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh index 3c6a6158b02d..1d9512db0484 100755 --- a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh +++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh @@ -100,6 +100,9 @@ label_exclude_regex="($(IFS="|" ; echo "${label_exclude_args[*]?}"))" # These tests currently have asan failures # TODO(#5715): Fix these declare -a excluded_tests=( + "iree/base/internal/file_io_test" + "iree/samples/static_library/static_library_demo_test" + "bindings/tflite/smoke_test" "iree/hal/cts/allocator_test" "iree/hal/cts/buffer_mapping_test" "iree/hal/cts/command_buffer_test" @@ -107,10 +110,9 @@ declare -a excluded_tests=( "iree/hal/cts/driver_test" "iree/hal/cts/event_test" "iree/hal/cts/executable_layout_test" - "iree/hal/cts/semaphore_test" "iree/hal/cts/semaphore_submission_test" + "iree/hal/cts/semaphore_test" "iree/modules/check/check_test" - "bindings/tflite/smoke_test" "iree/samples/simple_embedding/simple_embedding_vulkan_test" ) diff --git a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh index c4e0084c4603..4af8334c0970 100755 --- a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh +++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh @@ -25,7 +25,7 @@ docker_setup docker run "${DOCKER_RUN_ARGS[@]?}" \ gcr.io/iree-oss/cmake-swiftshader@sha256:031aded9cd66d30fcfa4dabea05a69721f33239516bc2e10ca216afd9ae4c012 \ - 
build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build.sh + build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh # Kokoro will rsync this entire directory back to the executor orchestrating the # build which takes forever and is totally useless. diff --git a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg index de6fb31e1e2e..73224e5e83f2 100644 --- a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg +++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg @@ -6,4 +6,4 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build_kokoro.sh" +build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh" diff --git a/build_tools/third_party/flatcc/CMakeLists.txt b/build_tools/third_party/flatcc/CMakeLists.txt index f707b567f03b..74caf63763c1 100644 --- a/build_tools/third_party/flatcc/CMakeLists.txt +++ b/build_tools/third_party/flatcc/CMakeLists.txt @@ -85,14 +85,35 @@ external_cc_library( if(NOT CMAKE_CROSSCOMPILING) add_executable(iree-flatcc-cli "${FLATCC_ROOT}/src/cli/flatcc_cli.c" + "${FLATCC_ROOT}/external/hash/cmetrohash64.c" + "${FLATCC_ROOT}/external/hash/str_set.c" + "${FLATCC_ROOT}/external/hash/ptr_set.c" + "${FLATCC_ROOT}/src/compiler/hash_tables/symbol_table.c" + "${FLATCC_ROOT}/src/compiler/hash_tables/scope_table.c" + "${FLATCC_ROOT}/src/compiler/hash_tables/name_table.c" + "${FLATCC_ROOT}/src/compiler/hash_tables/schema_table.c" + "${FLATCC_ROOT}/src/compiler/hash_tables/value_set.c" + "${FLATCC_ROOT}/src/compiler/fileio.c" + "${FLATCC_ROOT}/src/compiler/parser.c" + "${FLATCC_ROOT}/src/compiler/semantics.c" + "${FLATCC_ROOT}/src/compiler/coerce.c" + "${FLATCC_ROOT}/src/compiler/codegen_schema.c" + 
"${FLATCC_ROOT}/src/compiler/flatcc.c" + "${FLATCC_ROOT}/src/compiler/codegen_c.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_reader.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_sort.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_builder.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_verifier.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_sorter.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_json_parser.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_json_printer.c" + "${FLATCC_ROOT}/src/runtime/builder.c" + "${FLATCC_ROOT}/src/runtime/emitter.c" + "${FLATCC_ROOT}/src/runtime/refmap.c" ) - - target_link_libraries(iree-flatcc-cli - flatcc - ) - target_include_directories(iree-flatcc-cli SYSTEM PUBLIC + "${FLATCC_ROOT}/external" "${FLATCC_ROOT}/include" "${FLATCC_ROOT}/config" ) diff --git a/docs/website/docs/building-from-source/optional-features.md b/docs/website/docs/building-from-source/optional-features.md index 216f63e8f6df..b29458608923 100644 --- a/docs/website/docs/building-from-source/optional-features.md +++ b/docs/website/docs/building-from-source/optional-features.md @@ -79,7 +79,8 @@ package manager ([about](https://docs.python.org/3/library/venv.html), python -m pip install -r bindings\python\build_requirements.txt ``` -When done, close your shell or run `deactivate`. +When you are done with the venv, you can close it by closing your shell +or running `deactivate`. ### Usage diff --git a/integrations/tensorflow/CMakeLists.txt b/integrations/tensorflow/CMakeLists.txt index 777bf753b35b..71c188ee396d 100644 --- a/integrations/tensorflow/CMakeLists.txt +++ b/integrations/tensorflow/CMakeLists.txt @@ -8,57 +8,10 @@ # dependent code under this directory tree. The CMake support is limited to # compiler binaries and python bindings. # -# Bazel is a beast that likes to be the center of the universe. There is some -# fragility in delegating to it in this fashion. 
-# # If this directory is included, then building TensorFlow is assumed (the # config option happens at the higher level). -set(_bazel_targets) -set(_executable_paths) - -set(IREE_TF_TOOLS_ROOT - "${CMAKE_SOURCE_DIR}/integrations/tensorflow/bazel-bin/iree_tf_compiler" - CACHE STRING "Root directory for IREE TensorFlow integration binaries") - - -if(${IREE_BUILD_TENSORFLOW_COMPILER} OR ${IREE_BUILD_TENSORFLOW_ALL}) - add_executable(iree_tf_compiler_iree-import-tf IMPORTED GLOBAL) - set_property(TARGET iree_tf_compiler_iree-import-tf - PROPERTY IMPORTED_LOCATION - "${IREE_TF_TOOLS_ROOT}/iree-import-tf" - ) -endif() - -if(${IREE_BUILD_TFLITE_COMPILER} OR ${IREE_BUILD_TENSORFLOW_ALL}) - add_executable(iree_tf_compiler_iree-import-tflite IMPORTED GLOBAL) - set_property(TARGET iree_tf_compiler_iree-import-tflite - PROPERTY IMPORTED_LOCATION - "${IREE_TF_TOOLS_ROOT}/iree-import-tflite" - ) -endif() - -if(${IREE_BUILD_XLA_COMPILER} OR ${IREE_BUILD_TENSORFLOW_ALL}) - add_executable(iree_tf_compiler_iree-import-xla IMPORTED GLOBAL) - set_property(TARGET iree_tf_compiler_iree-import-xla - PROPERTY IMPORTED_LOCATION - "${IREE_TF_TOOLS_ROOT}/iree-import-xla" - ) -endif() - -if(${IREE_BUILD_TESTS}) - add_executable(iree_tf_compiler_iree-tf-opt IMPORTED GLOBAL) - set_property(TARGET iree_tf_compiler_iree-tf-opt - PROPERTY IMPORTED_LOCATION - "${IREE_TF_TOOLS_ROOT}/iree-tf-opt" - ) - - add_executable(iree_tf_compiler_iree-opt-tflite IMPORTED GLOBAL) - set_property(TARGET iree_tf_compiler_iree-opt-tflite - PROPERTY IMPORTED_LOCATION - "${IREE_TF_TOOLS_ROOT}/iree-opt-tflite" - ) -endif() +add_subdirectory(iree_tf_compiler) if(${IREE_BUILD_PYTHON_BINDINGS}) add_subdirectory(bindings/python) diff --git a/integrations/tensorflow/bindings/python/iree/tools/tf/CMakeLists.txt b/integrations/tensorflow/bindings/python/iree/tools/tf/CMakeLists.txt index 0cd23102a47d..77fd6c117012 100644 --- a/integrations/tensorflow/bindings/python/iree/tools/tf/CMakeLists.txt +++ 
b/integrations/tensorflow/bindings/python/iree/tools/tf/CMakeLists.txt @@ -14,12 +14,12 @@ iree_py_library( tf SRCS ${_srcs} DEPS - iree_tf_compiler_iree-import-tf + integrations::tensorflow::iree_tf_compiler::iree-import-tf ) iree_symlink_tool( TARGET tf - FROM_TOOL_TARGET iree_tf_compiler_iree-import-tf + FROM_TOOL_TARGET integrations::tensorflow::iree_tf_compiler::iree-import-tf TO_EXE_NAME iree-import-tf ) @@ -29,13 +29,13 @@ iree_py_install_package( MODULE_PATH iree/tools/tf FILES_MATCHING ${_srcs} DEPS - iree_tf_compiler_iree-import-tf + integrations::tensorflow::iree_tf_compiler::iree-import-tf ) # Since imported, need to resolve the TARGET_FILE ourselves instead of # install TARGETS form. install( - PROGRAMS "$" + PROGRAMS "$" DESTINATION "${PY_INSTALL_MODULE_DIR}" COMPONENT "${PY_INSTALL_COMPONENT}" ) diff --git a/integrations/tensorflow/bindings/python/iree/tools/tflite/CMakeLists.txt b/integrations/tensorflow/bindings/python/iree/tools/tflite/CMakeLists.txt index 69d301f7ab27..d5cbb650ea71 100644 --- a/integrations/tensorflow/bindings/python/iree/tools/tflite/CMakeLists.txt +++ b/integrations/tensorflow/bindings/python/iree/tools/tflite/CMakeLists.txt @@ -14,12 +14,12 @@ iree_py_library( tflite SRCS ${_srcs} DEPS - iree_tf_compiler_iree-import-tflite + integrations::tensorflow::iree_tf_compiler::iree-import-tflite ) iree_symlink_tool( TARGET tflite - FROM_TOOL_TARGET iree_tf_compiler_iree-import-tflite + FROM_TOOL_TARGET integrations::tensorflow::iree_tf_compiler::iree-import-tflite TO_EXE_NAME iree-import-tflite ) @@ -29,13 +29,13 @@ iree_py_install_package( MODULE_PATH iree/tools/tflite FILES_MATCHING ${_srcs} DEPS - iree_tf_compiler_iree-import-tflite + integrations::tensorflow::iree_tf_compiler::iree-import-tflite ) # Since imported, need to resolve the TARGET_FILE ourselves instead of # install TARGETS form. 
install( - PROGRAMS "$" + PROGRAMS "$" DESTINATION "${PY_INSTALL_MODULE_DIR}" COMPONENT "${PY_INSTALL_COMPONENT}" ) diff --git a/integrations/tensorflow/bindings/python/iree/tools/xla/CMakeLists.txt b/integrations/tensorflow/bindings/python/iree/tools/xla/CMakeLists.txt index 8e7133d600b3..637bb8179abb 100644 --- a/integrations/tensorflow/bindings/python/iree/tools/xla/CMakeLists.txt +++ b/integrations/tensorflow/bindings/python/iree/tools/xla/CMakeLists.txt @@ -14,12 +14,12 @@ iree_py_library( xla SRCS ${_srcs} DEPS - iree_tf_compiler_iree-import-xla + integrations::tensorflow::iree_tf_compiler::iree-import-xla ) iree_symlink_tool( TARGET xla - FROM_TOOL_TARGET iree_tf_compiler_iree-import-xla + FROM_TOOL_TARGET integrations::tensorflow::iree_tf_compiler::iree-import-xla TO_EXE_NAME iree-import-xla ) @@ -29,13 +29,13 @@ iree_py_install_package( MODULE_PATH iree/tools/xla FILES_MATCHING ${_srcs} DEPS - iree_tf_compiler_iree-import-xla + integrations::tensorflow::iree_tf_compiler::iree-import-xla ) # Since imported, need to resolve the TARGET_FILE ourselves instead of # install TARGETS form. install( - PROGRAMS "$" + PROGRAMS "$" DESTINATION "${PY_INSTALL_MODULE_DIR}" COMPONENT "${PY_INSTALL_COMPONENT}" ) diff --git a/integrations/tensorflow/iree_tf_compiler/CMakeLists.txt b/integrations/tensorflow/iree_tf_compiler/CMakeLists.txt new file mode 100644 index 000000000000..0e3e42b4c018 --- /dev/null +++ b/integrations/tensorflow/iree_tf_compiler/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright 2020 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +set(IREE_TF_TOOLS_ROOT + "${CMAKE_SOURCE_DIR}/integrations/tensorflow/bazel-bin/iree_tf_compiler" + CACHE STRING "Root directory for IREE TensorFlow integration binaries") + +iree_package_name(_PACKAGE_NAME) +iree_package_ns(_PACKAGE_NS) + +function(configure_tf_binary BINARY_NAME) + set(_NAME "${_PACKAGE_NAME}_${BINARY_NAME}") + add_executable("${_NAME}" IMPORTED GLOBAL) + set_property(TARGET ${_NAME} + PROPERTY IMPORTED_LOCATION + "${IREE_TF_TOOLS_ROOT}/${BINARY_NAME}" + ) + add_executable(${_PACKAGE_NS}::${BINARY_NAME} ALIAS ${_NAME}) + add_executable(${BINARY_NAME} ALIAS ${_NAME}) +endfunction() + +if(${IREE_BUILD_TENSORFLOW_COMPILER}) + configure_tf_binary("iree-import-tf") +endif() + +if(${IREE_BUILD_TFLITE_COMPILER}) + configure_tf_binary("iree-import-tflite") +endif() + +if(${IREE_BUILD_XLA_COMPILER}) + configure_tf_binary("iree-import-xla") +endif() + +if(${IREE_BUILD_TESTS}) + configure_tf_binary("iree-tf-opt") + configure_tf_binary("iree-opt-tflite") +endif() diff --git a/iree/base/status.h b/iree/base/status.h index 790c29797e9c..81baad3a8004 100644 --- a/iree/base/status.h +++ b/iree/base/status.h @@ -230,6 +230,8 @@ typedef struct iree_status_handle_t* iree_status_t; #if IREE_STATUS_FEATURES == 0 #define IREE_STATUS_IMPL_MAKE_(code, ...) \ (iree_status_t)(uintptr_t)((code)&IREE_STATUS_CODE_MASK) +#define IREE_STATUS_IMPL_MAKE_LOC_(file, line, code, ...) \ + IREE_STATUS_IMPL_MAKE_(code) #undef IREE_STATUS_IMPL_RETURN_IF_API_ERROR_ #define IREE_STATUS_IMPL_RETURN_IF_API_ERROR_(var, ...) \ iree_status_t var = (IREE_STATUS_IMPL_IDENTITY_( \ @@ -254,6 +256,8 @@ typedef struct iree_status_handle_t* iree_status_t; #else #define IREE_STATUS_IMPL_MAKE_(...) \ IREE_STATUS_IMPL_MAKE_SWITCH_(__FILE__, __LINE__, __VA_ARGS__) +#define IREE_STATUS_IMPL_MAKE_LOC_(file, line, ...) \ + IREE_STATUS_IMPL_MAKE_SWITCH_(file, line, __VA_ARGS__) #endif // !IREE_STATUS_FEATURES // Returns an IREE_STATUS_OK. 
@@ -271,6 +275,15 @@ typedef struct iree_status_handle_t* iree_status_t; // return iree_make_status(IREE_STATUS_CANCELLED, "because %d > %d", a, b); #define iree_make_status IREE_STATUS_IMPL_MAKE_ +// Makes an iree_status_t with the given iree_status_code_t code using the given +// source location. Besides taking the file and line of the source location this +// is the same as iree_make_status. +// +// Examples: +// return iree_make_status_with_location( +// "file.c", 40, IREE_STATUS_CANCELLED, "because %d > %d", a, b); +#define iree_make_status_with_location IREE_STATUS_IMPL_MAKE_LOC_ + // Propagates the error returned by (expr) by returning from the current // function on non-OK status. Optionally annotates the status with additional // information (see iree_status_annotate for more information). diff --git a/iree/compiler/Codegen/Common/BUILD b/iree/compiler/Codegen/Common/BUILD index 6f71b73b74a9..0593b9b1b5bb 100644 --- a/iree/compiler/Codegen/Common/BUILD +++ b/iree/compiler/Codegen/Common/BUILD @@ -46,6 +46,7 @@ cc_library( deps = [ "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common:FoldTensorExtractOpIncGen", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/Transforms", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/Flow/IR", diff --git a/iree/compiler/Codegen/Common/CMakeLists.txt b/iree/compiler/Codegen/Common/CMakeLists.txt index 5e77f084ba4b..ce29221d09ec 100644 --- a/iree/compiler/Codegen/Common/CMakeLists.txt +++ b/iree/compiler/Codegen/Common/CMakeLists.txt @@ -56,6 +56,7 @@ iree_cc_library( MLIRTransforms MLIRVector iree::compiler::Codegen::Common::FoldTensorExtractOpIncGen + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::Transforms iree::compiler::Codegen::Utils diff --git a/iree/compiler/Codegen/Common/SetNumWorkgroupsPass.cpp b/iree/compiler/Codegen/Common/SetNumWorkgroupsPass.cpp index 0f70605ec154..06f3f51ff1ca 
100644 --- a/iree/compiler/Codegen/Common/SetNumWorkgroupsPass.cpp +++ b/iree/compiler/Codegen/Common/SetNumWorkgroupsPass.cpp @@ -4,14 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/Transforms/Transforms.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -90,14 +89,9 @@ void SetNumWorkgroupsPass::runOnOperation() { if (!workloadPerWorkgroup.empty()) { currWorkloadPerWorkgroup.assign(workloadPerWorkgroup.begin(), workloadPerWorkgroup.end()); - } else if (IREE::HAL::TranslationInfo translationInfo = + } else if (IREE::Codegen::TranslationInfoAttr translationInfo = getTranslationInfo(entryPointOp)) { - if (ArrayAttr workloadPerWorkgroupAttr = - translationInfo.workloadPerWorkgroup()) { - currWorkloadPerWorkgroup = llvm::to_vector<4>(llvm::map_range( - workloadPerWorkgroupAttr, - [](Attribute attr) { return attr.cast().getInt(); })); - } + currWorkloadPerWorkgroup = translationInfo.getWorkloadPerWorkgroupVals(); } if (!currWorkloadPerWorkgroup.empty()) { diff --git a/iree/compiler/Codegen/Common/VectorizeMMT4d.cpp b/iree/compiler/Codegen/Common/VectorizeMMT4d.cpp index 34340efeeb6b..c7db560c70bf 100644 --- a/iree/compiler/Codegen/Common/VectorizeMMT4d.cpp +++ b/iree/compiler/Codegen/Common/VectorizeMMT4d.cpp @@ -14,6 +14,23 @@ namespace iree_compiler { namespace { +Value promoteVector(Location loc, Value inputVector, Type promotedElementType, + PatternRewriter &rewriter) { + VectorType inputVectorType = 
inputVector.getType().cast(); + if (inputVectorType.getElementType() == promotedElementType) { + return inputVector; + } else { + auto promotedVectorType = inputVectorType.clone(promotedElementType); + if (promotedElementType.isIntOrIndex()) { + return rewriter.create(loc, inputVector, + promotedVectorType); + } else { + return rewriter.create(loc, inputVector, + promotedVectorType); + } + } +} + /// Converts linalg.mmt4d into vector.contract. /// This converts linalg.mmt4d with operands <1x1xM0xK0>, <1x1xK0xN0> /// to vector.contract where K0 is the contraction dimension. @@ -22,12 +39,13 @@ struct VectorizeMMT4DOp : public OpRewritePattern { LogicalResult matchAndRewrite(linalg::Mmt4DOp mmt4DOp, PatternRewriter &rewriter) const override { - auto lhs = mmt4DOp.inputs()[0]; - auto rhs = mmt4DOp.inputs()[1]; - auto dst = mmt4DOp.outputs()[0]; + Value lhs = mmt4DOp.inputs()[0]; + Value rhs = mmt4DOp.inputs()[1]; + Value dst = mmt4DOp.outputs()[0]; - auto lhsType = lhs.getType().dyn_cast(); - auto rhsType = rhs.getType().dyn_cast(); + ShapedType lhsType = lhs.getType().dyn_cast(); + ShapedType rhsType = rhs.getType().dyn_cast(); + ShapedType dstType = dst.getType().dyn_cast(); // This pattern expects tensors of static shapes. 
// In practice, dynamic shapes are meant to be handled by other passes, @@ -55,16 +73,20 @@ struct VectorizeMMT4DOp : public OpRewritePattern { int N0 = rhsType.getShape()[2]; int K0 = lhsType.getShape()[3]; - auto loc = mmt4DOp.getLoc(); - auto c0 = rewriter.create(loc, 0); + Location loc = mmt4DOp.getLoc(); + Value c0 = rewriter.create(loc, 0); + + Type lhsElementType = lhsType.getElementType(); + Type rhsElementType = rhsType.getElementType(); + Type dstElementType = dstType.getElementType(); - auto lhsVecType = VectorType::get({1, 1, M0, K0}, rewriter.getF32Type()); - auto rhsVecType = VectorType::get({1, 1, N0, K0}, rewriter.getF32Type()); - auto dstVecType = VectorType::get({1, 1, M0, N0}, rewriter.getF32Type()); + auto lhsVecType = VectorType::get({1, 1, M0, K0}, lhsElementType); + auto rhsVecType = VectorType::get({1, 1, N0, K0}, rhsElementType); + auto dstVecType = VectorType::get({1, 1, M0, N0}, dstElementType); - auto lhsVecType2D = VectorType::get({M0, K0}, rewriter.getF32Type()); - auto rhsVecType2D = VectorType::get({N0, K0}, rewriter.getF32Type()); - auto dstVecType2D = VectorType::get({M0, N0}, rewriter.getF32Type()); + auto lhsVecType2D = VectorType::get({M0, K0}, lhsElementType); + auto rhsVecType2D = VectorType::get({N0, K0}, rhsElementType); + auto dstVecType2D = VectorType::get({M0, N0}, dstElementType); auto identityMap = rewriter.getMultiDimIdentityMap(4); @@ -84,6 +106,14 @@ struct VectorizeMMT4DOp : public OpRewritePattern { Value dstVec2D = rewriter.create(loc, dstVecType2D, dstVec); + // Promote, if needed, the element type in the lhs and rhs vectors to + // match the dst vector, so that the vector.contract below will involve + // only one element type. This is in line with planned design, see + // the closing comment on https://reviews.llvm.org/D112508 where the + // alternative of using mixed types was considered. 
+ Value promLhsVec2d = promoteVector(loc, lhsVec2D, dstElementType, rewriter); + Value promRhsVec2d = promoteVector(loc, rhsVec2D, dstElementType, rewriter); + // Generate the vector.contract on 2D vectors replacing the mmt4d op. auto m = rewriter.getAffineDimExpr(0); auto n = rewriter.getAffineDimExpr(1); @@ -96,7 +126,7 @@ struct VectorizeMMT4DOp : public OpRewritePattern { {getParallelIteratorTypeName(), getParallelIteratorTypeName(), getReductionIteratorTypeName()}); Value contractResult = rewriter.create( - loc, lhsVec2D, rhsVec2D, dstVec2D, indexingMaps, iterators); + loc, promLhsVec2d, promRhsVec2d, dstVec2D, indexingMaps, iterators); // Convert the output vector from 2D shape (M0xN0) to 4D shape (1x1xM0xN0) Value contractResult4D = diff --git a/iree/compiler/Codegen/Dialect/BUILD b/iree/compiler/Codegen/Dialect/BUILD new file mode 100644 index 000000000000..9c6008b7ec0c --- /dev/null +++ b/iree/compiler/Codegen/Dialect/BUILD @@ -0,0 +1,107 @@ +# Copyright 2019 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +load("@llvm-project//mlir:tblgen.bzl", "gentbl_cc_library", "td_library") +load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], + licenses = ["notice"], # Apache 2.0 +) + +exports_files([ + "IREECodegenAttributes.td", + "IREECodegenDialect.td", + "LoweringConfig.td", +]) + +td_library( + name = "td_files", + srcs = enforce_glob( + [ + "IREECodegenAttributes.td", + "IREECodegenDialect.td", + "LoweringConfig.td", + ], + include = ["*.td"], + ), + deps = [ + "@llvm-project//mlir:OpBaseTdFiles", + ], +) + +cc_library( + name = "IREECodegenDialect", + srcs = [ + "IREECodegenDialect.cpp", + "LoweringConfig.cpp", + ], + hdrs = [ + "IREECodegenDialect.h", + "LoweringConfig.h", + ], + textual_hdrs = [ + "IREECodegenDialect.cpp.inc", + "IREECodegenDialect.h.inc", + "LoweringConfig.cpp.inc", + "LoweringConfig.h.inc", + "LoweringConfigEnums.cpp.inc", + "LoweringConfigEnums.h.inc", + ], + deps = [ + ":IREECodegenDialectGen", + ":LoweringConfigGen", + "//iree/compiler/Codegen/Utils", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:DialectUtils", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:StandardOps", + ], +) + +gentbl_cc_library( + name = "IREECodegenDialectGen", + tbl_outs = [ + ( + ["-gen-dialect-decls"], + "IREECodegenDialect.h.inc", + ), + ( + ["-gen-dialect-defs"], + "IREECodegenDialect.cpp.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "IREECodegenAttributes.td", + deps = [":td_files"], +) + +gentbl_cc_library( + name = "LoweringConfigGen", + tbl_outs = [ + ( + ["-gen-attrdef-decls"], + "LoweringConfig.h.inc", + ), + ( + ["-gen-attrdef-defs"], + "LoweringConfig.cpp.inc", + ), + ( + ["-gen-enum-decls"], + "LoweringConfigEnums.h.inc", + ), + ( + ["-gen-enum-defs"], + "LoweringConfigEnums.cpp.inc", + ), + ], + tblgen = 
"@llvm-project//mlir:mlir-tblgen", + td_file = "LoweringConfig.td", + deps = [":td_files"], +) diff --git a/iree/compiler/Codegen/Dialect/CMakeLists.txt b/iree/compiler/Codegen/Dialect/CMakeLists.txt new file mode 100644 index 000000000000..16f6826cb558 --- /dev/null +++ b/iree/compiler/Codegen/Dialect/CMakeLists.txt @@ -0,0 +1,62 @@ +################################################################################ +# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # +# iree/compiler/Codegen/Dialect/BUILD # +# # +# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # +# CMake-only content. # +# # +# To disable autogeneration for this file entirely, delete this header. # +################################################################################ + +iree_add_all_subdirs() + +iree_cc_library( + NAME + IREECodegenDialect + HDRS + "IREECodegenDialect.h" + "LoweringConfig.h" + TEXTUAL_HDRS + "IREECodegenDialect.cpp.inc" + "IREECodegenDialect.h.inc" + "LoweringConfig.cpp.inc" + "LoweringConfig.h.inc" + "LoweringConfigEnums.cpp.inc" + "LoweringConfigEnums.h.inc" + SRCS + "IREECodegenDialect.cpp" + "LoweringConfig.cpp" + DEPS + ::IREECodegenDialectGen + ::LoweringConfigGen + LLVMSupport + MLIRIR + MLIRParser + MLIRStandard + iree::compiler::Codegen::Utils + PUBLIC +) + +iree_tablegen_library( + NAME + IREECodegenDialectGen + TD_FILE + "IREECodegenAttributes.td" + OUTS + -gen-dialect-decls IREECodegenDialect.h.inc + -gen-dialect-defs IREECodegenDialect.cpp.inc +) + +iree_tablegen_library( + NAME + LoweringConfigGen + TD_FILE + "LoweringConfig.td" + OUTS + -gen-attrdef-decls LoweringConfig.h.inc + -gen-attrdef-defs LoweringConfig.cpp.inc + -gen-enum-decls LoweringConfigEnums.h.inc + -gen-enum-defs LoweringConfigEnums.cpp.inc +) + +### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/iree/compiler/Codegen/Dialect/IREECodegenAttributes.td b/iree/compiler/Codegen/Dialect/IREECodegenAttributes.td new file mode 
100644 index 000000000000..e5f8b2607de0 --- /dev/null +++ b/iree/compiler/Codegen/Dialect/IREECodegenAttributes.td @@ -0,0 +1,14 @@ +// Copyright 2021 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef IREE_CODEGEN_DIALECT_IREECODEGEN_ATTRIBUTES +#define IREE_CODEGEN_DIALECT_IREECODEGEN_ATTRIBUTES + +include "iree/compiler/Codegen/Dialect/IREECodegenDialect.td" +include "iree/compiler/Codegen/Dialect/LoweringConfig.td" + + +#endif // IREE_CODEGEN_DIALECT_IREECODEGEN_ATTRIBUTES diff --git a/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp b/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp new file mode 100644 index 000000000000..8f2cf4769979 --- /dev/null +++ b/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" + +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp.inc" +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" +#include "mlir/IR/DialectImplementation.h" + +namespace mlir { +namespace iree_compiler { +namespace IREE { +namespace Codegen { + +struct IREECodegenDialectOpAsmInterface : public OpAsmDialectInterface { + using OpAsmDialectInterface::OpAsmDialectInterface; + AliasResult getAlias(Attribute attr, raw_ostream &os) const override { + if (attr.isa()) { + os << "translation"; + return AliasResult::OverridableAlias; + } else if (attr.isa()) { + os << "compilation"; + return AliasResult::OverridableAlias; + } else if (attr.isa()) { + os << "config"; + return AliasResult::OverridableAlias; + } + return AliasResult::NoAlias; + } +}; + +void IREECodegenDialect::initialize() { + initializeCodegenAttrs(); + addInterfaces(); +} + +Attribute IREECodegenDialect::parseAttribute(DialectAsmParser &parser, + Type type) const { + StringRef mnemonic; + if (failed(parser.parseKeyword(&mnemonic))) return {}; + Attribute genAttr; + OptionalParseResult parseResult = + parseCodegenAttrs(parser, mnemonic, type, genAttr); + if (parseResult.hasValue()) return genAttr; + parser.emitError(parser.getNameLoc(), "unknown iree_codegen attribute"); + return Attribute(); +} + +void IREECodegenDialect::printAttribute(Attribute attr, + DialectAsmPrinter &p) const { + if (failed(printCodegenAttrs(attr, p))) { + llvm_unreachable("unhandled iree_codegen attribute"); + } +} + +} // namespace Codegen +} // namespace IREE +} // namespace iree_compiler +} // namespace mlir diff --git a/iree/compiler/Codegen/Dialect/IREECodegenDialect.h b/iree/compiler/Codegen/Dialect/IREECodegenDialect.h new file mode 100644 index 000000000000..bdb94731af1e --- /dev/null +++ b/iree/compiler/Codegen/Dialect/IREECodegenDialect.h @@ -0,0 +1,17 @@ +// Copyright 2021 The IREE 
Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef IREE_COMPILER_CODEGEN_DIALECT_IREECODEGEN_DIALECT_H_ +#define IREE_COMPILER_CODEGEN_DIALECT_IREECODEGEN_DIALECT_H_ + +#include "mlir/IR/Dialect.h" +#include "mlir/IR/OpDefinition.h" + +// clang-format off: must be included after all LLVM/MLIR headers +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h.inc" // IWYU pragma: keep +// clang-format on + +#endif // IREE_COMPILER_CODEGEN_DIALECT_IREECODEGEN_DIALECT_H_ diff --git a/iree/compiler/Codegen/Dialect/IREECodegenDialect.td b/iree/compiler/Codegen/Dialect/IREECodegenDialect.td new file mode 100644 index 000000000000..b1233e490e0e --- /dev/null +++ b/iree/compiler/Codegen/Dialect/IREECodegenDialect.td @@ -0,0 +1,44 @@ +// Copyright 2021 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef IREE_CODEGEN_DIALECT_IREECODEGEN_DIALECT +#define IREE_CODEGEN_DIALECT_IREECODEGEN_DIALECT + +include "mlir/IR/OpBase.td" + +//===----------------------------------------------------------------------===// +// IREE Codegen dialect +//===----------------------------------------------------------------------===// + +def IREECodegen_Dialect : Dialect { + let name = "iree_codegen"; + let cppNamespace = "::mlir::iree_compiler::IREE::Codegen"; + + let summary = [{ + A dialect representing attributes used by the IREE Code generation. + }]; + let description = [{ + This dialect is primarily meant to hold attributes that carry the + state of the compilation when lowered to scalar code for an + architecture. 
Typically, a backend starts by analysing the entry + point functions within the `hal.executable.variant` and deciding + which compilation pipeline to choose. 
During this, even the values + for parameters such as tile sizes, etc. are also decided. The rest + of the compilation flow does not make any heuristic decisions, + rather just looks at the values of the decision specified in + attributes that belong to this dialect. This allows an external + search to easily override the heuristics that are hard-coded + within a backend. + }]; + let extraClassDeclaration = [{ + void initializeCodegenAttrs(); + OptionalParseResult parseCodegenAttrs(DialectAsmParser &parser, + StringRef mnemonic, Type type, Attribute &value) const; + LogicalResult printCodegenAttrs(Attribute attr, DialectAsmPrinter &p) const; + }]; +} + +#endif // IREE_CODEGEN_DIALECT_IREECODEGEN_DIALECT \ No newline at end of file diff --git a/iree/compiler/Codegen/Dialect/LoweringConfig.cpp b/iree/compiler/Codegen/Dialect/LoweringConfig.cpp new file mode 100644 index 000000000000..79990d6eaf4b --- /dev/null +++ b/iree/compiler/Codegen/Dialect/LoweringConfig.cpp @@ -0,0 +1,632 @@ +// Copyright 2021 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" + +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" +#include "llvm/ADT/TypeSwitch.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/DialectImplementation.h" + +#define GET_ATTRDEF_CLASSES +#include "iree/compiler/Codegen/Dialect/LoweringConfig.cpp.inc" +#include "iree/compiler/Codegen/Dialect/LoweringConfigEnums.cpp.inc" + +static const char kConfigAttrName[] = "lowering.config"; +static const char kTranslationInfoAttrName[] = "translation.info"; +static const char kCompilationInfoAttrName[] = "compilation.info"; + +namespace mlir { +namespace iree_compiler { + +//===----------------------------------------------------------------------===// +// Utility function for common code patterns. +//===----------------------------------------------------------------------===// + +static bool checkIntegerArrayAttr(ArrayAttr arrayAttr) { + return !llvm::any_of(arrayAttr, + [](Attribute attr) { return !attr.isa(); }); +} + +/// Returns an `ArrayAttr` where each element is an `IntegerAttr` of `IndexType` +/// whose value is obtained from `values`. +static ArrayAttr getIndexIntegerArrayAttr(MLIRContext *context, + ArrayRef values) { + auto attrs = llvm::to_vector<4>( + llvm::map_range(values, [&context](int64_t value) -> Attribute { + return IntegerAttr::get(IndexType::get(context), APInt(64, value)); + })); + return ArrayAttr::get(context, attrs); +} + +/// Returns an `ArrayAttr` where each element is an `IntegerAttr` of 64-bit +/// integer type whose value is obtained from `values`. 
+static ArrayAttr getI64IntegerArrayAttr(MLIRContext *context, + ArrayRef values) { + auto attrs = llvm::to_vector<4>( + llvm::map_range(values, [&context](int64_t value) -> Attribute { + return IntegerAttr::get(IntegerType::get(context, 64), + APInt(64, value)); + })); + return ArrayAttr::get(context, attrs); +} + +/// Assumes that `arrayAttr` is a list of `IntegerAttr`s and returns the values +/// in these attributes as a vector. +static SmallVector getIntegerVals(ArrayAttr arrayAttr) { + if (!arrayAttr) return {}; + SmallVector values(arrayAttr.size()); + for (auto attr : llvm::enumerate(arrayAttr)) { + values[attr.index()] = attr.value().cast().getInt(); + } + return values; +} + +namespace IREE { +namespace Codegen { + +namespace { + +// TODO(ravishankarm): The IREEFieldParser is part of the patch D111594 (where +// it is called ::mlir::FieldParser). Remove this when the upstream change lands +// in IREE. + +//===----------------------------------------------------------------------===// +// Parse Fields +//===----------------------------------------------------------------------===// + +/// Provide a template class that can be specialized by users to dispatch to +/// parsers. Auto-generated parsers generate calls to +/// `IREEFieldParser::parse`, where `T` is the parameter storage type, to +/// parse custom types. +template +struct IREEFieldParser; + +/// Parse an attribute. +template +struct IREEFieldParser< + AttributeT, std::enable_if_t::value, + AttributeT>> { + static FailureOr parse(DialectAsmParser &parser) { + AttributeT value; + if (parser.parseAttribute(value)) return failure(); + return value; + } +}; + +/// Parse any integer. +template +struct IREEFieldParser::value, IntT>> { + static FailureOr parse(DialectAsmParser &parser) { + IntT value; + if (parser.parseInteger(value)) return failure(); + return value; + } +}; + +/// Parse a string. 
+template <> +struct IREEFieldParser { + static FailureOr parse(DialectAsmParser &parser) { + std::string value; + if (parser.parseString(&value)) return failure(); + return value; + } +}; + +/// Parse any container that supports back insertion as a list. +template +struct IREEFieldParser< + ContainerT, std::enable_if_t::value, + ContainerT>> { + using ElementT = typename ContainerT::value_type; + static FailureOr parse(DialectAsmParser &parser) { + ContainerT elements; + auto elementParser = [&]() { + auto element = IREEFieldParser::parse(parser); + if (failed(element)) return failure(); + elements.push_back(element.getValue()); + return success(); + }; + if (parser.parseCommaSeparatedList(elementParser)) return failure(); + return elements; + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// iree_codegen.translation.info +//===----------------------------------------------------------------------===// + +TranslationInfoAttr TranslationInfoAttr::get( + MLIRContext *context, DispatchLoweringPassPipeline passPipeline, + ArrayRef workloadPerWorkgroup) { + auto pipelineAttr = StringAttr::get(context, stringifyEnum(passPipeline)); + ArrayAttr workloadPerWorkgroupAttr = + getI64IntegerArrayAttr(context, workloadPerWorkgroup); + return get(context, pipelineAttr, workloadPerWorkgroupAttr); +} + +DispatchLoweringPassPipeline +TranslationInfoAttr::getDispatchLoweringPassPipeline() { + Optional passPipeline = + symbolizeEnum(getPassPipeline().getValue()); + return passPipeline.getValue(); +} + +SmallVector TranslationInfoAttr::getWorkloadPerWorkgroupVals() { + return getIntegerVals(getWorkloadPerWorkgroup()); +} + +LogicalResult TranslationInfoAttr::verify( + function_ref emitError, StringAttr passPipeline, + ArrayAttr workloadPerWorkgroup) { + if (!passPipeline) { + return emitError() << "missing pass pipeline specification"; + } + auto passPipelineValue = + symbolizeEnum( + passPipeline.getValue()); + if 
(!passPipelineValue) { + return emitError() << "invalid pass pipeline value : " + << passPipeline.getValue(); + } + if (!workloadPerWorkgroup) { + return emitError() << "expected workload_per_wg to be specified (even if " + "specified as empty)"; + } + if (!checkIntegerArrayAttr(workloadPerWorkgroup)) { + return emitError() << "expected workload_per_wg to be an IntegerAttr list"; + } + return success(); +} + +::mlir::Attribute TranslationInfoAttr::parse(::mlir::DialectAsmParser &parser, + ::mlir::Type attrType) { + ::mlir::FailureOr _result_passPipeline; + ::mlir::FailureOr _result_workloadPerWorkgroup; + // Parse literal '<' + if (parser.parseLess()) return {}; + // Parse variable 'passPipeline' + _result_passPipeline = IREEFieldParser::parse(parser); + if (failed(_result_passPipeline)) { + parser.emitError(parser.getCurrentLocation(), + "failed to parse IREECodegen_TranslationInfoAttr " + "parameter 'passPipeline' which is to be a `StringAttr`"); + return {}; + } + // Parse literal ',' + if (parser.parseComma()) return {}; + // Parse literal 'workload_per_wg' + if (parser.parseKeyword("workload_per_wg")) return {}; + // Parse literal '=' + if (parser.parseEqual()) return {}; + // Parse variable 'workloadPerWorkgroup' + _result_workloadPerWorkgroup = IREEFieldParser::parse(parser); + if (failed(_result_workloadPerWorkgroup)) { + parser.emitError( + parser.getCurrentLocation(), + "failed to parse IREECodegen_TranslationInfoAttr parameter " + "'workloadPerWorkgroup' which is to be a `ArrayAttr`"); + return {}; + } + // Parse literal '>' + if (parser.parseGreater()) return {}; + return TranslationInfoAttr::get(parser.getContext(), + _result_passPipeline.getValue(), + _result_workloadPerWorkgroup.getValue()); +} + +void TranslationInfoAttr::print(::mlir::DialectAsmPrinter &printer) const { + printer << "translation.info"; + printer << "<"; + printer << getPassPipeline(); + printer << ","; + printer << ' ' << "workload_per_wg"; + printer << ' ' << "="; + printer << ' 
'; + printer << getWorkloadPerWorkgroup(); + printer << ">"; +} + +//===----------------------------------------------------------------------===// +// iree_codegen.lowering.config +//===----------------------------------------------------------------------===// + +LoweringConfigAttr LoweringConfigAttr::get(MLIRContext *context, + TileSizesListTypeRef tileSizes, + ArrayRef nativeVectorSize) { + auto attrList = llvm::to_vector<4>( + llvm::map_range(tileSizes, [&](ArrayRef sizes) -> Attribute { + return getI64IntegerArrayAttr(context, sizes); + })); + ArrayAttr tileSizesAttr = ArrayAttr::get(context, attrList); + ArrayAttr nativeVectorSizeAttr = + getI64IntegerArrayAttr(context, nativeVectorSize); + return get(context, tileSizesAttr, nativeVectorSizeAttr); +} + +TileSizesListType LoweringConfigAttr::getTileSizeVals() { + auto tileSizesAttr = getTileSizes(); + if (!tileSizesAttr) return {}; + TileSizesListType tileSizes; + for (auto attr : tileSizesAttr) { + auto vals = getIntegerVals(attr.cast()); + tileSizes.emplace_back(std::move(vals)); + } + return tileSizes; +} + +SmallVector LoweringConfigAttr::getTileSizeVals(unsigned level) { + ArrayAttr tileSizesAttr = getTileSizes(); + if (!tileSizesAttr || tileSizesAttr.size() <= level) return {}; + return getIntegerVals(tileSizesAttr[level].cast()); +} + +SmallVector LoweringConfigAttr::getNativeVectorSizeVals() { + ArrayAttr nativeVectorSizeAttr = getNativeVectorSize(); + if (!nativeVectorSizeAttr) return {}; + return getIntegerVals(nativeVectorSizeAttr); +} + +LogicalResult LoweringConfigAttr::verify( + function_ref emitError, ArrayAttr tileSizes, + ArrayAttr nativeVectorSize) { + if (!tileSizes) { + return emitError() << "expected tile_sizes to be specified (even is " + "specified as empty)"; + } + if (llvm::any_of(tileSizes, [](Attribute attr) { + auto arrayAttr = attr.dyn_cast(); + return !arrayAttr || !checkIntegerArrayAttr(arrayAttr); + })) { + return emitError() + << "expected all elements of tile_sizes to be a 
list of integers"; + } + if (!nativeVectorSize) { + return emitError() << "expected native_vector_size to be specified (even " + "if specified as empty)"; + } + if (!checkIntegerArrayAttr(nativeVectorSize)) { + return emitError() + << "expected native_vector_size to be a list of integer values"; + } + return success(); +} + +::mlir::Attribute LoweringConfigAttr::parse(::mlir::DialectAsmParser &parser, + ::mlir::Type attrType) { + ::mlir::FailureOr _result_tileSizes; + ::mlir::FailureOr _result_nativeVectorSize; + // Parse literal '<' + if (parser.parseLess()) return {}; + // Parse literal 'tile_sizes' + if (parser.parseKeyword("tile_sizes")) return {}; + // Parse literal '=' + if (parser.parseEqual()) return {}; + // Parse variable 'tileSizes' + _result_tileSizes = IREEFieldParser::parse(parser); + if (failed(_result_tileSizes)) { + parser.emitError(parser.getCurrentLocation(), + "failed to parse IREECodegen_LoweringConfigAttr parameter " + "'tileSizes' which is to be a `ArrayAttr`"); + return {}; + } + // Parse literal ',' + if (parser.parseComma()) return {}; + // Parse literal 'native_vector_size' + if (parser.parseKeyword("native_vector_size")) return {}; + // Parse literal '=' + if (parser.parseEqual()) return {}; + // Parse variable 'nativeVectorSize' + _result_nativeVectorSize = IREEFieldParser::parse(parser); + if (failed(_result_nativeVectorSize)) { + parser.emitError(parser.getCurrentLocation(), + "failed to parse IREECodegen_LoweringConfigAttr parameter " + "'nativeVectorSize' which is to be a `ArrayAttr`"); + return {}; + } + // Parse literal '>' + if (parser.parseGreater()) return {}; + return LoweringConfigAttr::get(parser.getContext(), + _result_tileSizes.getValue(), + _result_nativeVectorSize.getValue()); +} + +void LoweringConfigAttr::print(::mlir::DialectAsmPrinter &printer) const { + printer << "lowering.config"; + printer << "<"; + printer << "tile_sizes"; + printer << ' ' << "="; + printer << ' '; + printer << getTileSizes(); + printer << ","; 
+ printer << ' ' << "native_vector_size"; + printer << ' ' << "="; + printer << ' '; + printer << getNativeVectorSize(); + printer << ">"; +} + +//===----------------------------------------------------------------------===// +// iree.compilation.info +//===----------------------------------------------------------------------===// + +CompilationInfoAttr CompilationInfoAttr::get(MLIRContext *context, + TileSizesListTypeRef tileSizes, + ArrayRef nativeVectorSize, + ArrayRef workgroupSize) { + LoweringConfigAttr configAttr = + LoweringConfigAttr::get(context, tileSizes, nativeVectorSize); + TranslationInfoAttr translationInfo = + TranslationInfoAttr::get(context, DispatchLoweringPassPipeline::None); + ArrayAttr workgroupSizeAttr = getI64IntegerArrayAttr(context, workgroupSize); + return get(context, configAttr, translationInfo, workgroupSizeAttr); +} + +CompilationInfoAttr CompilationInfoAttr::get( + MLIRContext *context, TileSizesListTypeRef tileSizes, + ArrayRef nativeVectorSize, + DispatchLoweringPassPipeline passPipeline, + ArrayRef workloadPerWorkgroup, ArrayRef workgroupSize) { + LoweringConfigAttr configAttr = + LoweringConfigAttr::get(context, tileSizes, nativeVectorSize); + TranslationInfoAttr translationInfoAttr = + TranslationInfoAttr::get(context, passPipeline, workloadPerWorkgroup); + ArrayAttr workgroupSizeAttr = getI64IntegerArrayAttr(context, workgroupSize); + return get(context, configAttr, translationInfoAttr, workgroupSizeAttr); +} + +LogicalResult CompilationInfoAttr::verify( + function_ref emitError, + LoweringConfigAttr loweringConfig, TranslationInfoAttr translationInfo, + ArrayAttr workgroupSize) { + if (!loweringConfig) { + return emitError() << "missing lowering config"; + } + if (failed( + LoweringConfigAttr::verify(emitError, loweringConfig.getTileSizes(), + loweringConfig.getNativeVectorSize()))) { + return failure(); + } + if (!translationInfo) { + return emitError() << "missing translation info"; + } + if 
(failed(TranslationInfoAttr::verify( + emitError, translationInfo.getPassPipeline(), + translationInfo.getWorkloadPerWorkgroup()))) { + return failure(); + } + if (!workgroupSize) { + return emitError() << "expected workgroup_size to be specified (even if " + "specified empty)"; + } + if (!checkIntegerArrayAttr(workgroupSize)) { + return emitError() << "expected workgroup_size to be a list of integers"; + } + return success(); +} + +/// Parser method that is copied from the auto-generated using `assemblyFormat` +/// available with patch D111594. Replace after that change is in IREE. +::mlir::Attribute CompilationInfoAttr::parse(::mlir::DialectAsmParser &parser, + ::mlir::Type attrType) { + ::mlir::FailureOr _result_loweringConfig; + ::mlir::FailureOr _result_translationInfo; + ::mlir::FailureOr _result_workgroupSize; + // Parse literal '<' + if (parser.parseLess()) return {}; + // Parse variable 'loweringConfig' + _result_loweringConfig = IREEFieldParser::parse(parser); + if (failed(_result_loweringConfig)) { + parser.emitError( + parser.getCurrentLocation(), + "failed to parse IREECodegen_CompilationInfoAttr parameter " + "'loweringConfig' which is to be a `LoweringConfigAttr`"); + return {}; + } + // Parse literal ',' + if (parser.parseComma()) return {}; + // Parse variable 'translationInfo' + _result_translationInfo = IREEFieldParser::parse(parser); + if (failed(_result_translationInfo)) { + parser.emitError( + parser.getCurrentLocation(), + "failed to parse IREECodegen_CompilationInfoAttr parameter " + "'translationInfo' which is to be a `TranslationInfoAttr`"); + return {}; + } + // Parse literal ',' + if (parser.parseComma()) return {}; + // Parse literal 'workgroup_size' + if (parser.parseKeyword("workgroup_size")) return {}; + // Parse literal '=' + if (parser.parseEqual()) return {}; + // Parse variable 'workgroupSize' + _result_workgroupSize = IREEFieldParser::parse(parser); + if (failed(_result_workgroupSize)) { + 
parser.emitError(parser.getCurrentLocation(), + "failed to parse IREECodegen_CompilationInfoAttr " + "parameter 'workgroupSize' which is to be a `ArrayAttr`"); + return {}; + } + // Parse literal '>' + if (parser.parseGreater()) return {}; + return CompilationInfoAttr::get( + parser.getContext(), _result_loweringConfig.getValue(), + _result_translationInfo.getValue(), _result_workgroupSize.getValue()); +} + +/// Printer method that is copied from the auto-generated using `assemblyFormat` +/// available with patch D111594. Replace after that change is in IREE. +void CompilationInfoAttr::print(::mlir::DialectAsmPrinter &printer) const { + printer << "compilation.info"; + printer << "<"; + printer << getLoweringConfig(); + printer << ","; + printer << ' '; + printer << getTranslationInfo(); + printer << ","; + printer << ' ' << "workgroup_size"; + printer << ' ' << "="; + printer << ' '; + printer << getWorkgroupSize(); + printer << ">"; +} + +SmallVector CompilationInfoAttr::getWorkgroupSizeVals() { + ArrayAttr workgroupSizeAttr = getWorkgroupSize(); + if (!workgroupSizeAttr) return {}; + return getIntegerVals(workgroupSizeAttr); +} + +//===----------------------------------------------------------------------===// +// Initialize attributes +//===----------------------------------------------------------------------===// + +void IREECodegenDialect::initializeCodegenAttrs() { + addAttributes< +#define GET_ATTRDEF_LIST +#include "iree/compiler/Codegen/Dialect/LoweringConfig.cpp.inc" // IWYU pragma: keep + >(); +} + +OptionalParseResult IREECodegenDialect::parseCodegenAttrs( + DialectAsmParser &parser, StringRef mnemonic, Type type, + Attribute &value) const { + return generatedAttributeParser(parser, mnemonic, type, value); +} + +LogicalResult IREECodegenDialect::printCodegenAttrs( + Attribute attr, DialectAsmPrinter &p) const { + return generatedAttributePrinter(attr, p); +} + +} // namespace Codegen +} // namespace IREE +
+//===----------------------------------------------------------------------===// +// Helpers for getting/setting iree_codegen.translation.info attribute on the +// `hal.executable.entry_point` +// ===----------------------------------------------------------------------===// + +IREE::Codegen::TranslationInfoAttr getTranslationInfo( + IREE::HAL::ExecutableEntryPointOp entryPointOp) { + return entryPointOp->getAttrOfType( + kTranslationInfoAttrName); +} + +SmallVector getWorkgroupSize( + IREE::HAL::ExecutableEntryPointOp entryPointOp) { + if (Optional workgroupSizeAttrList = + entryPointOp.workgroup_size()) { + return getIntegerVals(*workgroupSizeAttrList); + } + return {}; +} + +void setTranslationInfo(IREE::HAL::ExecutableEntryPointOp entryPointOp, + IREE::Codegen::TranslationInfoAttr translationInfo, + ArrayRef workgroupSize) { + entryPointOp->setAttr(kTranslationInfoAttrName, translationInfo); + // The workgroup size is set on the entry point op directly. + if (!workgroupSize.empty()) { + MLIRContext *context = entryPointOp->getContext(); + auto attrs = getIndexIntegerArrayAttr(context, workgroupSize); + entryPointOp.workgroup_sizeAttr(attrs); + } +} + +//===----------------------------------------------------------------------===// +// Helpers for getting/setting `iree_codegen.lowering.config` attribute on root +// operations. 
+// ===----------------------------------------------------------------------===// + +IREE::Codegen::LoweringConfigAttr getLoweringConfig(Operation *op) { + return op->getAttrOfType(kConfigAttrName); +} + +SmallVector getTileSizes(Operation *op, unsigned level) { + IREE::Codegen::LoweringConfigAttr configAttr = getLoweringConfig(op); + if (!configAttr) return {}; + return configAttr.getTileSizeVals(level); +} +SmallVector getTileSizes(OpBuilder &b, Operation *op, + unsigned level) { + return llvm::to_vector<4>( + llvm::map_range(getTileSizes(op, level), [&](int64_t t) -> Value { + return b.create(op->getLoc(), t); + })); +} + +void setLoweringConfig(Operation *op, + IREE::Codegen::LoweringConfigAttr config) { + op->setAttr(kConfigAttrName, config); +} + +LogicalResult setOpConfigAndEntryPointFnTranslation( + FuncOp entryPointFn, Operation *op, + IREE::Codegen::LoweringConfigAttr config, + IREE::Codegen::DispatchLoweringPassPipeline passPipeline, + ArrayRef workgroupSize) { + auto partitionedLoops = getPartitionedLoops(op); + SmallVector workloadPerWorkgroup; + auto tileSizes = config.getTileSizeVals(0); + if (!tileSizes.empty() && !partitionedLoops.empty()) { + for (unsigned depth : partitionedLoops) { + if (depth >= tileSizes.size()) { + return op->emitOpError( + "illegal configuration for lowering op, expect first level " + "tile size to contain at least ") + << partitionedLoops.back() << " elements"; + } + if (tileSizes[depth] == 0) { + return op->emitOpError("illegal to set tilesize of loop ") + << depth + << " to zero since it is set to be partitioned at the flow " + "level"; + } + workloadPerWorkgroup.push_back(tileSizes[depth]); + } + if (!workloadPerWorkgroup.empty()) { + workloadPerWorkgroup = + llvm::to_vector<3>(llvm::reverse(workloadPerWorkgroup)); + } + } + auto entryPointOp = getEntryPoint(entryPointFn); + if (!entryPointOp) { + return entryPointFn.emitOpError( + "unable to find entry point op for entry point function"); + } + auto translationInfo = 
IREE::Codegen::TranslationInfoAttr::get( + entryPointOp->getContext(), passPipeline, workloadPerWorkgroup); + setTranslationInfo(entryPointOp, translationInfo, workgroupSize); + return success(); +} + +//===----------------------------------------------------------------------===// +// Helpers for getting/setting `iree_codegen.compilation.info` attribute on root +// operations to override IREE's default compilation. +// ===----------------------------------------------------------------------===// + +IREE::Codegen::CompilationInfoAttr getCompilationInfo(Operation *op) { + return op->getAttrOfType( + kCompilationInfoAttrName); +} + +void setCompilationInfo(Operation *op, + IREE::Codegen::CompilationInfoAttr config) { + op->setAttr(kCompilationInfoAttrName, config); +} + +void eraseCompilationInfo(Operation *op) { + op->removeAttr(kCompilationInfoAttrName); +} + +} // namespace iree_compiler +} // namespace mlir diff --git a/iree/compiler/Codegen/Dialect/LoweringConfig.h b/iree/compiler/Codegen/Dialect/LoweringConfig.h new file mode 100644 index 000000000000..6d99215f7e8f --- /dev/null +++ b/iree/compiler/Codegen/Dialect/LoweringConfig.h @@ -0,0 +1,152 @@ +// Copyright 2021 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +//===- LoweringConfig.h - Declares configuration for lowering Linalg ops --===// +// +// This file declares an attribute that drives how a dispatch region containing +// a set of operations is lowered. The attribute itself is attached to Linalg +// operations, and helps convert a Linalg operation into "scalar code". 
+// +//===----------------------------------------------------------------------===// + +#ifndef IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_ +#define IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_ + +#include "iree/compiler/Codegen/Utils/Utils.h" +#include "iree/compiler/Dialect/HAL/IR/HALOps.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" + +namespace mlir { +namespace iree_compiler { +/// Typedef for tile sizes to use at different levels of tiling. +using TileSizesListType = SmallVector>; +using TileSizesListTypeRef = ArrayRef>; +} // namespace iree_compiler +} // namespace mlir + +// clang-format off +#include "iree/compiler/Codegen/Dialect/LoweringConfigEnums.h.inc" +#define GET_ATTRDEF_CLASSES +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h.inc" +// clang-format on + +namespace mlir { +namespace iree_compiler { +//===----------------------------------------------------------------------===// +// Helpers for getting/setting iree_codegen.translation.info attribute on the +// `hal.executable.entry_point` +// ===----------------------------------------------------------------------===// + +/// Gets the translate executable info attribute value associated with +/// `entryPointOp`. It expects that the attribute is stored using the identifier +/// `translation.info`. +IREE::Codegen::TranslationInfoAttr getTranslationInfo( + IREE::HAL::ExecutableEntryPointOp entryPointOp); +/// Returns the translation info for the `funcOp` (by looking at the entry +/// point). Returns `nullptr` on failure. +inline IREE::Codegen::TranslationInfoAttr getTranslationInfo(FuncOp funcOp) { + auto entryPointOp = getEntryPoint(funcOp); + if (!entryPointOp) return nullptr; + return getTranslationInfo(entryPointOp); +} + +/// Returns the workgroup size specified on the `entryPointOp`. 
+SmallVector getWorkgroupSize( + IREE::HAL::ExecutableEntryPointOp entryPointOp); + +/// Set the translate executable info with the entry point op. Overwrites the +/// existing attributes. +// TODO(ravishankarm, benvanik): Eventually all the information needed for the +// lowering will be consolidated into a single attribute with richer +// information. +void setTranslationInfo(IREE::HAL::ExecutableEntryPointOp entryPointOp, + IREE::Codegen::TranslationInfoAttr translationInfo, + ArrayRef workgroupSize = {}); +inline void setTranslationInfo( + FuncOp entryPointFn, IREE::Codegen::TranslationInfoAttr translationInfo, + ArrayRef workgroupSize = {}) { + auto entryPointOp = getEntryPoint(entryPointFn); + return setTranslationInfo(entryPointOp, translationInfo, workgroupSize); +} + +/// Sets the translation info on the `hal.executable.entry_point` op +/// corresponding to the `entryPointFn`, building it from `passPipeline` and +/// `workloadPerWorkgroup`. Returns nothing; the attribute is set on the entry +/// point op unconditionally. +inline void setTranslationInfo( + FuncOp entryPointFn, + IREE::Codegen::DispatchLoweringPassPipeline passPipeline, + ArrayRef workloadPerWorkgroup, ArrayRef workgroupSize) { + auto entryPointOp = getEntryPoint(entryPointFn); + MLIRContext *context = entryPointFn.getContext(); + auto translationInfo = IREE::Codegen::TranslationInfoAttr::get( + context, passPipeline, workloadPerWorkgroup); + setTranslationInfo(entryPointOp, translationInfo, workgroupSize); +} + +//===----------------------------------------------------------------------===// +// Helpers for getting/setting `iree_codegen.lowering.config` attribute on root +// operations. +// ===----------------------------------------------------------------------===// + +/// Returns the lowering configuration set for an operation. Returns `nullptr` +/// if no value is set. It expects that the attribute is stored using the +/// identifier `lowering.config`. 
+IREE::Codegen::LoweringConfigAttr getLoweringConfig(Operation *op); + +/// Returns the tile sizes for a particular operation if the +/// `iree_codegen.lowering.config` attribute is set on it. +SmallVector getTileSizes(Operation *op, unsigned level); +SmallVector getTileSizes(OpBuilder &b, Operation *op, unsigned level); + +/// Sets the lowering configuration, overwriting existing attribute values. +void setLoweringConfig(Operation *op, IREE::Codegen::LoweringConfigAttr config); + +/// Sets translation for the entry-point function based on op configuration. +LogicalResult setOpConfigAndEntryPointFnTranslation( + FuncOp entryPointFn, Operation *op, + IREE::Codegen::LoweringConfigAttr config, + IREE::Codegen::DispatchLoweringPassPipeline passPipeline, + ArrayRef workgroupSize = {}); +inline LogicalResult setOpConfigAndEntryPointFnTranslation( + FuncOp entryPointFn, Operation *op, TileSizesListTypeRef tileSizes, + ArrayRef nativeVectorSize, + IREE::Codegen::DispatchLoweringPassPipeline passPipeline, + ArrayRef workgroupSize = {}) { + MLIRContext *context = entryPointFn.getContext(); + auto config = IREE::Codegen::LoweringConfigAttr::get(context, tileSizes, + nativeVectorSize); + setLoweringConfig(op, config); + return setOpConfigAndEntryPointFnTranslation(entryPointFn, op, config, + passPipeline, workgroupSize); +} + +//===----------------------------------------------------------------------===// +// Helpers for getting/setting `iree_codegen.compilation.info` attribute on root +// operations to override IREEs default compilation. +// ===----------------------------------------------------------------------===// + +/// Returns the `#iree_codegen.compilation.info` set on the operation. Assumes +/// that the identifier used is `compilation.info`. +IREE::Codegen::CompilationInfoAttr getCompilationInfo(Operation *op); + +/// Sets the `config` to use for compiling the operation. 
If `op` is the root +/// operation of the dispatch region, overrides the default configuration that +/// is used for compilation. +void setCompilationInfo(Operation *op, + IREE::Codegen::CompilationInfoAttr config); + +/// Removes the `#iree_codegen.compilation.info` attribute that is set on the +/// operation. +void eraseCompilationInfo(Operation *op); + +} // namespace iree_compiler +} // namespace mlir + +#endif // IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_ diff --git a/iree/compiler/Codegen/Dialect/LoweringConfig.td b/iree/compiler/Codegen/Dialect/LoweringConfig.td new file mode 100644 index 000000000000..2e7b5470de7e --- /dev/null +++ b/iree/compiler/Codegen/Dialect/LoweringConfig.td @@ -0,0 +1,190 @@ +// Copyright 2021 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef IREE_COMPILER_CODEGEN_DIALECT_LOWERINGCONFIG +#define IREE_COMPILER_CODEGEN_DIALECT_LOWERINGCONFIG + +include "iree/compiler/Codegen/Dialect/IREECodegenDialect.td" + +// List of pre-existing pipelines for translating executables. +def CPU_Default + : StrEnumAttrCase<"CPUDefault">; +def CPU_Vectorization + : StrEnumAttrCase<"CPUVectorization">; +def CPU_TensorToVectors + : StrEnumAttrCase<"CPUTensorToVectors">; + +def LLVMGPU_SimpleDistribute + : StrEnumAttrCase<"LLVMGPUDistribute">; +def LLVMGPU_Vectorize + : StrEnumAttrCase<"LLVMGPUVectorize">; +def LLVMGPU_MatmulSimt + : StrEnumAttrCase<"LLVMGPUMatmulSimt">; + +def SPIRV_SimpleDistribute + : StrEnumAttrCase<"SPIRVDistribute">; +def SPIRV_Vectorize + : StrEnumAttrCase<"SPIRVVectorize">; +def SPIRV_VectorizeToCooperativeOps + : StrEnumAttrCase<"SPIRVVectorizeToCooperativeOps">; + +def None + : StrEnumAttrCase<"None">; + +// EnumAttrCase for all known lowerings for ops within dispatch region +// to scalar/native-vector code. 
+def DispatchLoweringPassPipelineEnum : StrEnumAttr< + "DispatchLoweringPassPipeline", + "identifier for pass pipeline use to lower dispatch region", + [CPU_Default, CPU_TensorToVectors, CPU_Vectorization, + LLVMGPU_SimpleDistribute, LLVMGPU_Vectorize, LLVMGPU_MatmulSimt, + SPIRV_SimpleDistribute, SPIRV_Vectorize, + SPIRV_VectorizeToCooperativeOps, None]> { + let cppNamespace = "::mlir::iree_compiler::IREE::Codegen"; +} + +def IREECodegen_TranslationInfoAttr : + AttrDef { + let mnemonic = "translation.info"; + let summary = [{drive dispatch entry point lowering}]; + let description = [{ + Specifies the information that is used to drive the translation of + an entry point function using Linalg based structured-op + lowering. During executable translation this is attached to the + `hal.executable.entry_point` operation. + + If this operation is already set on the root operation (as part of + `iree_codegen.compilation.info`) that drives the compilation of a + dispatch region (like `linalg.matmul`/`linalg.*conv*`), this + attribute gets propagated to the entry point function. + + The fields are + - `passPipeline` : The pass pipeline to use. + - `workloadPerWorkgroup` : Specifies how much of the original + `workload` is handled by a workgroup along `x`, `y` and `z`. If + left empty it implies that there is a single workgroup that + does the entire `workload`. + + }]; + + // TODO(ravishankarm): Commented out till patch D111594 lands. + // let assemblyFormat = [{ + // `<` $passPipeline `,` `workload_per_wg` `=` $workloadPerWorkgroup `>` + // }]; + + let parameters = (ins + AttrParameter<"StringAttr", "">:$passPipeline, + AttrParameter<"ArrayAttr", "">:$workloadPerWorkgroup + ); + let builders = [ + AttrBuilder<(ins "DispatchLoweringPassPipeline":$passPipeline, + CArg<"ArrayRef", "{}">:$workloadPerWorkgroup)> + ]; + let extraClassDeclaration = [{ + // Returns the lowering pass pipeline set. 
+ DispatchLoweringPassPipeline getDispatchLoweringPassPipeline(); + + // Returns values of the workloadPerWorkgroup field if set. + SmallVector getWorkloadPerWorkgroupVals(); + }]; + let genVerifyDecl = 1; +} + +def IREECodegen_LoweringConfigAttr : + AttrDef { + let mnemonic = "lowering.config"; + let summary = [{drive lowering of an operation within dispatch region}]; + let description = [{ + Specifies the information that is used by backend compiler to + translate an operation to scalar code. The way the information is + used is specific to each backend (indeed specific to the pass + pipeline used) to compile that operation. + + TODO: Currently there is no verification that the configuration + specifies everything needed for a pass-pipeline. The values to set + for these parameters is dependent on the pass-pipeline + implementation. In future, each pass pipeline could verify that + the lowering configuration has all the necessary attributes for + the pipeline. + + }]; + + // TODO(ravishankarm): Commented out till patch D111594 lands. + // let assemblyFormat = [{ + // `<` `tile_sizes` `=` $tileSizes `,` `native_vector_size` `=` $nativeVectorSize `>` + // }]; + + let parameters = (ins + AttrParameter<"ArrayAttr", "">:$tileSizes, + AttrParameter<"ArrayAttr", "">:$nativeVectorSize + ); + let builders = [ + AttrBuilder<(ins "TileSizesListTypeRef":$tileSizes, + CArg<"ArrayRef", "{}">:$nativeVectorSize)> + ]; + let extraClassDeclaration = [{ + // Returns the tile sizes for all levels set for the op. + TileSizesListType getTileSizeVals(); + + // Returns the tile sizes for a level set for the op. + SmallVector getTileSizeVals(unsigned level = 0); + + // Returns the native vector size to use. 
+ SmallVector getNativeVectorSizeVals(); + }]; + let genVerifyDecl = 1; +} + +def IREECodegen_CompilationInfoAttr : + AttrDef { + let mnemonic = "compilation.info"; + let summary = [{drive lowering of an operation from input dialect}]; + let description = [{ + Specifies the information that allows controlling the compilation + of operations like `linalg.matmul`/`linalg.*conv` within + IREE. This information is used to override the defaults used by + the IREE compiler. Currently it is only valid to set this on + `linalg.matmul`/`linalg.*conv*` operations. + + TODO: It is expected that the `TranslationInfoAttr` and the + `LoweringConfigAttr` are specified. Currently there is no + verification that the values of the `LoweringConfigAttr` fully + specifies the behaviour of the compilation path chosen with + `TranslationInfoAttr`. This could be added in the future. Note: + Typically the values used for the first-level tiling in + `LoweringConfigAttr` and `workload_per_wg` value in the + `TranslationInfoAttr` are the same since the first-level of tile + + distribute is already done at the `Flow` level. This verification + is also a TODO. + }]; + let parameters = (ins + AttrParameter<"LoweringConfigAttr", "">:$loweringConfig, + AttrParameter<"TranslationInfoAttr", "">:$translationInfo, + AttrParameter<"ArrayAttr", "">:$workgroupSize + ); + + // TODO(ravishankarm): Commented out till patch D111594 lands. 
+ // let assemblyFormat = [{ + // `<` $loweringConfig `,` $translationInfo `,` `workgroup_size` `=` $workgroupSize `>` + // }]; + + let builders = [ + AttrBuilder<(ins "TileSizesListTypeRef":$tileSizes, + "ArrayRef":$nativeVectorSize, + CArg<"ArrayRef", "{}">:$workgroupSize)>, + AttrBuilder<(ins "TileSizesListTypeRef":$tileSizes, + "ArrayRef":$nativeVectorSize, + "DispatchLoweringPassPipeline":$passPipeline, + "ArrayRef":$workloadPerWorkgroup, + CArg<"ArrayRef", "{}">:$workgroupSize)>, + ]; + let extraClassDeclaration = [{ + SmallVector getWorkgroupSizeVals(); + }]; + let genVerifyDecl = 1; +} + +#endif // IREE_COMPILER_CODEGEN_DIALECT_LOWERINGCONFIG diff --git a/iree/compiler/Codegen/Dialect/test/BUILD b/iree/compiler/Codegen/Dialect/test/BUILD new file mode 100644 index 000000000000..704286c9afdd --- /dev/null +++ b/iree/compiler/Codegen/Dialect/test/BUILD @@ -0,0 +1,30 @@ +# Copyright 2021 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# Tests for common transforms. 
+ +load("//iree:lit_test.bzl", "iree_lit_test_suite") +load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], + licenses = ["notice"], # Apache 2.0 +) + +iree_lit_test_suite( + name = "lit", + srcs = enforce_glob( + [ + "lowering_config_attr.mlir", + ], + include = ["*.mlir"], + ), + data = [ + "//iree/tools:IreeFileCheck", + "//iree/tools:iree-opt", + ], +) diff --git a/iree/compiler/Codegen/Dialect/test/CMakeLists.txt b/iree/compiler/Codegen/Dialect/test/CMakeLists.txt new file mode 100644 index 000000000000..4de932a994f6 --- /dev/null +++ b/iree/compiler/Codegen/Dialect/test/CMakeLists.txt @@ -0,0 +1,23 @@ +################################################################################ +# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # +# iree/compiler/Codegen/Dialect/test/BUILD # +# # +# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # +# CMake-only content. # +# # +# To disable autogeneration for this file entirely, delete this header. 
# +################################################################################ + +iree_add_all_subdirs() + +iree_lit_test_suite( + NAME + lit + SRCS + "lowering_config_attr.mlir" + DATA + iree::tools::IreeFileCheck + iree::tools::iree-opt +) + +### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/iree/compiler/Codegen/Dialect/test/lowering_config_attr.mlir b/iree/compiler/Codegen/Dialect/test/lowering_config_attr.mlir new file mode 100644 index 000000000000..363999672367 --- /dev/null +++ b/iree/compiler/Codegen/Dialect/test/lowering_config_attr.mlir @@ -0,0 +1,37 @@ +// RUN: iree-opt -split-input-file %s | IreeFileCheck %s + +module attributes { + translation.info = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [32, 42]> +} { } +// CHECK: #translation = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [32, 42]> + +// ----- + +module attributes { + translation.info = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []> +} { } +// CHECK: #translation = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []> + +// ----- + +module attributes { + lowering.config = #iree_codegen.lowering.config +} { } +// CHECK: #config = #iree_codegen.lowering.config + +// ----- + +module attributes { + lowering.config = #iree_codegen.lowering.config +} { } +// CHECK: #config = #iree_codegen.lowering.config + +// ----- + +module attributes { + compilation.info = #iree_codegen.compilation.info< + #iree_codegen.lowering.config, + #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []>, + workgroup_size = []> +} { } +// CHECK: #compilation = #iree_codegen.compilation.info<#iree_codegen.lowering.config, #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []>, workgroup_size = []> \ No newline at end of file diff --git a/iree/compiler/Codegen/LLVMCPU/BUILD b/iree/compiler/Codegen/LLVMCPU/BUILD index 0d410558c28e..21e280217659 100644 --- a/iree/compiler/Codegen/LLVMCPU/BUILD +++ 
b/iree/compiler/Codegen/LLVMCPU/BUILD @@ -29,6 +29,7 @@ cc_library( deps = [ "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/Transforms", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/Flow/IR", diff --git a/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt b/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt index 50207fdd49f7..44d43b1becd7 100644 --- a/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt +++ b/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt @@ -57,6 +57,7 @@ iree_cc_library( MLIRVectorToLLVM MLIRVectorToSCF iree::compiler::Codegen::Common + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::Transforms iree::compiler::Codegen::Utils diff --git a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp index 3504b5bd94ae..dbbdccad2f74 100644 --- a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp +++ b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp @@ -8,9 +8,7 @@ #include "iree/compiler/Codegen/Transforms/Transforms.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/CommandLine.h" @@ -227,8 +225,8 @@ static LogicalResult setDefaultLaunchConfig( getDefaultWorkloadPerWorkgroup(tiledLoops, nativeVectorSizeInElements); setTranslationInfo( - entryPointFn, IREE::HAL::DispatchLoweringPassPipeline::CPUDefault, - /*workgroupSize =*/ArrayRef{}, workloadPerWorkgroup); + entryPointFn, IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault, + workloadPerWorkgroup, /*workgroupSize =*/ArrayRef{}); return success(); } @@ -299,8 +297,9 @@ static LogicalResult setRootConfig(FuncOp 
entryPointFn, vectorSizeVals[i]); } setTranslationInfo( - entryPointFn, IREE::HAL::DispatchLoweringPassPipeline::CPUTensorToVectors, - /*workgroupSize =*/ArrayRef{}, workloadPerWorkgroup); + entryPointFn, + IREE::Codegen::DispatchLoweringPassPipeline::CPUTensorToVectors, + workloadPerWorkgroup, /*workgroupSize =*/ArrayRef{}); SmallVector l1TileSizes, vectorTileSizes; if (isBatchMatmul) { @@ -321,8 +320,8 @@ static LogicalResult setRootConfig(FuncOp entryPointFn, // level tiling. tileSizes.emplace_back(std::move(l1TileSizes)); tileSizes.emplace_back(std::move(vectorTileSizes)); - IREE::HAL::LoweringConfig config = - buildConfigAttr(tileSizes, vectorSizeVals, entryPointFn.getContext()); + auto config = IREE::Codegen::LoweringConfigAttr::get( + entryPointFn.getContext(), tileSizes, vectorSizeVals); setLoweringConfig(contractionOp, config); return success(); } @@ -368,14 +367,14 @@ static LogicalResult setRootConfig(FuncOp entryPointFn, linalg::Mmt4DOp mmt4dOp, return {1, 1, 1, M0, N0, K0}; }; - SmallVector nativeVectorSize = getVectorSizes(); + SmallVector nativeVectorSize = getVectorSizes(); TileSizesListType tileSizes = {getWorkgroupTileSizes(), getL1TileSizes(), nativeVectorSize}; return setOpConfigAndEntryPointFnTranslation( entryPointFn, mmt4dOp, tileSizes, nativeVectorSize, - IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization); + IREE::Codegen::DispatchLoweringPassPipeline::CPUVectorization); } /// Sets the lowering configuration for dispatch region for linalg_ext.fft @@ -384,8 +383,7 @@ static LogicalResult setRootConfig(FuncOp entryPointFn, linalg_ext::FftOp fftOp, ArrayRef tiledLoops) { auto partitionedLoops = getPartitionedLoops(fftOp); unsigned maxDepth = partitionedLoops.back() + 1; - SmallVector workgroupTileSizes(maxDepth, - defaultWorkgroupTileSize); + SmallVector workgroupTileSizes(maxDepth, defaultWorkgroupTileSize); llvm::DenseSet partitionedLoopsSet(partitionedLoops.begin(), partitionedLoops.end()); for (auto dim : llvm::seq(0, 
workgroupTileSizes.size())) { @@ -412,7 +410,7 @@ static LogicalResult setRootConfig(FuncOp entryPointFn, linalg_ext::FftOp fftOp, return setOpConfigAndEntryPointFnTranslation( entryPointFn, fftOp, tileSizes, /*nativeVectorSizes=*/ArrayRef{}, - IREE::HAL::DispatchLoweringPassPipeline::CPUDefault); + IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault); } /// Finds the root operation in the given list of linalg operations and sets @@ -454,11 +452,8 @@ static LogicalResult setTranslationInfoAndRootConfig( for (auto computeOp : computeOps) { if (!hasMarker(computeOp, getWorkgroupMarker())) continue; - if (auto config = getLoweringConfig(computeOp)) { - // Check if the op has a preset pipeline. - auto passPipeline = getLoweringPassPipeline(config); - if (!passPipeline) continue; - + if (IREE::Codegen::CompilationInfoAttr compilationInfo = + getCompilationInfo(computeOp)) { // If the function already has a translation, error out. if (auto translationInfo = getTranslationInfo(entryPointFn)) { return computeOp->emitOpError( @@ -466,17 +461,12 @@ static LogicalResult setTranslationInfoAndRootConfig( "info"); } - SmallVector workgroupSize; - if (auto workgroupSizeAttr = config.workgroupSize()) { - workgroupSize = llvm::to_vector<4>( - llvm::map_range(workgroupSizeAttr, [](Attribute intAttr) { - return intAttr.cast().getInt(); - })); - } - if (failed(setOpConfigAndEntryPointFnTranslation( - entryPointFn, computeOp, config, *passPipeline, workgroupSize))) { - return failure(); - } + SmallVector workgroupSize = + compilationInfo.getWorkgroupSizeVals(); + setTranslationInfo(entryPointFn, compilationInfo.getTranslationInfo(), + workgroupSize); + setLoweringConfig(computeOp, compilationInfo.getLoweringConfig()); + eraseCompilationInfo(computeOp); } } diff --git a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h index dbacd1a5c3b4..afb616e8cb08 100644 --- a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h +++ 
b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h @@ -7,7 +7,7 @@ #ifndef IREE_COMPILER_CODEGEN_LLVMCPU_KERNELDISPATCH_H_ #define IREE_COMPILER_CODEGEN_LLVMCPU_KERNELDISPATCH_H_ -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "mlir/IR/BuiltinOps.h" namespace mlir { diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp index 87bd94ba4313..0732fbbe480a 100644 --- a/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp +++ b/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp @@ -4,10 +4,10 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Codegen/LLVMCPU/KernelDispatch.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -30,8 +30,9 @@ class LLVMCPULowerExecutableTargetPass LLVMCPULowerExecutableTargetPass> { public: void getDependentDialects(DialectRegistry &registry) const override { - registry.insert(); + registry.insert(); } LLVMCPULowerExecutableTargetPass(bool vectorize = true) @@ -123,16 +124,15 @@ void LLVMCPULowerExecutableTargetPass::runOnOperation() { // is fine. 
llvm::StringMap entryPoints = getAllEntryPoints(moduleOp); - Optional passPipeline; + Optional passPipeline; for (auto &it : entryPoints) { auto entryPointOp = it.second; - if (IREE::HAL::TranslationInfo translationInfo = + if (IREE::Codegen::TranslationInfoAttr translationInfo = getTranslationInfo(entryPointOp)) { - Optional currPipeline = - getLoweringPassPipeline(translationInfo); - if (!currPipeline) continue; + IREE::Codegen::DispatchLoweringPassPipeline currPipeline = + translationInfo.getDispatchLoweringPassPipeline(); if (passPipeline) { - if (currPipeline.getValue() != passPipeline.getValue()) { + if (currPipeline != passPipeline.getValue()) { moduleOp.emitError( "unhandled compilation of entry point function with different " "pass pipelines within a module"); @@ -150,14 +150,14 @@ void LLVMCPULowerExecutableTargetPass::runOnOperation() { OpPassManager &nestedModulePM = executableLoweringPipeline.nest(); switch (passPipeline.getValue()) { - case IREE::HAL::DispatchLoweringPassPipeline::CPUDefault: - case IREE::HAL::DispatchLoweringPassPipeline::None: + case IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault: + case IREE::Codegen::DispatchLoweringPassPipeline::None: addCPUDefaultPassPipeline(nestedModulePM); break; - case IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization: + case IREE::Codegen::DispatchLoweringPassPipeline::CPUVectorization: addCPUVectorizationPassPipeline(nestedModulePM, lowerToVectors); break; - case IREE::HAL::DispatchLoweringPassPipeline::CPUTensorToVectors: + case IREE::Codegen::DispatchLoweringPassPipeline::CPUTensorToVectors: addTensorToVectorsPassPipeline(nestedModulePM, lowerToVectors); break; default: diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp index aa1a3c69d03b..0c77693bb043 100644 --- a/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp +++ 
b/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp @@ -4,6 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/LLVMCPU/KernelDispatch.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" @@ -20,7 +21,7 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#define DEBUG_TYPE "iree-linalg-to-llvm-tile-and-pad-workgroups" +#define DEBUG_TYPE "iree-llvmcpu-tile-and-vectorize" namespace mlir { namespace iree_compiler { @@ -72,15 +73,20 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { MLIRContext *context = &getContext(); auto funcOp = getOperation(); - // First level of tiling patterns { + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- Before LLVMCPUTileAndVectorizePass ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); + + // First level of tiling patterns { OwningRewritePatternList l1patterns(&getContext()); l1patterns.insert( context, linalg::LinalgTilingOptions().setTileSizeComputationFunction( - [](OpBuilder &builder, - Operation *operation) -> SmallVector { - return getTileSizes(builder, operation, + [](OpBuilder &builder, Operation *op) -> SmallVector { + return getTileSizes(builder, op, static_cast(TilingLevel::L1Tiles)); }), linalg::LinalgTransformationFilter( @@ -90,6 +96,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(l1patterns)))) { return signalPassFailure(); } + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- After first level of tiling patterns ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } // Apply canoncalization @@ -104,6 +116,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { 
funcOp, std::move(canonicalizationPatterns)))) { return signalPassFailure(); } + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- After canonicalization ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } // Second level of tiling patterns{ @@ -112,11 +130,9 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { l2patterns.insert( context, linalg::LinalgTilingOptions().setTileSizeComputationFunction( - [](OpBuilder &builder, - Operation *operation) -> SmallVector { + [](OpBuilder &builder, Operation *op) -> SmallVector { return getTileSizes( - builder, operation, - static_cast(TilingLevel::VectorTiles)); + builder, op, static_cast(TilingLevel::VectorTiles)); }), linalg::LinalgTransformationFilter( Identifier::get(getWorkgroupL1TileMarker(), context), @@ -125,7 +141,14 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(l2patterns)))) { return signalPassFailure(); } + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- After second level of tiling patterns ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } + // Apply canoncalization { OwningRewritePatternList canonicalizationPatterns(&getContext()); @@ -138,6 +161,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { funcOp, std::move(canonicalizationPatterns)))) { return signalPassFailure(); } + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- After canonicalization ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } if (!lowerToVectors) { @@ -156,6 +185,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { funcOp, std::move(vectorizationPatterns)))) { return signalPassFailure(); } + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- After vectorization ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + 
}); } { @@ -165,6 +200,14 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { context); (void)applyPatternsAndFoldGreedily(funcOp, std::move(canonicalizationPatterns)); + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() + << "\n--- After folding consumer add ops into contraction op " + "iteself ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } // Apply vector specific operation lowering. @@ -183,6 +226,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { funcOp, std::move(vectorContractLoweringPatterns)))) { return signalPassFailure(); } + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- After vector specific operatrion lowering ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } } diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp index 2fc018541ea0..53092aec2db4 100644 --- a/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp +++ b/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp @@ -167,9 +167,8 @@ void LLVMCPUVectorizationPass::runOnOperation() { l1patterns.insert( context, linalg::LinalgTilingOptions().setTileSizeComputationFunction( - [](OpBuilder &builder, - Operation *operation) -> SmallVector { - return getTileSizes(builder, operation, + [](OpBuilder &builder, Operation *op) -> SmallVector { + return getTileSizes(builder, op, static_cast(TilingLevel::L1Tiles)); }), linalg::LinalgTransformationFilter( @@ -188,11 +187,9 @@ void LLVMCPUVectorizationPass::runOnOperation() { l2patterns.insert( context, linalg::LinalgTilingOptions().setTileSizeComputationFunction( - [](OpBuilder &builder, - Operation *operation) -> SmallVector { + [](OpBuilder &builder, Operation *op) -> SmallVector { return getTileSizes( - builder, operation, - static_cast(TilingLevel::VectorTiles)); + builder, op, static_cast(TilingLevel::VectorTiles)); }), 
linalg::LinalgTransformationFilter( Identifier::get(getWorkgroupL1TileMarker(), context), diff --git a/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir b/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir index 91728ceed539..c307c9bc5261 100644 --- a/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir +++ b/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir @@ -59,10 +59,11 @@ hal.executable private @matmul_tensors { } } -// CHECK-DAG: #[[CONFIG:.+]] = {nativeVectorSize = [4, 4, 4], tileSizes = {{\[}}{{\[}}{{\]}}, [32, 32, 32], [4, 4, 4]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUTensorToVectors", workload_per_wg = [64, 64]> // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> // CHECK: hal.executable.entry_point public @matmul_tensors -// CHECK-SAME: translation.info = {passPipeline = "CPUTensorToVectors", workloadPerWorkgroup = [64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: (%[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index) @@ -118,11 +119,10 @@ hal.executable private @add_no_config { } } } - -// CHECK: #[[CONFIG:[a-zA-Z]+]] = {passPipeline = "CPUDefault"} -// CHECK: hal.executable private @add_no_config -// CHECK: hal.executable.entry_point public @add_no_config -// CHECK-SAME: translation.info = #[[CONFIG]] +// CHECK: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []> +// CHECK: hal.executable private @add_no_config +// CHECK: hal.executable.entry_point public @add_no_config +// CHECK-SAME: translation.info = #[[TRANSLATION]] // ----- @@ -192,9 +192,10 @@ hal.executable private @add { } } } +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64]> // CHECK-DAG: #[[MAP0:.+]] 
= affine_map<()[s0] -> (s0 ceildiv 64)> // CHECK: hal.executable.entry_point public @add -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: (%[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index) @@ -296,8 +297,9 @@ hal.executable private @add4D { } } // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64, 64]> // CHECK: hal.executable.entry_point public @add4D -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: (%[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index) @@ -378,8 +380,9 @@ hal.executable private @batch_matmul_tensors { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {nativeVectorSize = [1, 4, 4, 4], tileSizes = {{\[}}[], [1, 32, 32, 32], [1, 4, 4, 4]{{\]}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUTensorToVectors", workload_per_wg = [64, 64]> // CHECK: hal.executable.entry_point public @batch_matmul_tensors // CHECK-NEXT: (%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index @@ -393,6 +396,10 @@ hal.executable private @batch_matmul_tensors { // ----- +#compilation = #iree_codegen.compilation.info< + #iree_codegen.lowering.config, + #iree_codegen.translation.info<"CPUVectorization", workload_per_wg = [32, 32]>, + workgroup_size = []> hal.executable private @preset_config_matmul_tensors { hal.executable.variant @system_elf_x86_64, target = #hal.executable.target<"llvm", "system-elf-x86_64"> { 
hal.executable.entry_point @preset_config attributes {interface = @io, ordinal = 0 : index} @@ -427,7 +434,11 @@ hal.executable private @preset_config_matmul_tensors { %14 = affine.min affine_map<(d0)[s0] -> (-d0 + 512, s0)>(%arg1)[%workgroup_size_x] %15 = linalg.init_tensor [%13, %14] : tensor %16 = linalg.fill(%cst, %15) : f32, tensor -> tensor - %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = {passPipeline = "CPUVectorization", tileSizes = [[32, 32, 32]]}} ins(%8, %10 : tensor, tensor<256x?xf32>) outs(%16 : tensor) -> tensor + %17 = linalg.matmul { + __internal_linalg_transform__ = "workgroup", + compilation.info = #compilation} + ins(%8, %10 : tensor, tensor<256x?xf32>) + outs(%16 : tensor) -> tensor flow.dispatch.tensor.store %17, %2, offsets = [%arg0, %arg1], sizes = [%11, %12], strides = [1, 1] : tensor -> !flow.dispatch.tensor } } @@ -441,11 +452,12 @@ hal.executable private @preset_config_matmul_tensors { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {passPipeline = "CPUVectorization", tileSizes = {{\[}}[32, 32, 32]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 32)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUVectorization", workload_per_wg = [32, 32]> // CHECK: hal.executable.entry_point -// CHECK-SAME: translation.info = {passPipeline = "CPUVectorization", workloadPerWorkgroup = [32, 32]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[NWG_X:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] @@ -511,9 +523,10 @@ hal.executable @tensor_insert { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = 
#iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64]> // CHECK: hal.executable.entry_point public @tensor_insert_slice -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index @@ -548,12 +561,11 @@ hal.executable private @static_1d_fft_stage2 { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[64]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @static_1d_fft_stage2 -// CHECK-SAME: translation.info = { -// CHECK-SAME: passPipeline = "CPUDefault" -// CHECK-SAME: workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index): // CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index // CHECK-NEXT: %[[T0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] @@ -620,12 +632,11 @@ hal.executable private @static_3d_fft_stage3 { } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[64, 64, 64]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64, 64]> // CHECK: hal.executable.entry_point public @static_3d_fft_stage3 -// CHECK-SAME: translation.info = { -// CHECK-SAME: passPipeline = "CPUDefault" -// CHECK-SAME: workloadPerWorkgroup = [64, 64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index): // CHECK-NEXT: %[[T0:.+]] = 
affine.apply #[[MAP0]]()[%[[ARG0]]] // CHECK-NEXT: %[[T1:.+]] = affine.apply #[[MAP0]]()[%[[ARG1]]] @@ -700,8 +711,9 @@ hal.executable private @outs_fusion { } } } +// CHECK: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64]> // CHECK: hal.executable.entry_point public @outs_fusion_fn -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // ----- @@ -768,9 +780,10 @@ hal.executable private @conv { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64, 64]> // CHECK: hal.executable.entry_point public @conv attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index) // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]] // CHECK-DAG: %[[D1:.+]] = affine.apply #[[MAP0]]()[%[[ARG1]] @@ -844,8 +857,9 @@ hal.executable private @conv_static { } // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64, 32]> // CHECK: hal.executable.entry_point public @conv_static attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64, 32]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index) // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]] // CHECK-DAG: %[[D1:.+]] = affine.apply 
#[[MAP0]]()[%[[ARG1]] @@ -902,8 +916,9 @@ hal.executable private @generic_static { } // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [32, 8]> // CHECK: hal.executable.entry_point public @generic_static attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [32, 8]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index) // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]] @@ -960,11 +975,12 @@ hal.executable private @matmul_static { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {nativeVectorSize = [4, 4, 4], tileSizes = {{\[}}[], [28, 8, 24], [4, 4, 4]{{\]}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 28)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUTensorToVectors", workload_per_wg = [8, 28]> // CHECK: hal.executable.entry_point public @matmul_static attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUTensorToVectors", workloadPerWorkgroup = [8, 28]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index) // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] @@ -1035,8 +1051,9 @@ hal.executable private @restrict_num_workgroups { // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> // CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// 
CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 8, 4]> // CHECK: hal.executable.entry_point public @restrict_num_workgroups attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 8, 4]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index) // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] // CHECK-DAG: %[[D1:.+]] = affine.apply #[[MAP1]]()[%[[ARG1]]] @@ -1074,9 +1091,10 @@ hal.executable private @test_exp_0 { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @test_exp_0 attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] @@ -1113,9 +1131,10 @@ hal.executable private @test_exp_1 { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @test_exp_1 attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] @@ -1152,9 +1171,10 @@ hal.executable private @test_exp_2 { } 
} } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @test_exp_2 attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] @@ -1191,9 +1211,10 @@ hal.executable private @test_exp_3 { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @test_exp_3 attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] @@ -1230,9 +1251,10 @@ hal.executable private @test_exp_4 { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @test_exp_4 attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] @@ -1269,9 +1291,10 @@ 
hal.executable private @test_exp_5 { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @test_exp_5 attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] diff --git a/iree/compiler/Codegen/LLVMCPU/test/matmul_vectorization.mlir b/iree/compiler/Codegen/LLVMCPU/test/matmul_vectorization.mlir index e49121783ecb..21e0e9a74a0e 100644 --- a/iree/compiler/Codegen/LLVMCPU/test/matmul_vectorization.mlir +++ b/iree/compiler/Codegen/LLVMCPU/test/matmul_vectorization.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt -pass-pipeline="hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target{use-lowering-pipeline='builtin.func(iree-llvmcpu-vectorization)'}))" -split-input-file %s | IreeFileCheck %s // RUN: iree-opt -pass-pipeline="hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target{use-lowering-pipeline='builtin.func(iree-llvmcpu-vectorization{promote-workgroup-to-full-tiles}),cse'}))" -split-input-file %s | IreeFileCheck %s -check-prefix=CHECK-PROMOTED -#config = {nativeVectorSize = [4, 4, 4], tileSizes = [[64, 64], [32, 32, 32], [4, 4, 4]]} +#config = #iree_codegen.lowering.config hal.executable private @dynamic_matmul { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -114,7 +114,7 @@ hal.executable private @dynamic_matmul { // ----- -#config = {nativeVectorSize = [4, 4, 4], tileSizes = [[64, 64], [32, 32, 32], [4, 4, 4]]} +#config = #iree_codegen.lowering.config hal.executable private 
@matmul_i8_i8_i32 { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" diff --git a/iree/compiler/Codegen/LLVMCPU/test/tile_and_vectorize.mlir b/iree/compiler/Codegen/LLVMCPU/test/tile_and_vectorize.mlir index ddc4b8fb6d15..4a51e1ad099f 100644 --- a/iree/compiler/Codegen/LLVMCPU/test/tile_and_vectorize.mlir +++ b/iree/compiler/Codegen/LLVMCPU/test/tile_and_vectorize.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt %s -cse -iree-llvmcpu-tile-and-vectorize -cse -canonicalize -split-input-file | IreeFileCheck %s -#config0 = {tileSizes = [[64, 64]]} -#config1 = {nativeVectorSize = [4, 4, 4], tileSizes = [[64, 64], [32, 32, 32], [4, 4, 4]]} +#config0 = #iree_codegen.lowering.config +#config1 = #iree_codegen.lowering.config #map0 = affine_map<()[s0] -> (s0 * 64)> #map1 = affine_map<(d0) -> (64, -d0 + 383)> #map2 = affine_map<(d0) -> (64, -d0 + 513)> diff --git a/iree/compiler/Codegen/LLVMGPU/BUILD b/iree/compiler/Codegen/LLVMGPU/BUILD index e9cc75d421fc..51adc6e585a5 100644 --- a/iree/compiler/Codegen/LLVMGPU/BUILD +++ b/iree/compiler/Codegen/LLVMGPU/BUILD @@ -34,6 +34,7 @@ cc_library( deps = [ "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/Transforms", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/Flow/IR", diff --git a/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt b/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt index cf63b73c3033..f3c058ed86ae 100644 --- a/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt +++ b/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt @@ -61,6 +61,7 @@ iree_cc_library( MLIRVectorToLLVM MLIRVectorToSCF iree::compiler::Codegen::Common + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::Transforms iree::compiler::Codegen::Utils diff --git a/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp b/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp 
index fa009cd74b6f..0d1ae4e7cdbe 100644 --- a/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp +++ b/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp @@ -8,7 +8,7 @@ #include -#include "iree/compiler/Codegen/Utils/Utils.h" +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "llvm/Support/Debug.h" @@ -100,7 +100,7 @@ static LogicalResult setContractConfig(FuncOp entryPoint, linalg::LinalgOp op) { } // Currently just a basic tile size to enable tiling and vectorization. // TODO: pick a more efficient tile size and tile at subgroup level. - SmallVector ts; + SmallVector ts; // Tile all the higher parallel dimension with a size of 1 and the 2 most // inner dimension with the tileX/tileY size. ts.append(op.getNumParallelLoops() - 2, 1); @@ -110,14 +110,14 @@ static LogicalResult setContractConfig(FuncOp entryPoint, linalg::LinalgOp op) { tileSizes.push_back(ts); // Workgroup level. return setOpConfigAndEntryPointFnTranslation( entryPoint, op, tileSizes, /*nativeVectorSizes=*/ArrayRef{}, - IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt, + IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt, workgroupSize); } static LogicalResult setFftConfig(FuncOp entryPoint, linalg_ext::FftOp op) { auto partitionedLoops = getPartitionedLoops(op); unsigned loopDepth = partitionedLoops.back() + 1; - SmallVector workgroupTileSize(loopDepth, 0); + SmallVector workgroupTileSize(loopDepth, 0); SmallVector workgroupSize = {cudaWarpSize, 1, 1}; // Tiling along partitioned loops with size 1. 
@@ -137,14 +137,14 @@ static LogicalResult setFftConfig(FuncOp entryPoint, linalg_ext::FftOp op) { TileSizesListType tileSizes = {workgroupTileSize}; return setOpConfigAndEntryPointFnTranslation( entryPoint, op, tileSizes, /*nativeVectorSizes=*/ArrayRef{}, - IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute, + IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute, workgroupSize); } // Basic default properties for linalg ops that haven't been tuned. static LogicalResult setRootDefaultConfig(FuncOp entryPoint, Operation *op) { - IREE::HAL::DispatchLoweringPassPipeline passPipeline = - IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute; + IREE::Codegen::DispatchLoweringPassPipeline passPipeline = + IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute; TileSizesListType tileSizes; SmallVector partitionedLoops = getPartitionedLoops(op); if (partitionedLoops.empty()) { @@ -210,43 +210,34 @@ static LogicalResult setRootDefaultConfig(FuncOp entryPoint, Operation *op) { tileSizes.emplace_back(std::move(workgroupTileSizes)); // Workgroup level return setOpConfigAndEntryPointFnTranslation( entryPoint, op, tileSizes, /*nativeVectorSizes=*/ArrayRef{}, - IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUVectorize, workgroupSize); + IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUVectorize, + workgroupSize); } /// Propagate the configuration annotated in the incoming IR. 
-static LogicalResult setUserConfig(FuncOp entryPointFn, Operation *computeOp, - IREE::HAL::LoweringConfig config) { - IREE::HAL::DispatchLoweringPassPipeline passPipeline = - IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUVectorize; - if (auto setPassPipeline = getLoweringPassPipeline(config)) { - passPipeline = setPassPipeline.getValue(); - } - SmallVector workgroupSize; - if (auto workgroupSizeAttr = config.workgroupSize()) { - workgroupSize = llvm::to_vector<4>( - llvm::map_range(workgroupSizeAttr, [](Attribute intAttr) { - return intAttr.cast().getInt(); - })); +static LogicalResult setUserConfig( + FuncOp entryPointFn, Operation *computeOp, + IREE::Codegen::CompilationInfoAttr compilationInfo) { + if (auto translationInfo = getTranslationInfo(entryPointFn)) { + return computeOp->emitOpError( + "multiple ops within dispatch trying to set the translation " + "info"); } - if (failed(setOpConfigAndEntryPointFnTranslation( - entryPointFn, computeOp, config, passPipeline, workgroupSize))) { - return failure(); - } - // Reset the op configuration to drop the pass-pipeline and workgroup size - // info. The op does not carry that information anymore. 
- auto resetConfig = IREE::HAL::LoweringConfig::get( - config.tileSizes(), config.nativeVectorSize(), - /*passPipeline =*/nullptr, - /*workgroupSize =*/nullptr, computeOp->getContext()); - setLoweringConfig(computeOp, resetConfig); + + SmallVector workgroupSize = compilationInfo.getWorkgroupSizeVals(); + setTranslationInfo(entryPointFn, compilationInfo.getTranslationInfo(), + workgroupSize); + setLoweringConfig(computeOp, compilationInfo.getLoweringConfig()); + eraseCompilationInfo(computeOp); return success(); } static LogicalResult setRootConfig(FuncOp entryPointFn, Operation *computeOp) { - if (IREE::HAL::LoweringConfig config = getLoweringConfig(computeOp)) { + if (IREE::Codegen::CompilationInfoAttr compilationInfo = + getCompilationInfo(computeOp)) { // If the op already has a lowering config coming from the IR use this and // bypass the heuristic. - return setUserConfig(entryPointFn, computeOp, config); + return setUserConfig(entryPointFn, computeOp, compilationInfo); } if (auto linalgOp = dyn_cast(computeOp)) { if (linalg::isaContractionOpInterface(linalgOp) && @@ -294,8 +285,9 @@ LogicalResult initGPULaunchConfig(ModuleOp moduleOp) { // anything. Without any compute ops, this shouldnt be using tile and // distribute. setTranslationInfo( - funcOp, IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute, - workgroupSize, workloadPerWorkgroup); + funcOp, + IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute, + workloadPerWorkgroup, workgroupSize); continue; } @@ -330,8 +322,9 @@ LogicalResult initGPULaunchConfig(ModuleOp moduleOp) { // anything. Without any compute ops, this shouldnt be using tile and // distribute. 
setTranslationInfo( - funcOp, IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute, - {1, 1, 1}, /*workloadPerWorkgroup=*/{}); + funcOp, + IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute, + /*workloadPerWorkgroup=*/{}, {1, 1, 1}); continue; } if (failed(setRootConfig(funcOp, rootOperation))) continue; @@ -342,7 +335,7 @@ LogicalResult initGPULaunchConfig(ModuleOp moduleOp) { // and distributed. The rest of the compilation must be structured to either // use `TileAndFuse` or they are independent configurations that are // determined based on the op. - IREE::HAL::LoweringConfig config = getLoweringConfig(rootOperation); + IREE::Codegen::LoweringConfigAttr config = getLoweringConfig(rootOperation); for (auto op : computeOps) { if (op == rootOperation) continue; setLoweringConfig(op, config); diff --git a/iree/compiler/Codegen/LLVMGPU/KernelConfig.h b/iree/compiler/Codegen/LLVMGPU/KernelConfig.h index 2717b90b7eb0..d085245cf93b 100644 --- a/iree/compiler/Codegen/LLVMGPU/KernelConfig.h +++ b/iree/compiler/Codegen/LLVMGPU/KernelConfig.h @@ -7,7 +7,6 @@ #ifndef IREE_COMPILER_CODEGEN_LLVMGPU_KERNELCONFIG_H_ #define IREE_COMPILER_CODEGEN_LLVMGPU_KERNELCONFIG_H_ -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "mlir/IR/BuiltinOps.h" namespace mlir { diff --git a/iree/compiler/Codegen/LLVMGPU/LLVMGPUDistributeSharedMemoryCopy.cpp b/iree/compiler/Codegen/LLVMGPU/LLVMGPUDistributeSharedMemoryCopy.cpp index a07d472f342c..5dfd252dcb19 100644 --- a/iree/compiler/Codegen/LLVMGPU/LLVMGPUDistributeSharedMemoryCopy.cpp +++ b/iree/compiler/Codegen/LLVMGPU/LLVMGPUDistributeSharedMemoryCopy.cpp @@ -7,6 +7,7 @@ #include #include +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/LLVMGPU/LLVMGPUUtils.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" diff --git a/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp 
b/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp index 371d33c3ee4d..5ef123937d43 100644 --- a/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp +++ b/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp @@ -4,10 +4,11 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/LLVMGPU/KernelConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtDialect.h" @@ -32,9 +33,9 @@ class LLVMGPULowerExecutableTargetPass LLVMGPULowerExecutableTargetPass> { public: void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert(); } LLVMGPULowerExecutableTargetPass() = default; @@ -72,16 +73,15 @@ void LLVMGPULowerExecutableTargetPass::runOnOperation() { // is fine. 
llvm::StringMap entryPoints = getAllEntryPoints(moduleOp); - Optional passPipeline; + Optional passPipeline; for (auto &it : entryPoints) { auto entryPointOp = it.second; - if (IREE::HAL::TranslationInfo translationInfo = + if (IREE::Codegen::TranslationInfoAttr translationInfo = getTranslationInfo(entryPointOp)) { - Optional currPipeline = - getLoweringPassPipeline(translationInfo); - if (!currPipeline) continue; + IREE::Codegen::DispatchLoweringPassPipeline currPipeline = + translationInfo.getDispatchLoweringPassPipeline(); if (passPipeline) { - if (currPipeline.getValue() != passPipeline.getValue()) { + if (currPipeline != passPipeline.getValue()) { moduleOp.emitError( "unhandled compilation of entry point function with different " "pass pipelines within a module"); @@ -98,13 +98,13 @@ void LLVMGPULowerExecutableTargetPass::runOnOperation() { if (!testLoweringConfiguration && passPipeline.hasValue()) { OpPassManager &nestedModulePM = executableLoweringPipeline.nest(); switch (*passPipeline) { - case IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute: + case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute: addGPUSimpleDistributePassPipeline(nestedModulePM); break; - case IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUVectorize: + case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUVectorize: addGPUVectorizationPassPipeline(nestedModulePM); break; - case IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt: + case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt: addGPUMatmulSimtPassPipeline(nestedModulePM); break; default: diff --git a/iree/compiler/Codegen/LLVMGPU/LLVMGPURemoveTrivialLoops.cpp b/iree/compiler/Codegen/LLVMGPU/LLVMGPURemoveTrivialLoops.cpp index 383b74dd697e..df0e622bcaa1 100644 --- a/iree/compiler/Codegen/LLVMGPU/LLVMGPURemoveTrivialLoops.cpp +++ b/iree/compiler/Codegen/LLVMGPU/LLVMGPURemoveTrivialLoops.cpp @@ -4,6 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/LLVMGPU/LLVMGPUUtils.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" @@ -77,11 +78,8 @@ static SmallVector getNumWorkgroup( auto translationInfo = getTranslationInfo(entryPointOp); if (!translationInfo) return SmallVector(); - ArrayAttr workloadPerWorkgroupAttr = translationInfo.workloadPerWorkgroup(); - if (!workloadPerWorkgroupAttr) return SmallVector(); - auto workloadPerWorkgroup = llvm::to_vector<4>(llvm::map_range( - workloadPerWorkgroupAttr, - [](Attribute attr) { return attr.cast().getInt(); })); + SmallVector workloadPerWorkgroup = + translationInfo.getWorkloadPerWorkgroupVals(); if (workloadSize.size() != workloadPerWorkgroup.size()) return SmallVector(); SmallVector numWorkgroups; diff --git a/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp b/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp index 4409c334ab70..2a701f8d165c 100644 --- a/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp +++ b/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp @@ -4,14 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/LLVMGPU/KernelConfig.h" #include "iree/compiler/Codegen/LLVMGPU/LLVMGPUUtils.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/Transforms/Transforms.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" -#include "iree/compiler/Codegen/Utils/Utils.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "iree/compiler/Dialect/LinalgExt/Transforms/Transforms.h" #include "iree/compiler/Dialect/Util/IR/UtilOps.h" @@ -38,19 +37,14 @@ static void populateTilingReductionPatterns( auto tileSizesFn = [&](OpBuilder &builder, Operation *op) -> SmallVector { SmallVector partitionedLoops = getPartitionedLoops(op); - SmallVector tileSizes = getTileSizes(op, 0); - Location loc = op->getLoc(); - auto tileSizesVal = - llvm::to_vector<4>(llvm::map_range(tileSizes, [&](int64_t v) -> Value { - return builder.create(loc, v); - })); - auto zero = builder.create(loc, 0); + SmallVector tileSizes = getTileSizes(builder, op, 0); + auto zero = builder.create(op->getLoc(), 0); for (unsigned depth : partitionedLoops) { - if (depth < tileSizesVal.size()) { - tileSizesVal[depth] = zero; + if (depth < tileSizes.size()) { + tileSizes[depth] = zero; } } - return tileSizesVal; + return tileSizes; }; auto tilingOptions = linalg::LinalgTilingOptions() @@ -69,8 +63,8 @@ static void populateTilingReductionPatterns( /// Patterns for thread level tiling. 
static void populateTilingToInvocationPatterns( MLIRContext *context, OwningRewritePatternList &patterns, - SmallVector &workgroupSize, - SmallVector &workloadPerWorkgroup) { + SmallVectorImpl &workgroupSize, + SmallVectorImpl &workloadPerWorkgroup) { linalg::TileSizeComputationFunction getInnerTileSizeFn = [&](OpBuilder &builder, Operation *operation) { SmallVector tileSizesVal; @@ -95,7 +89,7 @@ static void populateTilingToInvocationPatterns( return tileSizesVal; }; - auto getThreadProcInfoFn = [workgroupSize]( + auto getThreadProcInfoFn = [&workgroupSize]( OpBuilder &builder, Location loc, ArrayRef parallelLoopRanges) { return getGPUThreadIdsAndCounts(builder, loc, parallelLoopRanges.size(), @@ -240,11 +234,8 @@ struct LLVMGPUTileAndDistributePass auto workgroupSize = llvm::to_vector<4>(llvm::map_range( getEntryPoint(funcOp).workgroup_size().getValue(), [&](Attribute attr) { return attr.cast().getInt(); })); - auto workloadPerWorkgroup = llvm::to_vector<4>(llvm::map_range( - getTranslationInfo(getEntryPoint(funcOp)) - .workloadPerWorkgroup() - .getValue(), - [&](Attribute attr) { return attr.cast().getInt(); })); + auto workloadPerWorkgroup = + getTranslationInfo(getEntryPoint(funcOp)).getWorkloadPerWorkgroupVals(); int64_t flatWorkgroupSize = workgroupSize[0] * workgroupSize[1] * workgroupSize[2]; diff --git a/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir b/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir index 2b7e6da2a411..e7aaf8d7a374 100644 --- a/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir +++ b/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir @@ -1,6 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-llvmgpu-tile-and-distribute))))' %s | IreeFileCheck %s -#config = {tileSizes = [[2, 256, 4]]} +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"LLVMGPUMatmulSimt", workload_per_wg = [256, 
2]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> #map0 = affine_map<()[s0] -> (s0 * 2)> #map1 = affine_map<()[s0] -> (s0 * 256)> @@ -12,7 +13,7 @@ hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb { hal.executable.entry_point @dot_dispatch_0 attributes { interface = @legacy_io, ordinal = 0 : index, - translation.info = {passPipeline = "LLVMGPUMatmulSimt" : i32, workloadPerWorkgroup = [256, 2]}, + translation.info = #translation, workgroup_size = [64 : index, 1 : index, 1 : index]} builtin.module { builtin.func @dot_dispatch_0() { @@ -86,14 +87,15 @@ hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb { // ----- -#config = {tileSizes = [[]]} +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"LLVMGPUVectorize", workload_per_wg = []> // Pure reducion case, skip tiling. hal.executable @reduction_dispatch { hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> { hal.executable.entry_point @predict_dispatch_153 attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = "LLVMGPUVectorize" : i32}, + translation.info = #translation, workgroup_size = [1: index, 1: index, 1: index]} builtin.module { builtin.func @predict_dispatch_153() { @@ -120,7 +122,7 @@ hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvpt } } } -// CHECK: #[[CONFIG:.+]] = {tileSizes = {{\[}}[]{{\]}}} +// CHECK: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK: hal.executable public @reduction_dispatch // CHECK: linalg.fill // CHECK-SAME: lowering.config = #[[CONFIG]] diff --git a/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir b/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir index b67847a345eb..8690542645b8 100644 --- a/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir +++ b/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir @@ -33,11 
+33,11 @@ hal.executable @add_dispatch_0 { } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[256]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 256)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUVectorize", workload_per_wg = [256]> // CHECK: hal.executable.entry_point public @add_dispatch_0 -// CHECK-SAME: passPipeline = "LLVMGPUVectorize" -// CHECK-SAME: workloadPerWorkgroup = [256] +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [64 : index, 1 : index, 1 : index] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index @@ -92,12 +92,12 @@ hal.executable private @dot_dispatch_1 { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[4, 2, 4]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUMatmulSimt", workload_per_wg = [2, 4]> // CHECK: hal.executable.entry_point public @dot_dispatch_1 -// CHECK-SAME: passPipeline = "LLVMGPUMatmulSimt" -// CHECK-SAME: workloadPerWorkgroup = [2, 4] +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [2 : index, 4 : index, 1 : index] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index @@ -143,18 +143,18 @@ hal.executable @reduction_dispatch { } } -// CHECK-DAG: #[[CONFIG0:.+]] = {passPipeline = "LLVMGPUDistribute"} -// CHECK-DAG: #[[CONFIG1:.+]] = {tileSizes = {{\[}}[]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = []> // CHECK: hal.executable.entry_point public 
@predict_dispatch_153 -// CHECK-SAME: translation.info = #[[CONFIG0]] +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [1 : index, 1 : index, 1 : index] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK: hal.return %[[C1]], %[[C1]], %[[C1]] // CHECK: linalg.fill -// CHECK-SAME: lowering.config = #[[CONFIG1]] +// CHECK-SAME: lowering.config = #[[CONFIG]] // CHECK: linalg.generic -// CHECK-SAME: lowering.config = #[[CONFIG1]] +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -199,9 +199,10 @@ hal.executable @tensor_insert { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [128, 1]> // CHECK: hal.executable.entry_point public @tensor_insert_slice -// CHECK-SAME: translation.info = {passPipeline = "LLVMGPUDistribute", workloadPerWorkgroup = [128, 1]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index @@ -246,10 +247,11 @@ hal.executable @tensor_insert { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[1, 256]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 256)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUVectorize", workload_per_wg = [256, 1]> // CHECK: hal.executable.entry_point public @tensor_insert_slice -// CHECK-SAME: translation.info = {passPipeline = "LLVMGPUVectorize", workloadPerWorkgroup = [256, 1]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index @@ 
-286,11 +288,11 @@ hal.executable private @static_1d_fft_stage2 { } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [4]> // CHECK: hal.executable.entry_point public @static_1d_fft_stage2 -// CHECK-SAME: translation.info = {passPipeline = "LLVMGPUDistribute" -// CHECK-SAME: workloadPerWorkgroup = [4]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [32 : index, 1 : index, 1 : index] // CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %{{.+}}: index, %{{.+}}: index): // CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index @@ -351,11 +353,11 @@ hal.executable private @static_3d_fft_stage3 { } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[1, 1, 8]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [8, 1, 1]> // CHECK: hal.executable.entry_point public @static_3d_fft_stage3 -// CHECK-SAME: translation.info = {passPipeline = "LLVMGPUDistribute" -// CHECK-SAME: workloadPerWorkgroup = [8, 1, 1]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [32 : index, 1 : index, 1 : index] // CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index): // CHECK-NEXT: %[[T:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] @@ -367,6 +369,10 @@ hal.executable private @static_3d_fft_stage3 { // ----- +#compilation = #iree_codegen.compilation.info< + #iree_codegen.lowering.config, + #iree_codegen.translation.info<"LLVMGPUMatmulSimt", workload_per_wg = [256, 32]>, + workgroup_size = [16, 8, 1]> hal.executable @user_config { hal.executable.variant public @cuda_nvptx_fb, target = 
#hal.executable.target<"cuda", "cuda-nvptx-fb"> { hal.executable.entry_point public @_lowering_config_test_dispatch_1 attributes {interface = @io, ordinal = 0 : index} @@ -401,7 +407,7 @@ hal.executable.variant public @cuda_nvptx_fb, target = #hal.executable.target<"c %14 = affine.min affine_map<(d0)[s0] -> (-d0 + 1024, s0)>(%arg1)[%workgroup_size_x] %15 = linalg.init_tensor [%13, %14] : tensor %16 = linalg.fill(%cst, %15) : f32, tensor -> tensor - %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = {passPipeline = "LLVMGPUMatmulSimt", tileSizes = [[32, 256, 64]], workgroupSize = [16, 8, 1]}} ins(%8, %10 : tensor, tensor<256x?xf32>) outs(%16 : tensor) -> tensor + %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup", compilation.info = #compilation} ins(%8, %10 : tensor, tensor<256x?xf32>) outs(%16 : tensor) -> tensor flow.dispatch.tensor.store %17, %2, offsets = [%arg0, %arg1], sizes = [%11, %12], strides = [1, 1] : tensor -> !flow.dispatch.tensor } } @@ -416,10 +422,10 @@ hal.executable.variant public @cuda_nvptx_fb, target = #hal.executable.target<"c } } -// CHECK-DAG: #[[CONFIG:.+]] = {{{.*}}tileSizes = {{\[}}[32, 256, 64]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK: hal.executable.entry_point public @_lowering_config_test_dispatch_1 -// CHECK-SAME: passPipeline = "LLVMGPUMatmulSimt" -// CHECK-SAME: workloadPerWorkgroup = [256, 32] +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [16 : index, 8 : index, 1 : index] // CHECK: func @_lowering_config_test_dispatch_1 // CHECK: linalg.fill diff --git a/iree/compiler/Codegen/LLVMGPU/test/remove_loops.mlir b/iree/compiler/Codegen/LLVMGPU/test/remove_loops.mlir index 46b6931e93c2..bed9ee396197 100644 --- a/iree/compiler/Codegen/LLVMGPU/test/remove_loops.mlir +++ b/iree/compiler/Codegen/LLVMGPU/test/remove_loops.mlir @@ -42,12 +42,13 @@ hal.executable private @dispatch_0 { // ----- // CHECK-LABEL: func 
@workgroup_tile_loop() +#translation = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [32]> hal.executable private @workgroup_tile_loop { hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> { hal.executable.entry_point @workgroup_tile_loop attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = "LLVMGPUDistribute", workloadPerWorkgroup = [32]} + translation.info = #translation } builtin.module { builtin.func @workgroup_tile_loop() { @@ -71,12 +72,13 @@ hal.executable private @workgroup_tile_loop { // ----- // CHECK-LABEL: func @workgroup_tile_loop_negative() +#translation = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [16]> hal.executable private @workgroup_tile_loop_negative { hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> { hal.executable.entry_point @workgroup_tile_loop_negative attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = "LLVMGPUDistribute", workloadPerWorkgroup = [16]} + translation.info = #translation } builtin.module { builtin.func @workgroup_tile_loop_negative() { diff --git a/iree/compiler/Codegen/Passes.h b/iree/compiler/Codegen/Passes.h index b513b99e292a..d550517c3a32 100644 --- a/iree/compiler/Codegen/Passes.h +++ b/iree/compiler/Codegen/Passes.h @@ -10,7 +10,6 @@ #include #include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassOptions.h" @@ -235,10 +234,6 @@ std::unique_ptr> createLLVMGPUPipeliningPass(); /// distribution to threads without vectorization. void addSPIRVTileAndDistributePassPipeline(OpPassManager &pm); -/// Pass pipeline to lower IREE HAL executables that contain Linalg ops that are -/// not tiled/distributed. Performs distribution to global invocations. 
-void addSPIRVDistributeToGlobalIDPassPipeline(OpPassManager &pm); - /// Pass pipeline to lower IREE HAL executables with workgroup tiled and /// distributed Linalg ops to SPIR-V scalar and vector code. Additionally /// performs distribution to threads with vectorization. @@ -256,9 +251,6 @@ void addSPIRVTileAndVectorizeToCooperativeOpsPassPipeline(OpPassManager &pm); /// corresponding SPIR-V ops. std::unique_ptr> createConvertToSPIRVPass(); -/// Pass to distribute Linalg ops with buffer semantics to global invocations. -std::unique_ptr> createSPIRVDistributeToGlobalIDPass(); - /// Creates a pass to fold processor ID uses where possible. std::unique_ptr> createSPIRVFoldProcessorIDUsesPass(); diff --git a/iree/compiler/Codegen/Passes.td b/iree/compiler/Codegen/Passes.td index eba35a657f71..83e376fb5898 100644 --- a/iree/compiler/Codegen/Passes.td +++ b/iree/compiler/Codegen/Passes.td @@ -206,15 +206,6 @@ def ConvertToSPIRV : Pass<"iree-convert-to-spirv", "ModuleOp"> { let constructor = "mlir::iree_compiler::createConvertToSPIRVPass()"; } -// TODO: Rename argument to be fully qualified. -def SPIRVDistributeToGlobalID : - Pass<"iree-spirv-distribute-to-global-id", "FuncOp"> { - let summary = "Distribute Linalg ops with buffer semantics to global " - "invocations"; - let constructor = - "mlir::iree_compiler::createSPIRVDistributeToGlobalIDPass()"; -} - // TODO: Rename argument to be fully qualified. 
def SPIRVFoldProcessorIDUses : Pass<"iree-spirv-fold-gpu-procid-uses", "FuncOp"> { diff --git a/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp b/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp index 38240a87e672..877cac71d713 100644 --- a/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp +++ b/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp @@ -13,7 +13,9 @@ #include #include "iree/compiler/Codegen/SPIRV/KernelConfig.h" +#include "llvm/ADT/TypeSwitch.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/IR/BuiltinOps.h" namespace mlir { namespace iree_compiler { diff --git a/iree/compiler/Codegen/SPIRV/BUILD b/iree/compiler/Codegen/SPIRV/BUILD index a63331f7d242..c4dea5d497e2 100644 --- a/iree/compiler/Codegen/SPIRV/BUILD +++ b/iree/compiler/Codegen/SPIRV/BUILD @@ -20,7 +20,6 @@ cc_library( "NVIDIAConfig.cpp", "Passes.cpp", "SPIRVCopyToWorkgroupMemory.cpp", - "SPIRVDistributeToGlobalID.cpp", "SPIRVFoldGPUProcessorIDUses.cpp", "SPIRVLowerExecutableTargetPass.cpp", "SPIRVRemoveOneTripTiledLoops.cpp", @@ -39,6 +38,7 @@ cc_library( deps = [ "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/Transforms", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/Flow/IR", diff --git a/iree/compiler/Codegen/SPIRV/CMakeLists.txt b/iree/compiler/Codegen/SPIRV/CMakeLists.txt index f6bbb282df2e..0c0bd96a15a3 100644 --- a/iree/compiler/Codegen/SPIRV/CMakeLists.txt +++ b/iree/compiler/Codegen/SPIRV/CMakeLists.txt @@ -25,7 +25,6 @@ iree_cc_library( "NVIDIAConfig.cpp" "Passes.cpp" "SPIRVCopyToWorkgroupMemory.cpp" - "SPIRVDistributeToGlobalID.cpp" "SPIRVFoldGPUProcessorIDUses.cpp" "SPIRVLowerExecutableTargetPass.cpp" "SPIRVRemoveOneTripTiledLoops.cpp" @@ -71,6 +70,7 @@ iree_cc_library( MLIRVectorInterfaces MLIRVectorToSPIRV iree::compiler::Codegen::Common + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::Transforms 
iree::compiler::Codegen::Utils diff --git a/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp b/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp index a956c04c08c4..4f9253a2b317 100644 --- a/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp +++ b/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp @@ -15,11 +15,11 @@ #include +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/SPIRV/Utils.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "iree/compiler/Dialect/Util/IR/UtilOps.h" #include "llvm/ADT/DenseMapInfo.h" diff --git a/iree/compiler/Codegen/SPIRV/KernelConfig.cpp b/iree/compiler/Codegen/SPIRV/KernelConfig.cpp index 612947b40c09..d2bb4e14d20a 100644 --- a/iree/compiler/Codegen/SPIRV/KernelConfig.cpp +++ b/iree/compiler/Codegen/SPIRV/KernelConfig.cpp @@ -6,11 +6,10 @@ #include "iree/compiler/Codegen/SPIRV/KernelConfig.h" +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/SPIRV/Utils.h" #include "iree/compiler/Codegen/Transforms/Transforms.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" -#include "iree/compiler/Codegen/Utils/Utils.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" @@ -18,6 +17,7 @@ #include "mlir/Dialect/SPIRV/IR/SPIRVAttributes.h" #include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h" #include "mlir/Dialect/SPIRV/IR/TargetAndABI.h" +#include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Matchers.h" #define DEBUG_TYPE "iree-spirv-kernel-config" @@ -25,37 +25,6 @@ namespace mlir { namespace iree_compiler { -//===----------------------------------------------------------------------===// -// Utilities 
-//===----------------------------------------------------------------------===// - -/// Defines the workgroup count region on entry point ops for the -/// `SPIRVDistributeToGlobalID` pipeline. -// TODO(ravishankarm): Remove this when that pipeline is deprecated. -static LogicalResult setTranslationUsingDistributeToGlobalId( - FuncOp funcOp, ArrayRef workgroupSize) { - auto entryPointOp = getEntryPoint(funcOp); - MLIRContext *context = entryPointOp.getContext(); - auto translationInfo = buildTranslationInfo( - IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistributeToGlobalID, - /*workloadPerWorkgroup =*/{}, context); - setTranslationInfo(entryPointOp, translationInfo, workgroupSize); - OpBuilder builder(context); - int64_t workgroupSizeX = workgroupSize[0]; - auto numWorkgroupsFn = [workgroupSizeX](OpBuilder &b, Location loc, - std::array workload) { - AffineExpr e1, e2, e3; - bindSymbols(b.getContext(), e1, e2, e3); - AffineExpr expr = e1 * e2 * e3; - expr = expr.ceilDiv(workgroupSizeX); - Value numWorkgroupsX = linalg::applyMapToValues( - b, loc, AffineMap::get(0, 3, expr), workload)[0]; - Value one = b.create(loc, 1); - return std::array{numWorkgroupsX, one, one}; - }; - return defineWorkgroupCountRegion(builder, funcOp, numWorkgroupsFn); -} - //===----------------------------------------------------------------------===// // Convolution Default Configuration //===----------------------------------------------------------------------===// @@ -107,9 +76,9 @@ LogicalResult setConvOpConfig(linalg::LinalgOp linalgOp, int64_t residualThreads = subgroupSize; int64_t residualTilingFactor = bestTilingFactor; - SmallVector workgroupSize(3, 1); // (X, Y, Z) - SmallVector workgroupTileSizes(4, 0); // (N, OH, OW, OC) - SmallVector invocationTileSizes(4, 0); // (N, OH, OW, OC) + SmallVector workgroupSize(3, 1); // (X, Y, Z) + SmallVector workgroupTileSizes(4, 0); // (N, OH, OW, OC) + SmallVector invocationTileSizes(4, 0); // (N, OH, OW, OC) // Deduce the configuration for 
the OC dimension. for (int64_t x = residualThreads; x >= 2; x >>= 1) { @@ -181,7 +150,7 @@ LogicalResult setConvOpConfig(linalg::LinalgOp linalgOp, } } - auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize; + auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize; TileSizesListType tileSizes; tileSizes.push_back(workgroupTileSizes); tileSizes.push_back(invocationTileSizes); @@ -244,10 +213,10 @@ LogicalResult setMatmulOpConfig(linalg::LinalgOp op, int64_t residualThreads = bestX * bestY; int64_t residualTilingFactor = (bestThreadM + bestThreadK) * bestThreadN; - SmallVector workgroupSize(3, 1); // (X, Y, Z) - SmallVector workgroupTileSizes(2 + isBM, 0); // (B, M, N) - SmallVector invocationTileSizes(2 + isBM, 0); // (B, M, N) - SmallVector reductionTileSizes(3 + isBM, 0); // (B, M, N, K) + SmallVector workgroupSize(3, 1); // (X, Y, Z) + SmallVector workgroupTileSizes(2 + isBM, 0); // (B, M, N, K) + SmallVector invocationTileSizes(2 + isBM, 0); // (B, M, N, K) + SmallVector reductionTileSizes(3 + isBM, 0); // (B, M, N, K) if (isBM) workgroupTileSizes[0] = invocationTileSizes[0] = 1; @@ -302,7 +271,7 @@ LogicalResult setMatmulOpConfig(linalg::LinalgOp op, } if (reductionTileSizes[2 + isBM] == 0) return success(); - auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize; + auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize; TileSizesListType tileSizes; tileSizes.push_back(workgroupTileSizes); tileSizes.push_back(invocationTileSizes); @@ -321,13 +290,13 @@ LogicalResult setMatmulOpConfig(linalg::LinalgOp op, static LogicalResult setOpConfig(spirv::ResourceLimitsAttr limits, linalg_ext::FftOp op) { const int64_t subgroupSize = limits.subgroup_size().getValue().getSExtValue(); - auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistribute; + auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute; std::array workgroupSize = {subgroupSize, 1, 1}; auto 
partitionedLoops = getPartitionedLoops(op); unsigned loopDepth = partitionedLoops.back() + 1; - SmallVector workgroupTileSize(loopDepth, 0); + SmallVector workgroupTileSize(loopDepth, 0); // Tiling along partitioned loops with size 1. for (int64_t loopIndex : partitionedLoops) { @@ -357,7 +326,7 @@ static LogicalResult setDefaultOpConfig(spirv::ResourceLimitsAttr limits, Operation *op) { auto partitionedLoops = getPartitionedLoops(op); if (partitionedLoops.empty()) { - auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize; + auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize; std::array workgroupSize = {1, 1, 1}; auto funcOp = op->getParentOfType(); return setOpConfigAndEntryPointFnTranslation(funcOp, op, {}, {}, pipeline, @@ -367,7 +336,7 @@ static LogicalResult setDefaultOpConfig(spirv::ResourceLimitsAttr limits, const int64_t subgroupSize = limits.subgroup_size().getValue().getSExtValue(); int64_t numElementsPerWorkgroup = subgroupSize; int64_t numElementsPerThread = 1; - auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistribute; + auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute; // Returns true if the given `operand` has 32-bit element type. auto has32BitElementType = [](Value operand) { @@ -415,15 +384,15 @@ static LogicalResult setDefaultOpConfig(spirv::ResourceLimitsAttr limits, if (vectorize) { numElementsPerThread = numElementsPerWorkgroup / subgroupSize; - pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize; + pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize; } } std::array workgroupSize = {subgroupSize, 1, 1}; unsigned loopDepth = partitionedLoops.back() + 1; - SmallVector workgroupTileSize(loopDepth, 0); - SmallVector threadTileSize(loopDepth, 0); + SmallVector workgroupTileSize(loopDepth, 0); + SmallVector threadTileSize(loopDepth, 0); // Tiling along partitioned loops with size 1. 
for (int64_t loopIndex : partitionedLoops) { @@ -534,24 +503,7 @@ LogicalResult initSPIRVLaunchConfig(ModuleOp module) { return funcOp.emitOpError("failed to get compute ops"); } - int64_t subgroupSize = limits.subgroup_size().getValue().getSExtValue(); - - // If the dispatch region does not contain tiled and distributed Linalg ops, - // invoke the pipeline to distribute to global invocations. - if (tiledLoops.empty() && llvm::none_of(computeOps, [](Operation *op) { - return hasMarker(op, getWorkgroupMarker()); - })) { - std::array workgroupSize = {subgroupSize, 1, 1}; - if (failed( - setTranslationUsingDistributeToGlobalId(funcOp, workgroupSize))) { - return computeOps[0]->emitOpError( - "failed to set translation info for distributing to global IDs"); - } - continue; - } - Operation *rootOperation = nullptr; - // Try to find a configuration according to a matmul/convolution op and use // it as the root op. for (Operation *computeOp : computeOps) { @@ -597,8 +549,9 @@ LogicalResult initSPIRVLaunchConfig(ModuleOp module) { SmallVector workloadPerWorkgroup(tiledLoops.size(), 1); workloadPerWorkgroup.front() = subgroupSize * 4; setTranslationInfo( - funcOp, IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistribute, - workgroupSize, workloadPerWorkgroup); + funcOp, + IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute, + workloadPerWorkgroup, workgroupSize); return success(); } return funcOp.emitError("contains no root Linalg operation"); @@ -610,7 +563,7 @@ LogicalResult initSPIRVLaunchConfig(ModuleOp module) { // and distributed. The rest of the compilation must be structured to either // use `TileAndFuse` or they are independent configurations that are // determined based on the op. 
- IREE::HAL::LoweringConfig config = getLoweringConfig(rootOperation); + IREE::Codegen::LoweringConfigAttr config = getLoweringConfig(rootOperation); for (auto op : computeOps) { if (op == rootOperation) continue; setLoweringConfig(op, config); diff --git a/iree/compiler/Codegen/SPIRV/KernelConfig.h b/iree/compiler/Codegen/SPIRV/KernelConfig.h index c0d4f31c8f24..81858940884d 100644 --- a/iree/compiler/Codegen/SPIRV/KernelConfig.h +++ b/iree/compiler/Codegen/SPIRV/KernelConfig.h @@ -17,9 +17,9 @@ #include -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h" #include "mlir/Dialect/SPIRV/IR/TargetAndABI.h" +#include "mlir/IR/BuiltinOps.h" namespace mlir { namespace iree_compiler { diff --git a/iree/compiler/Codegen/SPIRV/MaliConfig.cpp b/iree/compiler/Codegen/SPIRV/MaliConfig.cpp index 1c19955f33ad..9577d43d1838 100644 --- a/iree/compiler/Codegen/SPIRV/MaliConfig.cpp +++ b/iree/compiler/Codegen/SPIRV/MaliConfig.cpp @@ -13,7 +13,9 @@ #include #include "iree/compiler/Codegen/SPIRV/KernelConfig.h" +#include "llvm/ADT/TypeSwitch.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/IR/BuiltinOps.h" namespace mlir { namespace iree_compiler { diff --git a/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp b/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp index 3d07b75e57a9..dc1e4470dce7 100644 --- a/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp +++ b/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp @@ -10,10 +10,12 @@ // //===----------------------------------------------------------------------===// +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/SPIRV/KernelConfig.h" #include "iree/compiler/Codegen/Utils/Utils.h" #include "llvm/Support/Debug.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/IR/BuiltinOps.h" #define DEBUG_TYPE "iree-spirv-nvidia-config" @@ -80,8 +82,8 @@ static LogicalResult setOpConfig(const spirv::TargetEnv &targetEnv, getElementType(init), 
lhsShape[0], rhsShape[1], lhsShape[1]); if (!coopMatSize) return success(); - auto pipeline = - IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorizeToCooperativeOps; + auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline:: + SPIRVVectorizeToCooperativeOps; // For now only support one subgroup per workgroup because in the above // configuration deduction step we only consider whether the input workload is diff --git a/iree/compiler/Codegen/SPIRV/Passes.cpp b/iree/compiler/Codegen/SPIRV/Passes.cpp index 02964f7e3d76..bf98d6796eaa 100644 --- a/iree/compiler/Codegen/SPIRV/Passes.cpp +++ b/iree/compiler/Codegen/SPIRV/Passes.cpp @@ -150,10 +150,6 @@ void addSPIRVTileAndDistributePassPipeline(OpPassManager &pm) { addLoopMaterializationPasses(pm); } -void addSPIRVDistributeToGlobalIDPassPipeline(OpPassManager &pm) { - pm.addNestedPass(createSPIRVDistributeToGlobalIDPass()); -} - void buildSPIRVCodegenPassPipeline(OpPassManager &pm) { addLinalgBufferizePasses(pm.nest(), gpuAllocationFunction); pm.addPass(createSPIRVLowerExecutableTargetPass()); diff --git a/iree/compiler/Codegen/SPIRV/SPIRVDistributeToGlobalID.cpp b/iree/compiler/Codegen/SPIRV/SPIRVDistributeToGlobalID.cpp deleted file mode 100644 index b8df3b46d159..000000000000 --- a/iree/compiler/Codegen/SPIRV/SPIRVDistributeToGlobalID.cpp +++ /dev/null @@ -1,225 +0,0 @@ -// Copyright 2020 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -//===- SPIRVDistributeToGlobalIDPass.cpp ----------------------------------===// -// -// This pass distributes Linalg ops with buffer semantics to global invocations. 
-// -//===----------------------------------------------------------------------===// - -#include -#include - -#include "iree/compiler/Codegen/PassDetail.h" -#include "iree/compiler/Codegen/Passes.h" -#include "iree/compiler/Codegen/SPIRV/Utils.h" -#include "iree/compiler/Codegen/Transforms/Transforms.h" -#include "iree/compiler/Codegen/Utils/MarkerUtils.h" -#include "iree/compiler/Codegen/Utils/Utils.h" -#include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "iree/compiler/Dialect/Shape/IR/ShapeDialect.h" -#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/GPU/GPUDialect.h" -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" -#include "mlir/Dialect/Linalg/Transforms/Transforms.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/SCF/SCF.h" -#include "mlir/Dialect/SPIRV/IR/TargetAndABI.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/IR/AffineMap.h" -#include "mlir/IR/FunctionSupport.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Support/LLVM.h" -#include "mlir/Transforms/DialectConversion.h" -#include "mlir/Transforms/LoopUtils.h" - -namespace mlir { -namespace iree_compiler { - -//===----------------------------------------------------------------------===// -// Loop utilities -//===----------------------------------------------------------------------===// - -/// Serializes the dimensions of the scf.parallel specified in -/// `serializedDimensions`, by creating an nested scf.for operation for each -/// dimension. -// TODO(ravishankarm): Move this into LoopUtils.h in MLIR. 
-static Operation *serializeDimensions(ConversionPatternRewriter &rewriter, - scf::ParallelOp pLoopOp, - ArrayRef serializedDimensions) { - assert(!serializedDimensions.empty() && - "unhandled corner case of no serializing dims"); - OpBuilder::InsertionGuard guard(rewriter); - DenseSet serializedDimSet; - serializedDimSet.insert(serializedDimensions.begin(), - serializedDimensions.end()); - assert(serializedDimSet.size() == serializedDimensions.size() && - "cannot repeat dimensions during serialization of scf.parallel"); - SmallVector newPLoopBounds, forBounds; - SmallVector permutation; - auto lbs = pLoopOp.lowerBound(); - auto ubs = pLoopOp.upperBound(); - auto steps = pLoopOp.step(); - for (unsigned i : llvm::seq(0, pLoopOp.getNumLoops())) { - if (serializedDimSet.count(i)) { - forBounds.push_back({lbs[i], ubs[i], steps[i]}); - } else { - newPLoopBounds.push_back({lbs[i], ubs[i], steps[i]}); - permutation.push_back(i); - } - } - permutation.append(serializedDimensions.begin(), serializedDimensions.end()); - return replacePLoopOp(rewriter, pLoopOp, newPLoopBounds, forBounds, - permutation); -} - -/// Serialize all inner dimensions of a `pLoopOp` starting from `serializeFrom`. 
-static Operation *serializeDimensionsFrom(ConversionPatternRewriter &rewriter, - scf::ParallelOp pLoopOp, - unsigned serializeFrom) { - unsigned numLoops = pLoopOp.getNumLoops(); - assert(serializeFrom < numLoops && - "unhandled corner case of no serialization"); - SmallVector serializedDimensions; - for (unsigned dim : llvm::seq(serializeFrom, numLoops)) - serializedDimensions.push_back(dim); - return serializeDimensions(rewriter, pLoopOp, serializedDimensions); -} - -//===----------------------------------------------------------------------===// -// GPU processor ID mapping utilities -//===----------------------------------------------------------------------===// - -/// Distributes scf.parallel to processors where `IdOp` is used to get the -/// processor ID and `DimOp` is used to get the number of processors along a -/// dimension. Assumes that the number of processors will be less than equal to -/// the number of iterations of the pLoopOp along all dimensions. -template -static LogicalResult distributeSingleIterationPerProcessor( - ConversionPatternRewriter &rewriter, scf::ParallelOp pLoopOp, - bool generateGuard = true) { - unsigned numLoops = pLoopOp.getNumLoops(); - if (numLoops > 3) { - pLoopOp = - cast(serializeDimensionsFrom(rewriter, pLoopOp, 3)); - numLoops = 3; - } - auto procInfo = getGPUProcessorIdsAndCounts( - rewriter, pLoopOp.getLoc(), numLoops); - return distributeSingleIterationPerProcessor(rewriter, pLoopOp, procInfo, - generateGuard); -} - -//===----------------------------------------------------------------------===// -// Pass and patterns. -//===----------------------------------------------------------------------===// - -namespace { -/// Pass to convert from tiled and fused linalg ops into gpu.func. 
-struct SPIRVDistributeToGlobalIDPass - : public SPIRVDistributeToGlobalIDBase { - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } - void runOnOperation() override; -}; - -/// Given the workload return the workgroup count along X obtained by -/// linearizing the workload and dividing by the workgroup size. -static Value getWorkgroupCountX(OpBuilder &builder, Location loc, - ArrayRef values, - int64_t workgroupSizeX) { - AffineExpr expr = builder.getAffineConstantExpr(1); - for (auto val : enumerate(values)) { - expr = expr * builder.getAffineSymbolExpr(val.index()); - } - expr = expr.ceilDiv(workgroupSizeX); - return linalg::applyMapToValues( - builder, loc, AffineMap::get(0, values.size(), expr), values)[0]; -} - -/// Map linalg operation to execute on GPU in parallel by mapping the parallel -/// loops to "GlobalInvocationId". -template -struct MapLinalgOpToGlobalInvocationId - : public OpConversionPattern { - MapLinalgOpToGlobalInvocationId(MLIRContext *context, - PatternBenefit benefit = 1) - : OpConversionPattern(context, benefit) {} - - LogicalResult matchAndRewrite( - LinalgOpTy linalgOp, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - // If marker exists do nothing. - if (hasMarker(linalgOp)) return failure(); - FuncOp funcOp = linalgOp->template getParentOfType(); - if (!funcOp) return failure(); - Optional loops = - linalg::linalgOpToParallelLoops(rewriter, linalgOp); - if (!loops) return failure(); - - if (!loops.getValue().empty()) { - scf::ParallelOp pLoopOp = dyn_cast(loops.getValue()[0]); - // If there are parallel loops partition them to threads using global - // invocation ID. 
- if (pLoopOp) { - pLoopOp = collapseParallelLoops(rewriter, pLoopOp); - if (!pLoopOp) return failure(); - if (failed(distributeSingleIterationPerProcessor( - rewriter, pLoopOp))) { - return rewriter.notifyMatchFailure( - linalgOp, "mapping to GlobalInvocationID failed"); - } - } - } - rewriter.eraseOp(linalgOp); - return success(); - } -}; - -} // namespace - -void SPIRVDistributeToGlobalIDPass::runOnOperation() { - FuncOp funcOp = getOperation(); - if (!isEntryPoint(funcOp)) return; - - MLIRContext *context = &getContext(); - ConversionTarget target(*context); - // After this pass Linalg and scf.parallel ops should be gone. - target.addIllegalOp(); - target.addIllegalDialect(); - // Reshape ops are treated legal since they just change the way the underlying - // buffer is viewed. These are legalized downstream. They become no ops when - // lowering to SPIR-V since the SPIR-V code uses linearized arrays. - target.addLegalOp(); - // Let the rest fall through. - target.markUnknownOpDynamicallyLegal([](Operation *) { return true; }); - - OwningRewritePatternList patterns(&getContext()); - - patterns.insert, - MapLinalgOpToGlobalInvocationId, - MapLinalgOpToGlobalInvocationId>(context); - FrozenRewritePatternSet frozenPatterns(std::move(patterns)); - - Region &body = funcOp.getBody(); - if (!llvm::hasSingleElement(body)) { - funcOp.emitError("unhandled dispatch function with multiple blocks"); - return signalPassFailure(); - } - if (failed(applyFullConversion(funcOp, target, frozenPatterns))) - return signalPassFailure(); -} - -std::unique_ptr> createSPIRVDistributeToGlobalIDPass() { - return std::make_unique(); -} - -} // namespace iree_compiler -} // namespace mlir diff --git a/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp b/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp index fbc151eb6054..49de3abea2c9 100644 --- a/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp +++ 
b/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp @@ -4,10 +4,11 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/SPIRV/KernelConfig.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtDialect.h" @@ -36,7 +37,8 @@ class SPIRVLowerExecutableTargetPass SPIRVLowerExecutableTargetPass(const SPIRVLowerExecutableTargetPass &pass) {} void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); @@ -71,16 +73,15 @@ void SPIRVLowerExecutableTargetPass::runOnOperation() { // is fine. llvm::StringMap entryPoints = getAllEntryPoints(moduleOp); - Optional passPipeline; + Optional passPipeline; for (auto &it : entryPoints) { auto entryPointOp = it.second; - if (IREE::HAL::TranslationInfo translationInfo = + if (IREE::Codegen::TranslationInfoAttr translationInfo = getTranslationInfo(entryPointOp)) { - Optional currPipeline = - getLoweringPassPipeline(translationInfo); - if (!currPipeline) continue; + IREE::Codegen::DispatchLoweringPassPipeline currPipeline = + translationInfo.getDispatchLoweringPassPipeline(); if (passPipeline) { - if (currPipeline.getValue() != passPipeline.getValue()) { + if (currPipeline != passPipeline.getValue()) { moduleOp.emitError( "unhandled compilation of entry point function with different " "pass pipelines within a module"); @@ -97,16 +98,13 @@ void SPIRVLowerExecutableTargetPass::runOnOperation() { if (!testLoweringConfiguration && passPipeline.hasValue()) { OpPassManager &nestedModulePM = executableLoweringPipeline.nest(); switch (*passPipeline) { 
- case IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistribute: + case IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute: addSPIRVTileAndDistributePassPipeline(nestedModulePM); break; - case IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistributeToGlobalID: - addSPIRVDistributeToGlobalIDPassPipeline(nestedModulePM); - break; - case IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize: + case IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize: addSPIRVTileAndVectorizePassPipeline(nestedModulePM); break; - case IREE::HAL::DispatchLoweringPassPipeline:: + case IREE::Codegen::DispatchLoweringPassPipeline:: SPIRVVectorizeToCooperativeOps: addSPIRVTileAndVectorizeToCooperativeOpsPassPipeline(nestedModulePM); break; diff --git a/iree/compiler/Codegen/SPIRV/SPIRVRemoveOneTripTiledLoops.cpp b/iree/compiler/Codegen/SPIRV/SPIRVRemoveOneTripTiledLoops.cpp index 079f8953052d..21b7e62da190 100644 --- a/iree/compiler/Codegen/SPIRV/SPIRVRemoveOneTripTiledLoops.cpp +++ b/iree/compiler/Codegen/SPIRV/SPIRVRemoveOneTripTiledLoops.cpp @@ -4,6 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/Transforms/Transforms.h" @@ -112,11 +113,7 @@ class SPIRVRemoveOneTripTiledLoopPass auto translationInfo = getTranslationInfo(entryPointOp); if (!translationInfo) return; - ArrayAttr workloadPerWorkgroupAttr = translationInfo.workloadPerWorkgroup(); - if (!workloadPerWorkgroupAttr) return; - auto workloadPerWorkgroup = llvm::to_vector<4>(llvm::map_range( - workloadPerWorkgroupAttr, - [](Attribute attr) { return attr.cast().getInt(); })); + auto workloadPerWorkgroup = translationInfo.getWorkloadPerWorkgroupVals(); MLIRContext *context = &getContext(); removeOneTripTiledLoops(context, funcOp, cast(rootOp[0]), diff --git a/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp b/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp index 07b3a62864c4..15dc4fe54ced 100644 --- a/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp +++ b/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/SPIRV/Utils.h" #include "iree/compiler/Codegen/Transforms/Transforms.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "iree/compiler/Dialect/LinalgExt/Transforms/Transforms.h" #include "llvm/ADT/STLExtras.h" @@ -79,11 +79,7 @@ static void populateTilingToInvocationPatterns(MLIRContext *context, RewritePatternSet &patterns) { linalg::TileSizeComputationFunction getInnerTileSizeFn = [&](OpBuilder &builder, Operation *op) { - SmallVector tileSizes = 
getTileSizes(op, 1); - return llvm::to_vector<4>( - llvm::map_range(tileSizes, [&](int64_t v) -> Value { - return builder.create(op->getLoc(), v); - })); + return getTileSizes(builder, op, 1); }; auto getThreadProcInfoFn = [](OpBuilder &builder, Location loc, @@ -161,11 +157,7 @@ static void populateTilingReductionPatterns( MLIRContext *context, RewritePatternSet &patterns, linalg::LinalgTransformationFilter marker) { auto getTileSizeFn = [&](OpBuilder &builder, Operation *op) { - SmallVector tileSizes = getTileSizes(op, 2); - return llvm::to_vector<4>( - llvm::map_range(tileSizes, [&](int64_t v) -> Value { - return builder.create(op->getLoc(), v); - })); + return getTileSizes(builder, op, 2); }; auto tilingOptions = linalg::LinalgTilingOptions() diff --git a/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp b/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp index dc611dec4cdc..3a3c10b5696c 100644 --- a/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp +++ b/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp @@ -13,6 +13,7 @@ #include +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/SPIRV/KernelConfig.h" @@ -20,7 +21,6 @@ #include "iree/compiler/Codegen/Transforms/Transforms.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" #include "iree/compiler/Codegen/Utils/Utils.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "llvm/Support/Debug.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/GPU/GPUDialect.h" diff --git a/iree/compiler/Codegen/SPIRV/test/BUILD b/iree/compiler/Codegen/SPIRV/test/BUILD index f3c95cb5c061..c57ce23e6644 100644 --- a/iree/compiler/Codegen/SPIRV/test/BUILD +++ b/iree/compiler/Codegen/SPIRV/test/BUILD @@ -28,7 +28,6 @@ iree_lit_test_suite( "config_mali_matmul.mlir", 
"config_nvidia_matmul_cooperative_ops.mlir", "convert_to_spirv.mlir", - "distribute_to_global_id.mlir", "fold_gpu_procid_uses.mlir", "pipeline_matmul_cooperative_ops.mlir", "pipeline_matmul_vectorization.mlir", diff --git a/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt b/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt index c3aefd1eb29b..6fbb76274e56 100644 --- a/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt +++ b/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt @@ -23,7 +23,6 @@ iree_lit_test_suite( "config_mali_matmul.mlir" "config_nvidia_matmul_cooperative_ops.mlir" "convert_to_spirv.mlir" - "distribute_to_global_id.mlir" "fold_gpu_procid_uses.mlir" "pipeline_matmul_cooperative_ops.mlir" "pipeline_matmul_vectorization.mlir" diff --git a/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir b/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir index aadabbccff45..c1781560ce43 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s // Conv - large OC - distribute to only one workgroup dimension. 
@@ -74,18 +74,20 @@ hal.executable @conv_112x112x512 { } } -// CHECK-LABEL: hal.executable.entry_point public @conv_112x112x512 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [256, 8, 1]} -// CHECK-SAME: workgroup_size = [64 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C2:.+]] = arith.constant 2 : index -// CHECK-NEXT: %[[C14:.+]] = arith.constant 14 : index -// CHECK-NEXT: %[[C112:.+]] = arith.constant 112 : index -// CHECK-NEXT: hal.return %[[C2]], %[[C14]], %[[C112]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [256, 8, 1]> +// CHECK: hal.executable.entry_point public @conv_112x112x512 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [64 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C2:.+]] = arith.constant 2 : index +// CHECK-NEXT: %[[C14:.+]] = arith.constant 14 : index +// CHECK-NEXT: %[[C112:.+]] = arith.constant 112 : index +// CHECK-NEXT: hal.return %[[C2]], %[[C14]], %[[C112]] -// CHECK: func @conv_112x112x512() -// CHECK: linalg.conv_2d_nhwc_hwcf -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 1, 8, 256], [0, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4]]} +// CHECK: func @conv_112x112x512() +// CHECK: linalg.conv_2d_nhwc_hwcf +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -163,18 +165,20 @@ hal.executable @conv_112x112x32 { } } -// CHECK-LABEL: hal.executable.entry_point public @conv_112x112x32 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 16, 4]} -// CHECK-SAME: workgroup_size = [8 : index, 8 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : 
index -// CHECK-NEXT: %[[C7:.+]] = arith.constant 7 : index -// CHECK-NEXT: %[[C28:.+]] = arith.constant 28 : index -// CHECK-NEXT: hal.return %[[C1]], %[[C7]], %[[C28]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 16, 4]> +// CHECK: hal.executable.entry_point public @conv_112x112x32 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 8 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[C7:.+]] = arith.constant 7 : index +// CHECK-NEXT: %[[C28:.+]] = arith.constant 28 : index +// CHECK-NEXT: hal.return %[[C1]], %[[C7]], %[[C28]] -// CHECK: func @conv_112x112x32() -// CHECK: linalg.conv_2d_nhwc_hwcf -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 4, 16, 32], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4]]} +// CHECK: func @conv_112x112x32() +// CHECK: linalg.conv_2d_nhwc_hwcf +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -251,17 +255,19 @@ hal.executable @conv_16x16x16 { } } -// CHECK-LABEL: hal.executable.entry_point public @conv_16x16x16 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 8, 8]} -// CHECK-SAME: workgroup_size = [4 : index, 4 : index, 4 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[C2:.+]] = arith.constant 2 : index -// CHECK-NEXT: hal.return %[[C1]], %[[C2]], %[[C2]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 8, 8]> +// CHECK: hal.executable.entry_point public @conv_16x16x16 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [4 : 
index, 4 : index, 4 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[C2:.+]] = arith.constant 2 : index +// CHECK-NEXT: hal.return %[[C1]], %[[C2]], %[[C2]] -// CHECK: func @conv_16x16x16() -// CHECK: linalg.conv_2d_nhwc_hwcf -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 8, 8, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4]]} +// CHECK: func @conv_16x16x16() +// CHECK: linalg.conv_2d_nhwc_hwcf +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -340,17 +346,19 @@ hal.executable @dwconv_28x28x144 { } } -// CHECK-LABEL: hal.executable.entry_point public @dwconv_28x28x144 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]} -// CHECK-SAME: workgroup_size = [4 : index, 4 : index, 4 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C9:.+]] = arith.constant 9 : index -// CHECK-NEXT: %[[C7:.+]] = arith.constant 7 : index -// CHECK-NEXT: hal.return %[[C9]], %[[C7]], %[[C7]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]> +// CHECK: hal.executable.entry_point public @dwconv_28x28x144 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [4 : index, 4 : index, 4 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C9:.+]] = arith.constant 9 : index +// CHECK-NEXT: %[[C7:.+]] = arith.constant 7 : index +// CHECK-NEXT: hal.return %[[C9]], %[[C7]], %[[C7]] -// CHECK: func @dwconv_28x28x144() -// CHECK: linalg.depthwise_conv2D_nhw -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 4, 4, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]]} +// CHECK: func @dwconv_28x28x144() +// CHECK: linalg.depthwise_conv2D_nhw +// CHECK-SAME: lowering.config = 
#[[CONFIG]] // ----- @@ -428,14 +436,15 @@ hal.executable @dwconv_4x4x8 { } } } +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 4, 4]> +// CHECK: hal.executable.entry_point public @dwconv_4x4x8 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [2 : index, 4 : index, 4 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: hal.return %[[C1]], %[[C1]], %[[C1]] -// CHECK-LABEL: hal.executable.entry_point public @dwconv_4x4x8 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 4, 4]} -// CHECK-SAME: workgroup_size = [2 : index, 4 : index, 4 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index -// CHECK-NEXT: hal.return %[[C1]], %[[C1]], %[[C1]] - -// CHECK: func @dwconv_4x4x8() -// CHECK: linalg.depthwise_conv2D_nhw -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 4, 4, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]]} +// CHECK: func @dwconv_4x4x8() +// CHECK: linalg.depthwise_conv2D_nhw +// CHECK-SAME: lowering.config = #[[CONFIG]] diff --git a/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir index 8412a2808af6..251f21fcf408 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file 
-pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s // Large matmul that can match the best tiling scheme. @@ -62,18 +62,22 @@ hal.executable @matmul_1024x2048x512 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_1024x2048x512 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [128, 32]} -// CHECK-SAME: workgroup_size = [32 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 128)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_1024x2048x512() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[32, 128], [16, 4], [0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 128) +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [128, 32]> +// CHECK: hal.executable.entry_point public @matmul_1024x2048x512 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [32 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_1024x2048x512() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = 
#[[CONFIG]] // ----- @@ -139,18 +143,22 @@ hal.executable @matmul_3136x24x96 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_3136x24x96 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 448]} -// CHECK-SAME: workgroup_size = [2 : index, 32 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 448)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_3136x24x96() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[448, 8], [14, 4], [0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 448)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 448]> +// CHECK: hal.executable.entry_point public @matmul_3136x24x96 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [2 : index, 32 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_3136x24x96() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -216,18 +224,22 @@ hal.executable @matmul_196x64x192 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_196x64x192 -// CHECK-SAME: translation.info = {passPipeline = 
"SPIRVVectorize", workloadPerWorkgroup = [64, 28]} -// CHECK-SAME: workgroup_size = [16 : index, 4 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 64)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 28)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_196x64x192() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[28, 64], [7, 4], [0, 0, 8]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 28)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 28]> +// CHECK: hal.executable.entry_point public @matmul_196x64x192 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [16 : index, 4 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_196x64x192() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -288,18 +300,22 @@ hal.executable @matmul_12544x96x16 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_12544x96x16 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 128]} -// CHECK-SAME: workgroup_size = [8 : index, 8 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: 
%[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 128)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_12544x96x16() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[128, 32], [16, 4], [0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 128]> +// CHECK: hal.executable.entry_point public @matmul_12544x96x16 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 8 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_12544x96x16() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -365,18 +381,22 @@ hal.executable @matmul_49x160x576 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_49x160x576 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 7]} -// CHECK-SAME: workgroup_size = [8 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 
ceildiv 7)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_49x160x576() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[7, 32], [7, 4], [0, 0, 8]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 7)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 7]> +// CHECK: hal.executable.entry_point public @matmul_49x160x576 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_49x160x576() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -453,17 +473,21 @@ hal.executable @batch_matmul_4x384x384 { } } -// CHECK-LABEL: hal.executable.entry_point public @batch_matmul_4x384x384 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [128, 32, 1]} -// CHECK-SAME: workgroup_size = [32 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 128)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] - -// CHECK: func @batch_matmul_4x384x384() -// CHECK: linalg.batch_matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 32, 128], [1, 16, 4], [0, 0, 0, 4]]} 
+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [128, 32, 1]> +// CHECK: hal.executable.entry_point public @batch_matmul_4x384x384 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [32 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] + +// CHECK: func @batch_matmul_4x384x384() +// CHECK: linalg.batch_matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -540,14 +564,17 @@ hal.executable @batch_matmul_4x8x8 { } } -// CHECK-LABEL: hal.executable.entry_point public @batch_matmul_4x8x8 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 8, 1]} -// CHECK-SAME: workgroup_size = [2 : index, 8 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] - -// CHECK: func @batch_matmul_4x8x8() -// CHECK: linalg.batch_matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 8, 8], [1, 1, 4], [0, 0, 0, 16]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 8, 1]> +// CHECK: hal.executable.entry_point public 
@batch_matmul_4x8x8 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [2 : index, 8 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] + +// CHECK: func @batch_matmul_4x8x8() +// CHECK: linalg.batch_matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] \ No newline at end of file diff --git a/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir index 0b584bfc1da5..10dc64f3c81d 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s // Odd K that forbids vectorization. 
@@ -74,16 +74,19 @@ hal.executable @batch_matmul_1x3x32 { } } -// CHECK-LABEL: hal.executable.entry_point public @batch_matmul_1x3x32 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [4, 1, 1]} -// CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%[[X]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y]], %[[Z]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [4, 1, 1]> +// CHECK: hal.executable.entry_point public @batch_matmul_1x3x32 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y]], %[[Z]] -// CHECK: func @batch_matmul_1x3x32() -// CHECK: linalg.batch_matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 1, 4], [1, 1, 1]]} +// CHECK: func @batch_matmul_1x3x32() +// CHECK: linalg.batch_matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -149,14 +152,17 @@ hal.executable private @matmul_64x16 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_64x16 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [4, 1]} -// CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%[[X]]] -// CHECK-NEXT: hal.return 
%[[X_COUNT]], %[[Y]], %[[ONE]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [4, 1]> +// CHECK: hal.executable.entry_point public @matmul_64x16 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y]], %[[ONE]] -// CHECK: func @matmul_64x16() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 4], [1, 1]]} +// CHECK: func @matmul_64x16() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] diff --git a/iree/compiler/Codegen/SPIRV/test/config_linalg_ext_ops.mlir b/iree/compiler/Codegen/SPIRV/test/config_linalg_ext_ops.mlir index ae68820d73f0..57ea3d876000 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_linalg_ext_ops.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_linalg_ext_ops.mlir @@ -1,5 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s - +// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s hal.executable private @static_1d_sort { hal.interface @io { hal.interface.binding @s0b0_rw_external, set=0, binding=0, type="StorageBuffer", access="Read|Write" @@ -34,8 +33,10 @@ hal.executable private @static_1d_sort { // Check that the workgroup count and size are (1, 1, 1) for serializing the computation. 
-// CHECK-LABEL: hal.executable.entry_point public @static_1d_sort -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize"} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = []> +// CHECK: hal.executable.entry_point public @static_1d_sort +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [1 : index, 1 : index, 1 : index] // CHECK-NEXT: ^{{.+}}(%{{.+}}: index, %{{.+}}: index, %{{.+}}: index): // CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index @@ -43,7 +44,7 @@ hal.executable private @static_1d_sort { // CHECK: func @static_1d_sort() // CHECK: linalg_ext.sort -// CHECK-SAME: lowering.config = {} +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -98,17 +99,20 @@ hal.executable private @static_3d_sort { } } -// CHECK-LABEL: hal.executable.entry_point public @static_3d_sort -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [16, 1]} -// CHECK-SAME: workgroup_size = [16 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[DIV:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%[[X]]] -// CHECK-NEXT: hal.return %[[DIV]], %[[Y]], %[[ONE]] - -// CHECK: func @static_3d_sort() -// CHECK: linalg_ext.sort -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 0, 16], [1, 0, 1]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [16, 1]> +// CHECK: hal.executable.entry_point public @static_3d_sort +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [16 : index, 1 : index, 1 : index] +// CHECK-NEXT: 
^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[DIV:.+]] = affine.apply #[[MAP]]()[%[[X]]] +// CHECK-NEXT: hal.return %[[DIV]], %[[Y]], %[[ONE]] + +// CHECK: func @static_3d_sort() +// CHECK: linalg_ext.sort +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -144,18 +148,20 @@ hal.executable private @static_1d_fft_stage2 { } } -// CHECK-LABEL: hal.executable.entry_point public @static_1d_fft_stage2 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute" -// CHECK-SAME: workloadPerWorkgroup = [4]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [4]> +// CHECK: hal.executable.entry_point public @static_1d_fft_stage2 +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [16 : index, 1 : index, 1 : index] // CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %{{.+}}: index, %{{.+}}: index): // CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[T:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%[[ARG0]]] +// CHECK-NEXT: %[[T:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] // CHECK-NEXT: hal.return %[[T]], %[[ONE]], %[[ONE]] // CHECK: func @static_1d_fft_stage2() // CHECK: linalg_ext.fft -// CHECK-SAME: lowering.config = {tileSizes = {{\[}}[4]]} +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -214,14 +220,16 @@ hal.executable private @static_3d_fft_stage3 { } -// CHECK-LABEL: hal.executable.entry_point public @static_3d_fft_stage3 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute" -// CHECK-SAME: workloadPerWorkgroup = [8, 1, 1]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[TRANSLATION:.+]] = 
#iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [8, 1, 1]> +// CHECK: hal.executable.entry_point public @static_3d_fft_stage3 +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [16 : index, 1 : index, 1 : index] // CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index): -// CHECK-NEXT: %[[T:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[ARG0]]] +// CHECK-NEXT: %[[T:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] // CHECK-NEXT: hal.return %[[T]], %[[ARG1]], %[[ARG2]] // CHECK: func @static_3d_fft_stage3() // CHECK: linalg_ext.fft -// CHECK-SAME: lowering.config = {tileSizes = {{\[}}[1, 1, 8]]} +// CHECK-SAME: lowering.config = #[[CONFIG]] diff --git a/iree/compiler/Codegen/SPIRV/test/config_linalg_ops.mlir b/iree/compiler/Codegen/SPIRV/test/config_linalg_ops.mlir index 0cb6a62c3827..4ed74419e78d 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_linalg_ops.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_linalg_ops.mlir @@ -47,9 +47,10 @@ hal.executable @tensor_insert { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [64, 1]> // CHECK: hal.executable.entry_point public @tensor_insert_slice -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [64, 1]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index @@ -100,10 +101,11 @@ hal.executable @tensor_insert { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[1, 16], [1, 1]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)> +// CHECK-DAG: #[[TRANSLATION:.+]] = 
#iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [16, 1]> // CHECK: hal.executable.entry_point public @tensor_insert_slice -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [16, 1]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index diff --git a/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir b/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir index 02000f274de1..644ab49b27f7 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s // Conv - large OC - distribute to only one workgroup dimension. 
@@ -74,18 +74,20 @@ hal.executable @conv_112x112x512 { } } -// CHECK-LABEL: hal.executable.entry_point public @conv_112x112x512 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 4, 1]} -// CHECK-SAME: workgroup_size = [16 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C8:.+]] = arith.constant 8 : index -// CHECK-NEXT: %[[C28:.+]] = arith.constant 28 : index -// CHECK-NEXT: %[[C112:.+]] = arith.constant 112 : index -// CHECK-NEXT: hal.return %[[C8]], %[[C28]], %[[C112]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 4, 1]> +// CHECK: hal.executable.entry_point public @conv_112x112x512 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [16 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C8:.+]] = arith.constant 8 : index +// CHECK-NEXT: %[[C28:.+]] = arith.constant 28 : index +// CHECK-NEXT: %[[C112:.+]] = arith.constant 112 : index +// CHECK-NEXT: hal.return %[[C8]], %[[C28]], %[[C112]] -// CHECK: func @conv_112x112x512() -// CHECK: linalg.conv_2d_nhwc_hwcf -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 1, 4, 64], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4]]} +// CHECK: func @conv_112x112x512() +// CHECK: linalg.conv_2d_nhwc_hwcf +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -163,18 +165,20 @@ hal.executable @conv_112x112x32 { } } -// CHECK-LABEL: hal.executable.entry_point public @conv_112x112x32 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 8, 1]} -// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : 
index -// CHECK-NEXT: %[[C14:.+]] = arith.constant 14 : index -// CHECK-NEXT: %[[C112:.+]] = arith.constant 112 : index -// CHECK-NEXT: hal.return %[[C1]], %[[C14]], %[[C112]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 8, 1]> +// CHECK: hal.executable.entry_point public @conv_112x112x32 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[C14:.+]] = arith.constant 14 : index +// CHECK-NEXT: %[[C112:.+]] = arith.constant 112 : index +// CHECK-NEXT: hal.return %[[C1]], %[[C14]], %[[C112]] -// CHECK: func @conv_112x112x32() -// CHECK: linalg.conv_2d_nhwc_hwcf -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 1, 8, 32], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4]]} +// CHECK: func @conv_112x112x32() +// CHECK: linalg.conv_2d_nhwc_hwcf +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -251,17 +255,19 @@ hal.executable @conv_16x16x16 { } } -// CHECK-LABEL: hal.executable.entry_point public @conv_16x16x16 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]} -// CHECK-SAME: workgroup_size = [4 : index, 2 : index, 2 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[C4:.+]] = arith.constant 4 : index -// CHECK-NEXT: hal.return %[[C1]], %[[C4]], %[[C4]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]> +// CHECK: hal.executable.entry_point public @conv_16x16x16 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = 
[4 : index, 2 : index, 2 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[C4:.+]] = arith.constant 4 : index +// CHECK-NEXT: hal.return %[[C1]], %[[C4]], %[[C4]] -// CHECK: func @conv_16x16x16() -// CHECK: linalg.conv_2d_nhwc_hwcf -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 4, 4, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4]]} +// CHECK: func @conv_16x16x16() +// CHECK: linalg.conv_2d_nhwc_hwcf +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -340,17 +346,19 @@ hal.executable @dwconv_28x28x144 { } } -// CHECK-LABEL: hal.executable.entry_point public @dwconv_28x28x144 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]} -// CHECK-SAME: workgroup_size = [4 : index, 2 : index, 2 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C9:.+]] = arith.constant 9 : index -// CHECK-NEXT: %[[C7:.+]] = arith.constant 7 : index -// CHECK-NEXT: hal.return %[[C9]], %[[C7]], %[[C7]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]> +// CHECK: hal.executable.entry_point public @dwconv_28x28x144 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [4 : index, 2 : index, 2 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C9:.+]] = arith.constant 9 : index +// CHECK-NEXT: %[[C7:.+]] = arith.constant 7 : index +// CHECK-NEXT: hal.return %[[C9]], %[[C7]], %[[C7]] -// CHECK: func @dwconv_28x28x144() -// CHECK: linalg.depthwise_conv2D_nhw -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 4, 4, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1]]} +// CHECK: func @dwconv_28x28x144() +// CHECK: linalg.depthwise_conv2D_nhw +// CHECK-SAME: 
lowering.config = #[[CONFIG]] // ----- @@ -430,14 +438,16 @@ hal.executable @dwconv_1x2x8 { } } -// CHECK-LABEL: hal.executable.entry_point public @dwconv_1x2x8 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 2, 1]} -// CHECK-SAME: workgroup_size = [2 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index -// CHECK-NEXT: hal.return %[[C1]], %[[C1]], %[[C1]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 2, 1]> +// CHECK: hal.executable.entry_point public @dwconv_1x2x8 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [2 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: hal.return %[[C1]], %[[C1]], %[[C1]] -// CHECK: func @dwconv_1x2x8() -// CHECK: linalg.depthwise_conv2D_nhw -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 1, 2, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]]} +// CHECK: func @dwconv_1x2x8() +// CHECK: linalg.depthwise_conv2D_nhw +// CHECK-SAME: lowering.config = #[[CONFIG]] diff --git a/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir index 4d72dc471823..fe03afa14a9f 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file 
-pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s // Large matmul that can match the best tiling scheme. @@ -62,18 +62,22 @@ hal.executable @matmul_1024x2048x512 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_1024x2048x512 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 8]} -// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_1024x2048x512() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[8, 32], [4, 4], [0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 8]> +// CHECK: hal.executable.entry_point public @matmul_1024x2048x512 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_1024x2048x512() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- 
@@ -139,18 +143,22 @@ hal.executable @matmul_3136x24x96 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_3136x24x96 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 32]} -// CHECK-SAME: workgroup_size = [2 : index, 8 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_3136x24x96() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[32, 8], [4, 4], [0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 32]> +// CHECK: hal.executable.entry_point public @matmul_3136x24x96 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [2 : index, 8 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_3136x24x96() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -216,18 +224,22 @@ hal.executable @matmul_196x64x192 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_196x64x192 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup 
= [32, 4]} -// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_196x64x192() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[4, 32], [2, 4], [0, 0, 8]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4]> +// CHECK: hal.executable.entry_point public @matmul_196x64x192 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_196x64x192() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -288,18 +300,22 @@ hal.executable @matmul_12544x96x16 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_12544x96x16 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 8]} -// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// 
CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_12544x96x16() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[8, 32], [4, 4], [0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 8]> +// CHECK: hal.executable.entry_point public @matmul_12544x96x16 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_12544x96x16() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -365,17 +381,20 @@ hal.executable @matmul_49x160x576 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_49x160x576 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 1]} -// CHECK-SAME: workgroup_size = [8 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y]], %[[ONE]] - -// CHECK: func @matmul_49x160x576() -// CHECK: linalg.matmul -// 
CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 32], [1, 4], [0, 0, 8]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 1]> +// CHECK: hal.executable.entry_point public @matmul_49x160x576 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y]], %[[ONE]] + +// CHECK: func @matmul_49x160x576() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -452,17 +471,21 @@ hal.executable @batch_matmul_4x384x384 { } } -// CHECK-LABEL: hal.executable.entry_point public @batch_matmul_4x384x384 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 12, 1]} -// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 12)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] - -// CHECK: func @batch_matmul_4x384x384() -// CHECK: linalg.batch_matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 12, 32], [1, 6, 4], [0, 0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 12)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = 
[32, 12, 1]> +// CHECK: hal.executable.entry_point public @batch_matmul_4x384x384 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] + +// CHECK: func @batch_matmul_4x384x384() +// CHECK: linalg.batch_matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -540,14 +563,18 @@ hal.executable @batch_matmul_4x2x8 { } } -// CHECK-LABEL: hal.executable.entry_point public @batch_matmul_4x2x8 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 2, 1]} -// CHECK-SAME: workgroup_size = [2 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] - -// CHECK: func @batch_matmul_4x2x8() -// CHECK: linalg.batch_matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 2, 8], [1, 1, 4], [0, 0, 0, 8]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 2, 1]> +// CHECK: hal.executable.entry_point public @batch_matmul_4x2x8 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [2 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): +// CHECK-NEXT: 
%[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] + +// CHECK: func @batch_matmul_4x2x8() +// CHECK: linalg.batch_matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] diff --git a/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir b/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir index 3968d729923b..1097c3caaae3 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s #map0 = affine_map<()[s0, s1] -> (s0 * s1)> #map1 = affine_map<(d0)[s0] -> (s0, -d0 + 256)> @@ -100,18 +100,21 @@ hal.executable public @matmul_256x1024x128_div_sub { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_256x1024x128_div_sub -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorizeToCooperativeOps", workloadPerWorkgroup = [16, 16]} -// CHECK-SAME: workgroup_size = [32 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[C1]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = 
affine_map<()[s0] -> (s0 ceildiv 16)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorizeToCooperativeOps", workload_per_wg = [16, 16]> +// CHECK: hal.executable.entry_point public @matmul_256x1024x128_div_sub +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [32 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[C1]] -// CHECK: func @matmul_256x1024x128_div_sub() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[16, 16, 16], [16, 16, 16]]} +// CHECK: func @matmul_256x1024x128_div_sub() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -194,5 +197,6 @@ hal.executable public @matmul_256x1024x8 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_256x1024x8 -// CHECK-SAME: passPipeline = "SPIRVVectorize" +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize" +// CHECK: hal.executable.entry_point public @matmul_256x1024x8 +// CHECK-SAME: translation.info = #[[TRANSLATION]] diff --git a/iree/compiler/Codegen/SPIRV/test/distribute_to_global_id.mlir b/iree/compiler/Codegen/SPIRV/test/distribute_to_global_id.mlir deleted file mode 100644 index c7d9c882fd62..000000000000 --- a/iree/compiler/Codegen/SPIRV/test/distribute_to_global_id.mlir +++ /dev/null @@ -1,236 +0,0 @@ -// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-distribute-to-global-id))))' -canonicalize -cse %s | IreeFileCheck %s - -#map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -hal.executable private @parallel_4D { - hal.interface @io { - hal.interface.binding 
@arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> { - hal.executable.entry_point @parallel_4D attributes {interface = @io, ordinal = 0 : index} - builtin.module { - func @parallel_4D() { - %c0 = arith.constant 0 : index - %dim0 = hal.interface.load.constant offset = 0 : index - %dim1 = hal.interface.load.constant offset = 1 : index - %dim2 = hal.interface.load.constant offset = 2 : index - %dim3 = hal.interface.load.constant offset = 3 : index - %arg0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref{%dim0, %dim1, %dim2, %dim3} - %arg1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref{%dim0, %dim1, %dim2, %dim3} - %arg2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref{%dim0, %dim1, %dim2, %dim3} - linalg.generic { - indexing_maps = [#map0, #map0, #map0], - iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%arg0, %arg1 : memref, memref) - outs(%arg2 : memref) { - ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): - %0 = arith.addf %arg3, %arg4 : f32 - linalg.yield %0 : f32 - } - return - } - func private @parallel_4D__num_workgroups__ - (!shapex.ranked_shape<[?,?,?,?]>, !shapex.ranked_shape<[?,?,?,?]>, - !shapex.ranked_shape<[?,?,?,?]>) -> (index, index, index) - hal.interface private @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} -// CHECK-LABEL: func @parallel_4D -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.+]] = 
arith.constant 2 : index -// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index -// CHECK-DAG: %[[UB0:.+]] = memref.dim %{{.+}}, %[[C0]] -// CHECK-DAG: %[[UB1:.+]] = memref.dim %{{.+}}, %[[C1]] -// CHECK-DAG: %[[UB2:.+]] = memref.dim %{{.+}}, %[[C2]] -// CHECK-DAG: %[[UB3:.+]] = memref.dim %{{.+}}, %[[C3]] -// CHECK: %[[T4:.+]] = arith.muli %[[UB3]], %[[UB2]] -// CHECK: %[[T5:.+]] = arith.muli %[[T4]], %[[UB1]] -// CHECK: %[[UB:.+]] = arith.muli %[[T5]], %[[UB0]] -// CHECK-DAG: %[[BID:.+]] = "gpu.block_id"() {dimension = "x"} -// CHECK-DAG: %[[BDIM:.+]] = "gpu.block_dim"() {dimension = "x"} -// CHECK-DAG: %[[TID:.+]] = "gpu.thread_id"() {dimension = "x"} -// CHECK: %[[BOFFSET:.+]] = arith.muli %[[BID]], %[[BDIM]] -// CHECK: %[[IV:.+]] = arith.addi %[[BOFFSET]], %[[TID]] -// CHECK: %[[COND:.+]] = arith.cmpi slt, %[[IV]], %[[UB]] -// CHECK: scf.if %[[COND]] -// CHECK: %[[IV0:.+]] = arith.divsi %[[IV]], %[[T5]] -// CHECK: %[[T14:.+]] = arith.remsi %[[IV]], %[[T5]] -// CHECK: %[[IV1:.+]] = arith.divsi %[[T14]], %[[T4]] -// CHECK: %[[T16:.+]] = arith.remsi %[[T14]], %[[T4]] -// CHECK: %[[IV2:.+]] = arith.divsi %[[T16]], %[[UB3]] -// CHECK: %[[IV3:.+]] = arith.remsi %[[T16]], %[[UB3]] -// CHECK: load %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] -// CHECK: load %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] -// CHECK: store %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] - -// ----- - -#map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -hal.executable private @parallel_4D_static { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> { - hal.executable.entry_point @parallel_4D_static attributes {interface = @io, ordinal = 0 : index} 
- builtin.module { - func @parallel_4D_static() { - %c0 = arith.constant 0 : index - %arg0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<3x4x5x6xf32> - %arg1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<3x4x5x6xf32> - %arg2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<3x4x5x6xf32> - linalg.generic { - indexing_maps = [#map0, #map0, #map0], - iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%arg0, %arg1 : memref<3x4x5x6xf32>, memref<3x4x5x6xf32>) - outs(%arg2 : memref<3x4x5x6xf32>) { - ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): - %0 = arith.addf %arg3, %arg4 : f32 - linalg.yield %0 : f32 - } - return - } - hal.interface private @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} -// CHECK-LABEL: func @parallel_4D_static() -// CHECK-DAG: %[[C360:.+]] = arith.constant 360 : index -// CHECK-DAG: %[[C120:.+]] = arith.constant 120 : index -// CHECK-DAG: %[[C30:.+]] = arith.constant 30 : index -// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index -// CHECK-DAG: %[[BID:.+]] = "gpu.block_id"() {dimension = "x"} -// CHECK-DAG: %[[BDIM:.+]] = "gpu.block_dim"() {dimension = "x"} -// CHECK-DAG: %[[TID:.+]] = "gpu.thread_id"() {dimension = "x"} -// CHECK: %[[BOFFSET:.+]] = arith.muli %[[BID]], %[[BDIM]] -// CHECK: %[[IV:.+]] = arith.addi %[[BOFFSET]], %[[TID]] -// CHECK: %[[COND:.+]] = arith.cmpi slt, %[[IV]], %[[C360]] -// CHECK: scf.if %[[COND]] -// CHECK: %[[IV0:.+]] = arith.divsi %[[IV]], %[[C120]] -// CHECK: %[[T14:.+]] = arith.remsi %[[IV]], %[[C120]] -// CHECK: %[[IV1:.+]] = arith.divsi %[[T14]], %[[C30]] -// CHECK: %[[T16:.+]] = arith.remsi %[[T14]], %[[C30]] -// CHECK: %[[IV2:.+]] = arith.divsi %[[T16]], %[[C6]] -// CHECK: %[[IV3:.+]] = arith.remsi %[[T16]], %[[C6]] -// CHECK: load 
%{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] -// CHECK: load %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] -// CHECK: store %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] - -// ----- - -#map0 = affine_map<() -> ()> -#accesses = [#map0, #map0, #map0] -#trait = { - indexing_maps = #accesses, - iterator_types = [] -} - -hal.executable private @scalar_add { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> { - hal.executable.entry_point @scalar_add attributes {interface = @io, ordinal = 0 : index} - builtin.module { - func @scalar_add() attributes {hal.num_workgroups_fn = @scalar_add__num_workgroups__} { - %c0 = arith.constant 0 : index - %arg0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref - %arg1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref - %arg2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref - linalg.generic #trait - ins(%arg0, %arg1 : memref, memref) - outs(%arg2 : memref) { - ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): - %0 = arith.addf %arg3, %arg4 : f32 - linalg.yield %0 : f32 - } - return - } - hal.interface private @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} -// CHECK-LABEL: func @scalar_add() -// CHECK: load -// CHECK-NEXT: load -// CHECK-NEXT: addf -// CHECK-NEXT: store -// CHECK-NEXT: return - -// ----- - -// TODO(GH-4901): Convert these tests back to use dynamic shapes when linalg on tensors becomes default. 
-hal.executable private @reduce_sum { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> { - hal.executable.entry_point @reduce_sum attributes { - interface = @io, - ordinal = 0 : index - } - builtin.module { - func @reduce_sum() { - %c0 = arith.constant 0 : index - %arg0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<40x50x75xf32> - %arg1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref - %arg2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<40xf32> - linalg.generic { - indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, - affine_map<(d0, d1, d2) -> ()>, - affine_map<(d0, d1, d2) -> (d0)>], - iterator_types = ["parallel", "reduction", "reduction"]} - ins(%arg0, %arg1 : memref<40x50x75xf32>, memref) - outs(%arg2 : memref<40xf32>) { - ^bb0(%arg6: f32, %arg7: f32, %arg8: f32): // no predecessors - %idx1 = linalg.index 1 : index - %idx2 = linalg.index 2 : index - %zero = arith.constant 0 : index - %0 = arith.cmpi eq, %idx2, %zero : index - %1 = arith.cmpi eq, %idx1, %zero : index - %2 = arith.andi %0, %1 : i1 - %3 = select %2, %arg7, %arg8 : f32 - %4 = arith.addf %arg6, %3 : f32 - linalg.yield %4 : f32 - } - return - } - hal.interface private @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} -//CHECK-LABEL: func @reduce_sum -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C40:.+]] = arith.constant 40 : index -// CHECK-DAG: %[[C50:.+]] = 
arith.constant 50 : index -// CHECK-DAG: %[[C75:.+]] = arith.constant 75 : index -// CHECK: %[[COND:.+]] = arith.cmpi slt, %{{.+}}, %[[C40]] -// CHECK: scf.if %[[COND]] -// CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %[[C50]] -// CHECK: scf.for %[[IV1:.+]] = %{{.+}} to %[[C75]] -// CHECK-DAG: %[[ISZERO0:.+]] = arith.cmpi eq, %[[IV0]], %[[C0]] -// CHECK-DAG: %[[ISZERO1:.+]] = arith.cmpi eq, %[[IV1]], %[[C0]] diff --git a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir index fc1058e1469f..3315774f2237 100644 --- a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir +++ b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-codegen-linalg-to-spirv-pipeline))' %s | IreeFileCheck %s -#config = {tileSizes = [[8, 64], [8, 4], [0, 0, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8]> hal.executable private @fuse_and_vectorize_fill_matmul { hal.interface @io { hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read" @@ -13,7 +13,7 @@ hal.executable private @fuse_and_vectorize_fill_matmul { hal.executable.entry_point @fuse_and_vectorize_fill_matmul attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 1: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8]} + translation.info = #translation } builtin.module { func @fuse_and_vectorize_fill_matmul() { @@ -70,8 +70,8 @@ hal.executable private @fuse_and_vectorize_fill_matmul { // ----- -#config = {tileSizes = [[8, 64], [8, 4], [0, 0, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8]> hal.executable private 
@fuse_and_vectorize_matmul_add { hal.interface @io { hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read" @@ -83,7 +83,7 @@ hal.executable private @fuse_and_vectorize_matmul_add { hal.executable.entry_point @fuse_and_vectorize_matmul_add attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 1: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8]} + translation.info = #translation } builtin.module { func @fuse_and_vectorize_matmul_add() { diff --git a/iree/compiler/Codegen/SPIRV/test/remove_one_trip_tiled_loop.mlir b/iree/compiler/Codegen/SPIRV/test/remove_one_trip_tiled_loop.mlir index 2dd847e3e27a..6b8b7bcc91b7 100644 --- a/iree/compiler/Codegen/SPIRV/test/remove_one_trip_tiled_loop.mlir +++ b/iree/compiler/Codegen/SPIRV/test/remove_one_trip_tiled_loop.mlir @@ -1,5 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-remove-one-trip-tiled-loop))))' %s | IreeFileCheck %s +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]> hal.executable private @static_shaped_conv { hal.interface @io { hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read" @@ -9,7 +11,7 @@ hal.executable private @static_shaped_conv { hal.executable.variant @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb"> { hal.executable.entry_point @static_shaped_conv attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]}, + translation.info = #translation, workgroup_size = [4 : index, 4 : index, 1 : index] } builtin.module { @@ -46,8 +48,8 @@ hal.executable private @static_shaped_conv { %16 = affine.min affine_map<(d0) -> (4, -d0 + 112)>(%arg0) %17 = affine.min affine_map<(d0) -> 
(4, -d0 + 112)>(%arg1) %18 = memref.subview %2[0, %arg0, %arg1, %arg2] [1, %16, %17, %14] [1, 1, 1, 1] : memref<1x112x112x32xf32> to memref<1x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 401408 + s0 + d1 * 3584 + d2 * 32 + d3)>> - linalg.fill(%cst, %18) {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[0, 4, 4, 16], [], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4]]}} : f32, memref<1x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 401408 + s0 + d1 * 3584 + d2 * 32 + d3)>> - linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, lowering.config = {tileSizes = [[0, 4, 4, 16], [], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4]]}, strides = dense<2> : tensor<2xi64>} + linalg.fill(%cst, %18) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f32, memref<1x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 401408 + s0 + d1 * 3584 + d2 * 32 + d3)>> + linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, lowering.config = #config, strides = dense<2> : tensor<2xi64>} ins(%13, %15 : memref<1x?x?x3xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 151875 + s0 + d1 * 675 + d2 * 3 + d3)>>, memref<3x3x3x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 288 + s0 + d1 * 96 + d2 * 32 + d3)>>) outs(%18 : memref<1x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 401408 + s0 + d1 * 3584 + d2 * 32 + d3)>>) } diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir index bdd9652f5844..b12a0993fe99 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir @@ -1,5 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-tile-and-distribute))))' %s | IreeFileCheck %s +#config = 
#iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [16, 1]> hal.executable private @static_scatter_update_slice { hal.interface @io { hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read" @@ -10,7 +12,7 @@ hal.executable private @static_scatter_update_slice { hal.executable.variant @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb"> { hal.executable.entry_point @static_scatter_update_slice attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = 5 : i32, workloadPerWorkgroup = [16, 1]}, + translation.info = #translation, workgroup_size = [16 : index, 1 : index, 1 : index] } @@ -36,7 +38,7 @@ hal.executable private @static_scatter_update_slice { %8 = memref.subview %1[%arg0, 0] [1, 1] [1, 1] : memref<40x1xi32> to memref<1x1xi32, affine_map<(d0, d1)[s0] -> (d0 + s0 + d1)>> %9 = memref.cast %8 : memref<1x1xi32, affine_map<(d0, d1)[s0] -> (d0 + s0 + d1)>> to memref (d0 + s0 + d1)>> %10 = memref.subview %2[0, %arg1] [100, %5] [1, 1] : memref<100x500xi32> to memref<100x?xi32, affine_map<(d0, d1)[s0] -> (d0 * 500 + s0 + d1)>> - linalg_ext.scatter {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[1, 16], [1, 1]]}} ins(%7, %9 : memref (d0 * 500 + s0 + d1)>>, memref (d0 + s0 + d1)>>) outs(%10 : memref<100x?xi32, affine_map<(d0, d1)[s0] -> (d0 * 500 + s0 + d1)>>) { + linalg_ext.scatter {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%7, %9 : memref (d0 * 500 + s0 + d1)>>, memref (d0 + s0 + d1)>>) outs(%10 : memref<100x?xi32, affine_map<(d0, d1)[s0] -> (d0 * 500 + s0 + d1)>>) { ^bb0(%arg2: i32, %arg3: i32): // no predecessors linalg_ext.yield %arg2 : i32 } diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir index 201f344c7665..345c7317a644 100644 --- 
a/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir @@ -1,5 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-tile-and-distribute, cse))))' %s | IreeFileCheck %s +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [16, 1]> hal.executable private @static_3d_sort { hal.interface @io { hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read" @@ -8,7 +10,7 @@ hal.executable private @static_3d_sort { hal.executable.variant @vulkan_spirv_fb, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> { hal.executable.entry_point @static_3d_sort attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = 5 : i32, workloadPerWorkgroup = [16, 1]}, + translation.info = #translation, workgroup_size = [16 : index, 1 : index, 1 : index] } builtin.module { @@ -30,8 +32,8 @@ hal.executable private @static_3d_sort { %5 = memref.cast %4 : memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>> to memref %6 = memref.subview %1[%arg0, 0, %arg1] [1, 32, 16] [1, 1, 1] : memref<64x32x128xi32> to memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>> %7 = memref.cast %6 : memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>> to memref (d0 * 4096 + s0 + d1 * 128 + d2)>> - linalg.copy(%5, %6) {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[1, 0, 16], [1, 0, 1]]}} : memref, memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>> - linalg_ext.sort dimension(1) {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[1, 0, 16], [1, 0, 1]]}} outs(%7 : memref (d0 * 4096 + s0 + d1 * 128 + d2)>>) { + linalg.copy(%5, 
%6) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : memref, memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>> + linalg_ext.sort dimension(1) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} outs(%7 : memref (d0 * 4096 + s0 + d1 * 128 + d2)>>) { ^bb0(%arg2: i32, %arg3: i32): // no predecessors %8 = arith.cmpi slt, %arg2, %arg3 : i32 linalg_ext.yield %8 : i1 diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir index 8bf2d36bced6..958207c71242 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir @@ -8,7 +8,8 @@ #map5 = affine_map<(d0, d1, d2) -> (d2, d1)> #map6 = affine_map<(d0, d1, d2) -> (d0, d1)> -#config = {tileSizes = [[8, 16], [1, 1], [0, 0, 1]]} +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 16]> hal.executable private @matmul { hal.interface @io { @@ -20,7 +21,7 @@ hal.executable private @matmul { hal.executable.entry_point @matmul attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 8: index, 1: index], - translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [8, 16]} + translation.info = #translation } builtin.module { func @matmul() { @@ -82,8 +83,8 @@ hal.executable private @matmul { // ----- -#config = {tileSizes = [[1, 4, 32], [1, 1, 1]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4, 1]> hal.executable private @conv_1d { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -94,7 +95,7 @@ hal.executable private @conv_1d { hal.executable.entry_point @conv_1d attributes { interface = @io, ordinal = 0 : index, workgroup_size = [32: index, 4: index, 1: index], - 
translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]} + translation.info = #translation } builtin.module { func @conv_1d() { @@ -165,8 +166,8 @@ hal.executable private @conv_1d { #map6 = affine_map<(d0)[s0] -> (4, -d0 + s0)> #map7 = affine_map<(d0)[s0] -> (32, -d0 + s0)> -#config = {tileSizes = [[0, 1, 4, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4, 1]> hal.executable private @conv_no_padding { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -177,7 +178,7 @@ hal.executable private @conv_no_padding { hal.executable.entry_point @conv_no_padding attributes { interface = @io, ordinal = 0 : index, workgroup_size = [32: index, 4: index, 1: index], - translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]} + translation.info = #translation } builtin.module { func @conv_no_padding() { @@ -292,8 +293,8 @@ hal.executable private @conv_no_padding { // ----- -#config = {tileSizes = [[0, 0, 1, 4, 32], [0, 0, 1, 1, 1]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4, 1]> hal.executable private @conv_3d { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -304,7 +305,7 @@ hal.executable private @conv_3d { hal.executable.entry_point @conv_3d attributes { interface = @io, ordinal = 0 : index, workgroup_size = [32: index, 4: index, 1: index], - translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]} + translation.info = #translation } builtin.module { func @conv_3d() { @@ -365,8 +366,8 @@ hal.executable private @conv_3d { #map6 = affine_map<()[s0] -> (32, s0 * -32 + 13)> #map7 = affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 1092 + s0 + d1 * 78 + d2 * 6 + d3)> -#config = {tileSizes = [[1, 4, 
32], [1, 1, 1]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4, 1]> module { hal.executable private @pooling_nhwc_max { hal.interface @io { @@ -378,7 +379,7 @@ module { hal.executable.entry_point @pooling_nhwc_max attributes { interface = @io, ordinal = 0 : index, workgroup_size = [32: index, 4: index, 1: index], - translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]} + translation.info = #translation } builtin.module { func @pooling_nhwc_max() { diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir index 2cd1b62aeaf9..6018c6b94794 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(iree-spirv-tile-and-distribute,iree-spirv-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s -#config = {tileSizes = [[1, 8, 64], [1, 8, 4], [0, 0, 0, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8, 1]> hal.executable private @batch_matmul_static_shape { hal.interface private @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -12,7 +12,7 @@ hal.executable private @batch_matmul_static_shape { hal.executable.entry_point @batch_matmul_static_shape attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 1: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8, 1]} + translation.info = #translation } builtin.module { func @batch_matmul_static_shape() { @@ -370,8 +370,8 @@ hal.executable private 
@batch_matmul_static_shape { // ----- -#config = {tileSizes = [[1, 8, 64], [1, 8, 4], [0, 0, 0, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8, 1]> hal.executable private @fused_fill_batch_matmul { hal.interface private @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -382,7 +382,7 @@ hal.executable private @fused_fill_batch_matmul { hal.executable.entry_point @fused_fill_batch_matmul attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 1: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8, 1]} + translation.info = #translation } builtin.module { func @fused_fill_batch_matmul() { diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir index ffd3b8f83b13..757dd467d2d2 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(canonicalize,iree-spirv-remove-one-trip-tiled-loop,iree-spirv-tile-and-distribute,iree-spirv-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s -#config = {tileSizes = [[0, 4, 4, 16], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]> hal.executable private @conv_static_shape_f32 { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -13,7 +13,7 @@ hal.executable private @conv_static_shape_f32 { interface = @io, ordinal = 0 : index, workgroup_size = [4: index, 4: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", 
workloadPerWorkgroup = [16, 4, 4]} + translation.info = #translation } { ^bb0(%arg0 : index, %arg1 : index, %arg2 : index): %x = arith.constant 2: index @@ -99,8 +99,8 @@ hal.executable private @conv_static_shape_f32 { // ----- -#config = {tileSizes = [[0, 4, 4, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]> hal.executable private @depthwise_conv_static_shape_f32 { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -112,7 +112,7 @@ hal.executable private @depthwise_conv_static_shape_f32 { interface = @io, ordinal = 0 : index, workgroup_size = [4: index, 4: index, 4: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]} + translation.info = #translation } { ^bb0(%arg0 : index, %arg1 : index, %arg2 : index): %x = arith.constant 6: index diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir index 0906c264c185..a3c5db1f5a0b 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(iree-spirv-tile-and-distribute,iree-spirv-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s -#config = {tileSizes = [[8, 64], [8, 4], [0, 0, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8]> hal.executable private @matmul_static_shape_f16 { hal.interface private @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -12,7 +12,7 @@ hal.executable private @matmul_static_shape_f16 { hal.executable.entry_point 
@matmul_static_shape_f16 attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 1: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8]} + translation.info = #translation } builtin.module { func @matmul_static_shape_f16() { @@ -66,8 +66,8 @@ hal.executable private @matmul_static_shape_f16 { // ----- -#config = {tileSizes = [[8, 64], [8, 4], [0, 0, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8]> hal.executable private @matmul_static_shape_f32 { hal.interface private @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -78,7 +78,7 @@ hal.executable private @matmul_static_shape_f32 { hal.executable.entry_point @matmul_static_shape_f32 attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 1: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8]} + translation.info = #translation } builtin.module { func @matmul_static_shape_f32() { diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir index e63bd03118d0..42e13de893df 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir @@ -1,5 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-tile-and-vectorize-to-cooperative-ops))))' %s | IreeFileCheck %s +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorizeToCooperativeOps", workload_per_wg = [16, 16]> hal.executable public @matmul_256x1024x128_div_sub { hal.interface public @io { hal.interface.binding public @s0b0_ro_external, set=0, binding=0, 
type="StorageBuffer", access="Read" @@ -28,7 +30,7 @@ hal.executable public @matmul_256x1024x128_div_sub { subgroup_size = 32 : i32}>}> { hal.executable.entry_point public @matmul_256x1024x128_div_sub attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = "SPIRVVectorizeToCooperativeOps", workloadPerWorkgroup = [16, 16]}, + translation.info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index] } { ^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors @@ -63,14 +65,14 @@ hal.executable public @matmul_256x1024x128_div_sub { %11 = memref.subview %2[%arg0, 0] [16, 128] [1, 1] : memref<256x128xf16> to memref<16x128xf16, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> %12 = memref.subview %3[0, %arg1] [128, 16] [1, 1] : memref<128x1024xf16> to memref<128x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> %13 = memref.subview %4[%arg0, %arg1] [16, 16] [1, 1] : memref<256x1024xf16> to memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> - linalg.fill(%cst, %13) {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[16, 16, 16], [16, 16, 16]]}} : f16, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> - linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[16, 16, 16], [16, 16, 16]]}} + linalg.fill(%cst, %13) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f16, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> + linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%11, %12 : memref<16x128xf16, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>, memref<128x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>) outs(%13 : memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>) linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, 
affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%13, %9, %10 : memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>) outs(%13 : memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>) - attrs = {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[16, 16, 16], [16, 16, 16]]}} { + attrs = {__internal_linalg_transform__ = "workgroup", lowering.config = #config} { ^bb0(%arg2: f16, %arg3: f16, %arg4: f16, %arg5: f16): // no predecessors %14 = arith.divf %arg2, %arg3 : f16 %15 = arith.subf %14, %arg4 : f16 diff --git a/iree/compiler/Codegen/SPIRV/test/vectorize_elementwise_ops.mlir b/iree/compiler/Codegen/SPIRV/test/vectorize_elementwise_ops.mlir index 667dab3a02c3..3785956a2c20 100644 --- a/iree/compiler/Codegen/SPIRV/test/vectorize_elementwise_ops.mlir +++ b/iree/compiler/Codegen/SPIRV/test/vectorize_elementwise_ops.mlir @@ -5,6 +5,7 @@ // CHECK: vector.transfer_read %{{.+}}[%c0], {{.+}} memref<4xf32, #{{.+}}>, vector<4xf32> // CHECK: addf %{{.*}}, %{{.*}} : vector<4xf32> // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, memref<4xf32 +#config = #iree_codegen.lowering.config hal.executable private @elementwise_static_shape { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -24,7 +25,7 @@ hal.executable private @elementwise_static_shape { %ret0 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<128xf32> linalg.generic { __internal_linalg_transform__ = "workgroup", - lowering.config = {tileSizes = [[128], [4]]}, + lowering.config = #config, indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], @@ -54,6 +55,7 @@ hal.executable private @elementwise_static_shape { // CHECK-NOT: vector.transfer_read // CHECK: scf.for // CHECK: scf.for +#config = 
#iree_codegen.lowering.config hal.executable private @elementwise_transpose { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -73,7 +75,7 @@ hal.executable private @elementwise_transpose { %ret0 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<128x8xf32> linalg.generic { __internal_linalg_transform__ = "workgroup", - lowering.config = {tileSizes = [[1, 32], [1, 1]]}, + lowering.config = #config, indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>], diff --git a/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir index bd16b5d9cacb..618ec1f128e0 100644 --- a/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir +++ b/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir @@ -1,5 +1,6 @@ // RUN: iree-opt -split-input-file -iree-spirv-vectorize %s | IreeFileCheck %s +#config = #iree_codegen.lowering.config func @matmul_2x128x4() { %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index @@ -25,10 +26,10 @@ func @matmul_2x128x4() { %11 = "gpu.thread_id"() {dimension = "y"} : () -> index %12 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%10] %13 = memref.subview %9[%11, %12] [1, 4] [1, 1] : memref<2x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> to memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> - linalg.fill(%cst, %13) {__internal_linalg_transform__ = "vectorize", lowering.config = {tileSizes = [[2, 128], [], [1, 4], [0, 0, 4]]}} : f32, memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> + linalg.fill(%cst, %13) {__internal_linalg_transform__ = "vectorize", lowering.config = #config} : f32, memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> %17 = memref.subview %7[%11, 0] [1, 4] [1, 1] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 4 + s0 + d1)>> to memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 4 + s0 + 
d1)>> %18 = memref.subview %8[0, %12] [4, 4] [1, 1] : memref<4x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> to memref<4x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> - linalg.matmul {__internal_linalg_transform__ = "vectorize", lowering.config = {tileSizes = [[2, 128], [], [1, 4], [0, 0, 4]]}} + linalg.matmul {__internal_linalg_transform__ = "vectorize", lowering.config = #config} ins(%17, %18 : memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 4 + s0 + d1)>>, memref<4x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>) outs(%13 : memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>) } diff --git a/iree/compiler/Codegen/Utils/Utils.cpp b/iree/compiler/Codegen/Utils/Utils.cpp index 4f55f594f7f6..58a1e0494c35 100644 --- a/iree/compiler/Codegen/Utils/Utils.cpp +++ b/iree/compiler/Codegen/Utils/Utils.cpp @@ -19,6 +19,10 @@ namespace mlir { namespace iree_compiler { +//===----------------------------------------------------------------------===// +// Utility functions to get entry point(s) +//===----------------------------------------------------------------------===// + bool isEntryPoint(FuncOp func) { return func.isPublic(); } IREE::HAL::ExecutableEntryPointOp getEntryPoint(FuncOp funcOp) { @@ -41,21 +45,9 @@ llvm::StringMap getAllEntryPoints( return entryPointOps; } -IREE::HAL::TranslationInfo getTranslationInfo(FuncOp funcOp) { - auto entryPointOp = getEntryPoint(funcOp); - if (!entryPointOp) return nullptr; - return getTranslationInfo(entryPointOp); -} - -void setTranslationInfo(FuncOp entryPointFn, - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workgroupSize, - ArrayRef workloadPerWorkgroup) { - auto entryPointOp = getEntryPoint(entryPointFn); - auto translationInfo = buildTranslationInfo( - passPipeline, workloadPerWorkgroup, entryPointFn.getContext()); - setTranslationInfo(entryPointOp, translationInfo, workgroupSize); -} 
+//===----------------------------------------------------------------------===// +// Utility functions used in setting default configurations. +//===----------------------------------------------------------------------===// SmallVector getPartitionedLoops(Operation *op) { if (auto mmt4dOp = dyn_cast(op)) { @@ -80,45 +72,6 @@ SmallVector getPartitionedLoops(Operation *op) { return {}; } -LogicalResult setOpConfigAndEntryPointFnTranslation( - FuncOp entryPointFn, Operation *op, IREE::HAL::LoweringConfig config, - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workgroupSize) { - auto partitionedLoops = getPartitionedLoops(op); - SmallVector workloadPerWorkgroup; - auto tileSizes = getTileSizes(config, 0); - if (!tileSizes.empty() && !partitionedLoops.empty()) { - for (unsigned depth : partitionedLoops) { - if (depth >= tileSizes.size()) { - return op->emitOpError( - "illegal configuration for lowering op, expect first level " - "tile size to contain at least ") - << partitionedLoops.back() << " elements"; - } - if (tileSizes[depth] == 0) { - return op->emitOpError("illegal to set tilesize of loop ") - << depth - << " to zero since it is set to be partitioned at the flow " - "level"; - } - workloadPerWorkgroup.push_back(tileSizes[depth]); - } - if (!workloadPerWorkgroup.empty()) { - workloadPerWorkgroup = - llvm::to_vector<3>(llvm::reverse(workloadPerWorkgroup)); - } - } - auto entryPointOp = getEntryPoint(entryPointFn); - if (!entryPointOp) { - return entryPointFn.emitOpError( - "unable to find entry point op for entry point function"); - } - IREE::HAL::TranslationInfo translationInfo = buildTranslationInfo( - passPipeline, workloadPerWorkgroup, entryPointOp->getContext()); - setTranslationInfo(entryPointOp, translationInfo, workgroupSize); - return success(); -} - /// Walk up the defs of the view, to get the untiled value. Either walks up /// `ViewOpInterface` op-chains or the `subtensor` op-chains. 
static Value getViewSource(Value view) { diff --git a/iree/compiler/Codegen/Utils/Utils.h b/iree/compiler/Codegen/Utils/Utils.h index 120e2c4e96f3..f0c563a1c00d 100644 --- a/iree/compiler/Codegen/Utils/Utils.h +++ b/iree/compiler/Codegen/Utils/Utils.h @@ -8,7 +8,6 @@ #define IREE_COMPILER_CODEGEN_UTILS_UTILS_H_ #include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "llvm/ADT/StringMap.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/IR/BuiltinOps.h" @@ -18,6 +17,10 @@ namespace iree_compiler { static constexpr unsigned kNumMaxParallelDims = 3; +//===----------------------------------------------------------------------===// +// Utility functions to get entry point(s) +//===----------------------------------------------------------------------===// + /// Returns true if the given `func` is a kernel dispatch entry point. bool isEntryPoint(FuncOp func); @@ -28,18 +31,9 @@ llvm::StringMap getAllEntryPoints( /// Returns the entry point op for the `funcOp`. Returns `nullptr` on failure. IREE::HAL::ExecutableEntryPointOp getEntryPoint(FuncOp funcOp); -/// Returns the translation info for the `funcOp` (by looking at the entry -/// point). Returns `nullptr` on failure. -IREE::HAL::TranslationInfo getTranslationInfo(FuncOp funcOp); - -/// Sets the translation info on the `hal.executable.entry_point` op -/// corresponding to the `entryPointFn`. Returns failure if a translation info -/// is already set on the entry point op and is incompatible with what is being -/// set. -void setTranslationInfo(FuncOp entryPointFn, - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workgroupSize, - ArrayRef workloadPerWorkgroup); +//===----------------------------------------------------------------------===// +// Utility functions used in setting default configurations. 
+//===----------------------------------------------------------------------===// /// Returns the loops that are partitioned during dispatch region formations, in /// order, i.e. starting from the outer-most to innermost. @@ -47,23 +41,6 @@ void setTranslationInfo(FuncOp entryPointFn, /// formation to tile and distribute the ops. SmallVector getPartitionedLoops(Operation *op); -/// Sets translation for the entry-point function based on op configuration. -LogicalResult setOpConfigAndEntryPointFnTranslation( - FuncOp entryPointFn, Operation *op, IREE::HAL::LoweringConfig config, - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workgroupSize = {}); -inline LogicalResult setOpConfigAndEntryPointFnTranslation( - FuncOp entryPointFn, Operation *op, TileSizesListTypeRef tileSizes, - ArrayRef nativeVectorSize, - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workgroupSize = {}) { - IREE::HAL::LoweringConfig config = - buildConfigAttr(tileSizes, nativeVectorSize, op->getContext()); - setLoweringConfig(op, config); - return setOpConfigAndEntryPointFnTranslation(entryPointFn, op, config, - passPipeline, workgroupSize); -} - /// Returns the untiled type of a tiled view for both tensor and memref /// types. Either walks the `ViewOpInterface` chain (for memrefs) or the /// `subtensor` op chain (for tensors). 
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp b/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp index 1ceb06673c9a..c009f37ab99f 100644 --- a/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp +++ b/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp @@ -697,7 +697,8 @@ struct ResolveShapedDim : public OpRewritePattern { return success(); } - auto dynamicDims = IREE::Util::findDynamicDims(op.source(), op); + auto dynamicDims = IREE::Util::findDynamicDims( + op.source(), op->getBlock(), Block::iterator(op.getOperation())); if (!dynamicDims.hasValue()) { return rewriter.notifyMatchFailure(op, "no dynamic dims found/usable"); } @@ -768,6 +769,15 @@ void TensorSplatOp::getCanonicalizationPatterns( results.insert(context); } +OpFoldResult TensorSplatOp::fold(ArrayRef operands) { + if (operands.size() == 1 && operands.front()) { + // Splat value is constant and we can fold the operation. + return SplatElementsAttr::get(result().getType().cast(), + operands[0]); + } + return {}; +} + OpFoldResult TensorCloneOp::fold(ArrayRef operands) { if (operands[0]) { // Constants always fold. 
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOps.td b/iree/compiler/Dialect/Flow/IR/FlowOps.td index 102d86190eab..be3caede69f2 100644 --- a/iree/compiler/Dialect/Flow/IR/FlowOps.td +++ b/iree/compiler/Dialect/Flow/IR/FlowOps.td @@ -839,6 +839,7 @@ def FLOW_TensorSplatOp : FLOW_PureOp<"tensor.splat", [ }]; let hasCanonicalizer = 1; + let hasFolder = 1; } def FLOW_TensorCloneOp : FLOW_PureOp<"tensor.clone", [ diff --git a/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir b/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir index bd843fb1a2b7..792b88dfe57c 100644 --- a/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir +++ b/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir @@ -154,6 +154,28 @@ func @storeConstScalar() -> tensor { // ----- +// CHECK-LABEL: @splatConst +func @splatConst() -> tensor<4xi32> { + %0 = arith.constant 4 : i32 + // CHECK-NEXT: %[[C:.+]] = arith.constant dense<4> : tensor<4xi32> + %1 = flow.tensor.splat %0 : tensor<4xi32> + // CHECK-NEXT: return %[[C]] + return %1 : tensor<4xi32> +} + +// ----- + +// CHECK-LABEL: @splatConstScalar +func @splatConstScalar() -> tensor { + %0 = arith.constant 4 : i32 + // CHECK-NEXT: %[[C:.+]] = arith.constant dense<4> : tensor + %1 = flow.tensor.splat %0 : tensor + // CHECK-NEXT: return %[[C]] + return %1 : tensor +} + +// ----- + // CHECK-LABEL: @splatDynamicShape // CHECK-SAME: (%[[DIM0:.+]]: index, %[[DIM1:.+]]: index) func @splatDynamicShape(%dim0: index, %dim1: index) -> tensor { diff --git a/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgMatmulToMmt4D.cpp b/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgMatmulToMmt4D.cpp index 1a103f853901..7e6cecf01c4d 100644 --- a/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgMatmulToMmt4D.cpp +++ b/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgMatmulToMmt4D.cpp @@ -112,11 +112,6 @@ class LinalgMatmulOpToLinalgMmt4DOpPattern return failure(); } - // This is for float only matmul for now. 
Integer data type might require - // r.h.s layout change. - if (!lhsType.getElementType().isF32() || !rhsType.getElementType().isF32()) - return failure(); - int m = lhsType.getShape()[0]; int k = rhsType.getShape()[0]; int n = rhsType.getShape()[1]; diff --git a/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp b/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp index 992878b07445..869143aca268 100644 --- a/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp +++ b/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp @@ -81,6 +81,7 @@ struct LinalgFillToFlowTensorSplat final // Don't convert linalg.fill ops that were fused together with other ops. return failure(); } + SmallVector dynamicDims = getDynamicDimValues(rewriter, fillOp.getLoc(), fillOp.output()); rewriter.replaceOpWithNewOp( @@ -89,26 +90,6 @@ struct LinalgFillToFlowTensorSplat final } }; -struct ConvertSplatConstantOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(mlir::ConstantOp op, - PatternRewriter &rewriter) const override { - if (op->getParentOfType()) { - return rewriter.notifyMatchFailure(op, "ignoring dispatch ops"); - } - auto splatAttr = op.getValue().dyn_cast(); - if (!splatAttr) { - return rewriter.notifyMatchFailure(op, "only looking for splats"); - } - auto tensorType = op.getType().cast(); - auto elementValue = rewriter.createOrFold( - op.getLoc(), tensorType.getElementType(), splatAttr.getSplatValue()); - rewriter.replaceOpWithNewOp( - op, tensorType, elementValue, ValueRange{}); - return success(); - } -}; - /// Converts linalg operations that can map to flow.tensor.* operations. 
struct ConvertLinalgTensorOpsPass : public ConvertLinalgTensorOpsBase { @@ -135,8 +116,7 @@ struct ConvertLinalgTensorOpsPass LinalgTensorReshapeToFlowTensorReshape>( context); } else { - patterns.insert( - context); + patterns.insert(context); } IREE::Flow::TensorReshapeOp::getCanonicalizationPatterns(patterns, context); if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns)))) { diff --git a/iree/compiler/Dialect/Flow/Transforms/DispatchLinalgOnTensors.cpp b/iree/compiler/Dialect/Flow/Transforms/DispatchLinalgOnTensors.cpp index 6ba9c4bac62a..7861e2de71b3 100644 --- a/iree/compiler/Dialect/Flow/Transforms/DispatchLinalgOnTensors.cpp +++ b/iree/compiler/Dialect/Flow/Transforms/DispatchLinalgOnTensors.cpp @@ -476,7 +476,8 @@ static BlockArgument getTiedOperandBlockArgument(BlockArgument resultArg) { // block argument. Single use can potentially be relaxed. auto loadArg = loadOp.source().template dyn_cast(); - if (!loadArg || !loadArg.hasOneUse()) { + if (!loadArg || !loadArg.hasOneUse() || + loadArg.use_begin()->get() != storeOp.target()) { return nullptr; } return loadArg; diff --git a/iree/compiler/Dialect/Flow/Transforms/OutlineLargeConstants.cpp b/iree/compiler/Dialect/Flow/Transforms/OutlineLargeConstants.cpp index 24de6786fcd1..27c3e80b565c 100644 --- a/iree/compiler/Dialect/Flow/Transforms/OutlineLargeConstants.cpp +++ b/iree/compiler/Dialect/Flow/Transforms/OutlineLargeConstants.cpp @@ -29,15 +29,11 @@ namespace Flow { // more efficient and fewer bindings. static bool isConstantLarge(arith::ConstantOp constantOp, size_t minLargeConstantSize) { - if (constantOp.value().isa()) { - // Never outline splats; we want those transient within streams. 
- return false; - } auto type = constantOp.getType(); if (auto shapedType = type.dyn_cast()) { size_t unpackedByteLength = (shapedType.getNumElements() * shapedType.getElementTypeBitWidth()) / 8; - if (unpackedByteLength > minLargeConstantSize) { + if (unpackedByteLength >= minLargeConstantSize) { return true; } } @@ -67,6 +63,8 @@ class OutlineLargeConstantsPass : public OutlineLargeConstantsBase { public: OutlineLargeConstantsPass() = default; + OutlineLargeConstantsPass(size_t minLargeConstantSize) + : minLargeConstantSize(minLargeConstantSize){}; void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); @@ -86,7 +84,7 @@ class OutlineLargeConstantsPass std::vector> replacements; for (auto &largeConstantOp : - findLargeConstantsInModule(moduleOp, minStorageSize.getValue())) { + findLargeConstantsInModule(moduleOp, minLargeConstantSize)) { std::string name; do { name = baseName + std::to_string(uniqueId++); @@ -116,11 +114,14 @@ class OutlineLargeConstantsPass constantOp.erase(); } } + + private: + size_t minLargeConstantSize; }; -std::unique_ptr> -createOutlineLargeConstantsPass() { - return std::make_unique(); +std::unique_ptr> createOutlineLargeConstantsPass( + size_t minLargeConstantSize) { + return std::make_unique(minLargeConstantSize); } } // namespace Flow diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.h b/iree/compiler/Dialect/Flow/Transforms/Passes.h index dafb8d635262..123cfb05956e 100644 --- a/iree/compiler/Dialect/Flow/Transforms/Passes.h +++ b/iree/compiler/Dialect/Flow/Transforms/Passes.h @@ -131,8 +131,12 @@ createPadLinalgOpsToIntegerMultiplePass(int paddingSize = 4); //===----------------------------------------------------------------------===// // Outlines large tensor constants into util.globals at the module level. 
-std::unique_ptr> -createOutlineLargeConstantsPass(); +// +// TODO(#5493): implement the support for inlining constants into the command +// buffer and raise this value to one that is measured to be good. +static constexpr size_t kMinLargeConstantSize = 1; +std::unique_ptr> createOutlineLargeConstantsPass( + size_t minLargeConstantSize = kMinLargeConstantSize); // Deduplicates equivalent executables. std::unique_ptr> diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.td b/iree/compiler/Dialect/Flow/Transforms/Passes.td index 4fee8c0ab20d..64941b2d5093 100644 --- a/iree/compiler/Dialect/Flow/Transforms/Passes.td +++ b/iree/compiler/Dialect/Flow/Transforms/Passes.td @@ -102,12 +102,8 @@ def OutlineDispatchRegions : def OutlineLargeConstants : Pass<"iree-flow-outline-large-constants", "mlir::ModuleOp"> { let summary = "Outlines large tensor constants into util.globals at the module level."; - let constructor = "mlir::iree_compiler::IREE::Flow::createOutlineLargeConstantsPass()"; - let options = [ - Option<"minStorageSize", "min-storage-size", - "int64_t", /*default=*/"64", - "Outlines constants with storage sizes > than this byte size."> - ]; + // TODO(#5493): add a flag for this. 
+ let constructor = "mlir::iree_compiler::IREE::Flow::createOutlineLargeConstantsPass(25)"; } def PadLinalgOps : diff --git a/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir b/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir index c42e9b3be3be..63fbc2f9d5bf 100644 --- a/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir +++ b/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir @@ -1114,3 +1114,17 @@ func @dynamic_slice(%arg0 : i32, %arg1 : i32, %arg2 : tensor, // CHECK-SAME: tensor{%[[D1]], %[[D2]]}, tensor{%[[D0]]} // CHECK-NEXT: %[[ARG4:.+]]: !flow.dispatch.tensor // CHECK-SAME: %[[ARG5:.+]]: !flow.dispatch.tensor + +// ----- + +func @extract_slice(%arg0 : tensor, %arg1 : index, %arg2 : index, + %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) -> tensor { + %0 = tensor.extract_slice %arg0[%arg1, %arg2] [%arg3, %arg4] [%arg5, %arg6] : + tensor to tensor + return %0 : tensor +} +// CHECK: flow.dispatch.workgroups +// CHECK-NEXT: %[[INPUT:[a-zA-Z0-9]+]]: !flow.dispatch.tensor +// CHECK-SAME: %[[OUTPUT:[a-zA-Z0-9]+]]: !flow.dispatch.tensor +// CHECK: %[[SLICE:.+]] = flow.dispatch.tensor.load %[[INPUT]] +// CHECK: flow.dispatch.tensor.store %[[SLICE]], %[[OUTPUT]] diff --git a/iree/compiler/Dialect/Flow/Transforms/test/outline_large_constants.mlir b/iree/compiler/Dialect/Flow/Transforms/test/outline_large_constants.mlir index 739411928a2c..cdc299f036fd 100644 --- a/iree/compiler/Dialect/Flow/Transforms/test/outline_large_constants.mlir +++ b/iree/compiler/Dialect/Flow/Transforms/test/outline_large_constants.mlir @@ -1,12 +1,10 @@ -// RUN: iree-opt -split-input-file -iree-flow-outline-large-constants='min-storage-size=9' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file -iree-flow-outline-large-constants %s | IreeFileCheck %s -// CHECK: util.global private @[[LARGE_VARIABLE:.+]] {noinline} = dense<{{.+}}> : tensor<8xf32> -func @fn1() -> (tensor<2xf32>, 
tensor<512x128xf32>, tensor<8xf32>) { +// CHECK: util.global private @[[LARGE_VARIABLE:.+]] {noinline} = dense<1.200000e+00> : tensor<512x128xf32> +func @fn1() -> (tensor<2xf32>, tensor<512x128xf32>) { // CHECK-DAG: %[[SMALL_VALUE:.+]] = arith.constant dense<{{.+}}> : tensor<2xf32> %cst_0 = arith.constant dense<[0.0287729427, 0.0297581609]> : tensor<2xf32> - // CHECK-DAG: %[[SPLATG_VALUE:.+]] = arith.constant dense<{{.+}}> : tensor<512x128xf32> + // CHECK-DAG: %[[LARGE_VALUE:.+]] = util.global.load @[[LARGE_VARIABLE]] : tensor<512x128xf32> %cst_1 = arith.constant dense<1.2> : tensor<512x128xf32> - // CHECK-DAG: %[[LARGE_VALUE:.+]] = util.global.load @[[LARGE_VARIABLE]] : tensor<8xf32> - %cst_2 = arith.constant dense<[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]> : tensor<8xf32> - return %cst_0, %cst_1, %cst_2 : tensor<2xf32>, tensor<512x128xf32>, tensor<8xf32> + return %cst_0, %cst_1 : tensor<2xf32>, tensor<512x128xf32> } diff --git a/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertStreamOps.cpp b/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertStreamOps.cpp index 8fe6df3203de..19a03980141f 100644 --- a/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertStreamOps.cpp +++ b/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertStreamOps.cpp @@ -949,9 +949,6 @@ static Value splatFillPattern(Location loc, Value baseValue, baseValue = builder.createOrFold( loc, builder.getIntegerType(baseBitWidth), baseValue); - // Treat i1 as i8. 
- if (baseBitWidth == 1) baseBitWidth = 8; - switch (baseBitWidth) { case 8: { // (v << 24) | (v << 16) | (v << 8) | v diff --git a/iree/compiler/Dialect/HAL/IR/BUILD b/iree/compiler/Dialect/HAL/IR/BUILD index 9586bfc10959..c0cdf752ae22 100644 --- a/iree/compiler/Dialect/HAL/IR/BUILD +++ b/iree/compiler/Dialect/HAL/IR/BUILD @@ -27,7 +27,6 @@ td_library( "HALDialect.td", "HALInterfaces.td", "HALOps.td", - "LoweringConfig.td", ], include = ["*.td"], ), @@ -46,14 +45,12 @@ cc_library( "HALOpFolders.cpp", "HALOps.cpp", "HALTypes.cpp", - "LoweringConfig.cpp", ], hdrs = [ "HALDialect.h", "HALOps.h", "HALTraits.h", "HALTypes.h", - "LoweringConfig.h", ], textual_hdrs = [ "HALAttrs.cpp.inc", @@ -70,18 +67,12 @@ cc_library( "HALStructs.h.inc", "HALTypeInterfaces.cpp.inc", "HALTypeInterfaces.h.inc", - "LoweringConfig.h.inc", - "LoweringConfig.cpp.inc", - "LoweringConfigEnums.h.inc", - "LoweringConfigEnums.cpp.inc", ], deps = [ ":HALInterfacesGen", ":HALOpsGen", ":HALStructsGen", ":HALTypesGen", - ":LoweringConfigEnumGen", - ":LoweringConfigGen", "//iree/compiler/Dialect/Shape/IR", "//iree/compiler/Dialect/Util/IR", "@llvm-project//llvm:Support", @@ -221,37 +212,3 @@ iree_tablegen_doc( td_file = "HALOps.td", deps = [":td_files"], ) - -gentbl_cc_library( - name = "LoweringConfigGen", - tbl_outs = [ - ( - ["-gen-struct-attr-decls"], - "LoweringConfig.h.inc", - ), - ( - ["-gen-struct-attr-defs"], - "LoweringConfig.cpp.inc", - ), - ], - tblgen = "@llvm-project//mlir:mlir-tblgen", - td_file = "LoweringConfig.td", - deps = [":td_files"], -) - -gentbl_cc_library( - name = "LoweringConfigEnumGen", - tbl_outs = [ - ( - ["-gen-enum-decls"], - "LoweringConfigEnums.h.inc", - ), - ( - ["-gen-enum-defs"], - "LoweringConfigEnums.cpp.inc", - ), - ], - tblgen = "@llvm-project//mlir:mlir-tblgen", - td_file = "LoweringConfig.td", - deps = [":td_files"], -) diff --git a/iree/compiler/Dialect/HAL/IR/CMakeLists.txt b/iree/compiler/Dialect/HAL/IR/CMakeLists.txt index 341d7ea45396..8d2316a5c360 
100644 --- a/iree/compiler/Dialect/HAL/IR/CMakeLists.txt +++ b/iree/compiler/Dialect/HAL/IR/CMakeLists.txt @@ -18,7 +18,6 @@ iree_cc_library( "HALOps.h" "HALTraits.h" "HALTypes.h" - "LoweringConfig.h" TEXTUAL_HDRS "HALAttrInterfaces.cpp.inc" "HALAttrInterfaces.h.inc" @@ -34,22 +33,15 @@ iree_cc_library( "HALStructs.h.inc" "HALTypeInterfaces.cpp.inc" "HALTypeInterfaces.h.inc" - "LoweringConfig.cpp.inc" - "LoweringConfig.h.inc" - "LoweringConfigEnums.cpp.inc" - "LoweringConfigEnums.h.inc" SRCS "HALOpFolders.cpp" "HALOps.cpp" "HALTypes.cpp" - "LoweringConfig.cpp" DEPS ::HALInterfacesGen ::HALOpsGen ::HALStructsGen ::HALTypesGen - ::LoweringConfigEnumGen - ::LoweringConfigGen LLVMSupport MLIRIR MLIRMemRef @@ -145,24 +137,4 @@ iree_tablegen_doc( -gen-dialect-doc HALDialect.md ) -iree_tablegen_library( - NAME - LoweringConfigGen - TD_FILE - "LoweringConfig.td" - OUTS - -gen-struct-attr-decls LoweringConfig.h.inc - -gen-struct-attr-defs LoweringConfig.cpp.inc -) - -iree_tablegen_library( - NAME - LoweringConfigEnumGen - TD_FILE - "LoweringConfig.td" - OUTS - -gen-enum-decls LoweringConfigEnums.h.inc - -gen-enum-defs LoweringConfigEnums.cpp.inc -) - ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/iree/compiler/Dialect/HAL/IR/HALDialect.cpp b/iree/compiler/Dialect/HAL/IR/HALDialect.cpp index 7b38e4a34883..a3e30eba8d37 100644 --- a/iree/compiler/Dialect/HAL/IR/HALDialect.cpp +++ b/iree/compiler/Dialect/HAL/IR/HALDialect.cpp @@ -10,7 +10,6 @@ #include "iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertHALToVM.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "iree/compiler/Dialect/HAL/IR/HALTypes.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "iree/compiler/Dialect/HAL/hal.imports.h" #include "iree/compiler/Dialect/Util/IR/UtilDialect.h" #include "iree/compiler/Dialect/VM/Conversion/ConversionDialectInterface.h" @@ -44,9 +43,6 @@ struct HALOpAsmInterface : public OpAsmDialectInterface { } else if (auto targetAttr = 
attr.dyn_cast()) { os << "executable_target_" << targetAttr.getSymbolNameFragment(); return AliasResult::OverridableAlias; - } else if (attr.isa()) { - os << "config"; - return AliasResult::OverridableAlias; } return AliasResult::NoAlias; } diff --git a/iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp b/iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp deleted file mode 100644 index 4bc7e4e32087..000000000000 --- a/iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" - -#include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" - -static const char kConfigAttrName[] = "lowering.config"; -static const char kTranslationInfoAttrName[] = "translation.info"; - -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp.inc" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfigEnums.cpp.inc" - -namespace mlir { -namespace iree_compiler { - -//===----------------------------------------------------------------------===// -// Helpers for getting/setting information needed to lower an executable. 
These -// are information that are stored as attributes on the -// `hal.executable.entry_point` -//===----------------------------------------------------------------------===// - -IREE::HAL::TranslationInfo buildTranslationInfo( - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workloadPerWorkgroup, MLIRContext *context) { - OpBuilder builder(context); - auto pipelineAttr = StringAttr::get(context, stringifyEnum(passPipeline)); - ArrayAttr workloadPerWorkgroupAttr = nullptr; - if (!workloadPerWorkgroup.empty()) { - workloadPerWorkgroupAttr = builder.getI64ArrayAttr(workloadPerWorkgroup); - } - return IREE::HAL::TranslationInfo::get(pipelineAttr, workloadPerWorkgroupAttr, - context); -} - -IREE::HAL::TranslationInfo getTranslationInfo( - IREE::HAL::ExecutableEntryPointOp entryPointOp) { - return entryPointOp->getAttrOfType( - kTranslationInfoAttrName); -} - -SmallVector getWorkgroupSize( - IREE::HAL::ExecutableEntryPointOp entryPointOp) { - SmallVector workgroupSize; - if (Optional workgroupSizeAttrList = - entryPointOp.workgroup_size()) { - workgroupSize.resize(workgroupSizeAttrList->size()); - for (auto attr : llvm::enumerate(workgroupSizeAttrList.getValue())) { - workgroupSize[attr.index()] = attr.value().cast().getInt(); - } - } - return workgroupSize; -} - -void setTranslationInfo(IREE::HAL::ExecutableEntryPointOp entryPointOp, - IREE::HAL::TranslationInfo translationInfo, - ArrayRef workgroupSize) { - entryPointOp->setAttr(kTranslationInfoAttrName, translationInfo); - // The workgroup size is set on the entry point op directly. 
- if (!workgroupSize.empty()) { - MLIRContext *context = entryPointOp->getContext(); - auto indexType = IndexType::get(context); - auto attrs = llvm::to_vector<4>( - llvm::map_range(workgroupSize, [&](int64_t v) -> Attribute { - return IntegerAttr::get(indexType, v); - })); - entryPointOp.workgroup_sizeAttr(ArrayAttr::get(context, attrs)); - } -} - -//===----------------------------------------------------------------------===// -// Helpers for getting/setting the `hal.lowering.*` attributes that drive the -// linalg-based lowering. -// ===----------------------------------------------------------------------===// - -IREE::HAL::LoweringConfig getLoweringConfig(Operation *op) { - return op->getAttrOfType(kConfigAttrName); -} - -void setLoweringConfig(Operation *op, IREE::HAL::LoweringConfig config) { - op->setAttr(kConfigAttrName, config); -} - -void eraseLoweringConfig(Operation *op) { op->removeAttr(kConfigAttrName); } - -//===----------------------------------------------------------------------===// -// Helpers for accessing values from the LoweringConfig attribute. 
-//===----------------------------------------------------------------------===// - -IREE::HAL::LoweringConfig buildConfigAttr(TileSizesListTypeRef tileSizes, - ArrayRef nativeVectorSize, - MLIRContext *context) { - OpBuilder builder(context); - ArrayAttr tileSizesAttr = nullptr; - if (!tileSizes.empty()) { - auto attrList = llvm::to_vector<4>( - llvm::map_range(tileSizes, [&](ArrayRef sizes) -> Attribute { - return builder.getI64ArrayAttr(sizes); - })); - tileSizesAttr = builder.getArrayAttr(attrList); - } - ArrayAttr nativeVectorSizeAttr = nullptr; - if (!nativeVectorSize.empty()) { - nativeVectorSizeAttr = builder.getI64ArrayAttr(nativeVectorSize); - } - return IREE::HAL::LoweringConfig::get(tileSizesAttr, nativeVectorSizeAttr, - /*passPipeline = */ nullptr, - /*workgroupSize = */ nullptr, context); -} - -TileSizesListType getTileSizes(IREE::HAL::LoweringConfig config) { - auto tileSizesAttr = config.tileSizes(); - if (!tileSizesAttr) return {}; - return llvm::to_vector<1>(llvm::map_range( - tileSizesAttr, [&](Attribute attr) -> SmallVector { - return llvm::to_vector<4>( - llvm::map_range(attr.cast(), [&](Attribute intAttr) { - return intAttr.cast().getInt(); - })); - })); -} - -SmallVector getTileSizes(IREE::HAL::LoweringConfig config, - unsigned level) { - ArrayAttr tileSizesAttr = config.tileSizes(); - if (!tileSizesAttr || tileSizesAttr.size() <= level) return {}; - return llvm::to_vector<4>(llvm::map_range( - tileSizesAttr.getValue()[level].cast(), - [&](Attribute intAttr) { return intAttr.cast().getInt(); })); -} - -SmallVector getTileSizes(OpBuilder &b, Operation *op, - unsigned level) { - return llvm::to_vector<4>( - llvm::map_range(getTileSizes(op, level), [&](int64_t t) -> Value { - return b.create(op->getLoc(), t); - })); -} - -SmallVector getNativeVectorSize(IREE::HAL::LoweringConfig config) { - ArrayAttr nativeVectorSizeAttr = config.nativeVectorSize(); - if (!nativeVectorSizeAttr) return {}; - return llvm::to_vector<4>(llvm::map_range( - 
nativeVectorSizeAttr, - [&](Attribute intAttr) { return intAttr.cast().getInt(); })); -} - -} // namespace iree_compiler -} // namespace mlir diff --git a/iree/compiler/Dialect/HAL/IR/LoweringConfig.h b/iree/compiler/Dialect/HAL/IR/LoweringConfig.h deleted file mode 100644 index a48d60ae45fb..000000000000 --- a/iree/compiler/Dialect/HAL/IR/LoweringConfig.h +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -//===- LoweringConfig.h - Declares configuration for lowering Linalg ops --===// -// -// This file declares an attribute that drives how a dispatch region containing -// a set of operations are lowered. The attribute itself is attached to Linalg -// operations, and help converting a Linalg operation into "scalar code". -// -//===----------------------------------------------------------------------===// - -#ifndef IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_ -#define IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_ - -#include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/BuiltinAttributes.h" -#include "mlir/IR/BuiltinTypes.h" - -// clang-format off -#include "iree/compiler/Dialect/HAL/IR/LoweringConfigEnums.h.inc" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h.inc" -// clang-format on - -namespace mlir { -namespace iree_compiler { - -namespace IREE { -namespace HAL { - -inline bool operator==(const TranslationInfo &lhs, const TranslationInfo &rhs) { - return lhs.passPipeline() == rhs.passPipeline() && - lhs.workloadPerWorkgroup() == rhs.workloadPerWorkgroup(); -} - -inline bool operator!=(const TranslationInfo &lhs, const TranslationInfo &rhs) { - return !(lhs == rhs); -} - -} // namespace HAL -} // namespace IREE - -//===----------------------------------------------------------------------===// 
-// Helpers for getting/setting information needed to lower an executable. These -// are information that are stored as attributes on the -// `hal.executable.entry_point` -//===----------------------------------------------------------------------===// - -/// Builder method for IREE::HAL::TranslationInfoAttr. -IREE::HAL::TranslationInfo buildTranslationInfo( - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workloadPerWorkgroup, MLIRContext *context); - -/// Gets the translate executable info attribute value associated with -/// `entryPointOp`. -IREE::HAL::TranslationInfo getTranslationInfo( - IREE::HAL::ExecutableEntryPointOp entryPointOp); - -/// Get the pass pipeline specified in the `translationInfo` -inline Optional -getLoweringPassPipeline(IREE::HAL::TranslationInfo translationInfo) { - return IREE::HAL::symbolizeDispatchLoweringPassPipeline( - translationInfo.passPipeline().getValue()); -} - -/// Returns the workgroup size specified on the `entryPointOp`. -SmallVector getWorkgroupSize( - IREE::HAL::ExecutableEntryPointOp entryPointOp); - -/// Set the translate executable info with the entry point op. Overwrites the -/// existing attributes. -// TODO(ravishankarm, benvanik): Eventually all the information needed for the -// lowering will be consolidated into a single attribute with richer -// information. -void setTranslationInfo(IREE::HAL::ExecutableEntryPointOp entryPointOp, - IREE::HAL::TranslationInfo translationInfo, - ArrayRef workgroupSize = {}); - -//===----------------------------------------------------------------------===// -// Helpers for getting/setting the `hal.lowering.*` attributes that drive the -// linalg-based lowering. -// ===----------------------------------------------------------------------===// - -/// Returns the lowering configuration set for an operation. -IREE::HAL::LoweringConfig getLoweringConfig(Operation *op); - -/// Sets the lowering configuration, overwriting existing attribute values. 
-void setLoweringConfig(Operation *op, IREE::HAL::LoweringConfig config); - -/// Removes the lowering configuration on the operation if it exists. -void eraseLoweringConfig(Operation *op); - -//===----------------------------------------------------------------------===// -// Helpers for accessing values from the LoweringConfig attribute. -//===----------------------------------------------------------------------===// - -// TODO(ravishankarm): Struct attributes dont have a way of defining extra class -// methods. When they do, these could all be moved into the attribute definition -// itself. - -/// Stores the tile sizes to use at different levels of tiling as a vector of -/// vectors. -/// - First level tiling maps to workgroups. -/// - Second level tiling maps to subgroups. -/// - Third level tiling maps to invocations. -using TileSizesListType = SmallVector, 1>; -using TileSizesListTypeRef = ArrayRef>; - -/// Construct a lowering configuration. -IREE::HAL::LoweringConfig buildConfigAttr(TileSizesListTypeRef tileSizes, - ArrayRef nativeVectorSize, - MLIRContext *context); - -/// Get the tile sizes for all levels. -TileSizesListType getTileSizes(IREE::HAL::LoweringConfig config); - -/// Get the tile sizes for all levels for an operation if the lowering -/// configuration is set. -inline TileSizesListType getTileSizes(Operation *op) { - auto configAttr = getLoweringConfig(op); - if (!configAttr) return {}; - return getTileSizes(configAttr); -} - -/// Get the tile sizes for level `level`, if it is defined. Returns {} if tile -/// sizes are not set for that level. -SmallVector getTileSizes(IREE::HAL::LoweringConfig config, - unsigned level); - -/// Get the tile sizes for level `level` for an operation if the lowering -/// configuration for the operation is set, and tile sizes are defined for that -/// level. 
-inline SmallVector getTileSizes(Operation *op, unsigned level) { - auto configAttr = getLoweringConfig(op); - if (!configAttr) return {}; - return getTileSizes(configAttr, level); -} -SmallVector getTileSizes(OpBuilder &b, Operation *op, unsigned level); - -/// Gets the native vector size defined in the lowering configuration. -SmallVector getNativeVectorSize(IREE::HAL::LoweringConfig config); - -/// Gets the native vector size defined for lowering an operation, if the -/// lowering configuration is defined. If not returns empty vector. -inline SmallVector getNativeVectorSize(Operation *op) { - auto configAttr = getLoweringConfig(op); - if (!configAttr) return {}; - return getNativeVectorSize(configAttr); -} - -/// Get the pass pipeline specified in the `loweringConfig` -inline Optional -getLoweringPassPipeline(IREE::HAL::LoweringConfig config) { - return IREE::HAL::symbolizeDispatchLoweringPassPipeline( - config.passPipeline().getValue()); -} - -} // namespace iree_compiler -} // namespace mlir -#endif // IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_ diff --git a/iree/compiler/Dialect/HAL/IR/LoweringConfig.td b/iree/compiler/Dialect/HAL/IR/LoweringConfig.td deleted file mode 100644 index 9e520ceb907e..000000000000 --- a/iree/compiler/Dialect/HAL/IR/LoweringConfig.td +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_COMPILER_DIALECT_HAL_IR_LOWERINGCONFIG -#define IREE_COMPILER_DIALECT_HAL_IR_LOWERINGCONFIG - -// Putting this in HAL dialect for now. -include "iree/compiler/Dialect/HAL/IR/HALDialect.td" - -// List of pre-existing pipelines for translating executables. 
-def CPU_Default - : StrEnumAttrCase<"CPUDefault">; -def CPU_Vectorization - : StrEnumAttrCase<"CPUVectorization">; -def CPU_TensorToVectors - : StrEnumAttrCase<"CPUTensorToVectors">; - -def LLVMGPU_SimpleDistribute - : StrEnumAttrCase<"LLVMGPUDistribute">; -def LLVMGPU_Vectorize - : StrEnumAttrCase<"LLVMGPUVectorize">; -def LLVMGPU_MatmulSimt - : StrEnumAttrCase<"LLVMGPUMatmulSimt">; - -def SPIRV_SimpleDistribute - : StrEnumAttrCase<"SPIRVDistribute">; -def SPIRV_DistributeToGlobalID - : StrEnumAttrCase<"SPIRVDistributeToGlobalID">; -def SPIRV_Vectorize - : StrEnumAttrCase<"SPIRVVectorize">; -def SPIRV_VectorizeToCooperativeOps - : StrEnumAttrCase<"SPIRVVectorizeToCooperativeOps">; -def None - : StrEnumAttrCase<"None">; - -// EnumAttrCase for all known lowerings for ops within dispatch region -// to scalar/native-vector code. -def DispatchLoweringPassPipelineEnum : StrEnumAttr< - "DispatchLoweringPassPipeline", - "identifier for pass pipeline use to lower dispatch region", - [CPU_Default, CPU_TensorToVectors, CPU_Vectorization, - LLVMGPU_SimpleDistribute, LLVMGPU_Vectorize, LLVMGPU_MatmulSimt, - SPIRV_SimpleDistribute, SPIRV_DistributeToGlobalID, - SPIRV_Vectorize, SPIRV_VectorizeToCooperativeOps, - None]> { - let cppNamespace = "::mlir::iree_compiler::IREE::HAL"; -} - -def TileSizesListAttr : - TypedArrayAttrBase { } - -// Attribute that captures information needed for translating the executables. -def TranslationInfoAttr : - StructAttr<"TranslationInfo", HAL_Dialect, [ - StructFieldAttr<"passPipeline", DispatchLoweringPassPipelineEnum>, - StructFieldAttr<"workloadPerWorkgroup", - DefaultValuedAttr>, - ]>; - -// Attribute that carries information needed to perform -// tiling/vectorization, etc. 
-def HAL_LoweringConfigAttr : - StructAttr<"LoweringConfig", HAL_Dialect, [ - StructFieldAttr<"tileSizes", - DefaultValuedAttr>, - StructFieldAttr<"nativeVectorSize", - DefaultValuedAttr>, - StructFieldAttr<"passPipeline", - DefaultValuedAttr< - DispatchLoweringPassPipelineEnum, - "\"IREE::HAL::DispatchLoweringPassPipeline::None\"">>, - StructFieldAttr<"workgroupSize", - DefaultValuedAttr> - ]>; - -#endif // IREE_COMPILER_DIALECT_HAL_IR_LOWERINGCONFIG diff --git a/iree/compiler/Dialect/HAL/Target/CUDA/BUILD b/iree/compiler/Dialect/HAL/Target/CUDA/BUILD index 0be0ecd2ddca..bf8438203934 100644 --- a/iree/compiler/Dialect/HAL/Target/CUDA/BUILD +++ b/iree/compiler/Dialect/HAL/Target/CUDA/BUILD @@ -42,6 +42,7 @@ cc_library( deps = [ ":cuda_libdevice", "//iree/compiler/Codegen:PassHeaders", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/LLVMGPU", "//iree/compiler/Dialect/HAL/Target", "//iree/compiler/Utils", diff --git a/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp b/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp index 51210f0fc772..4974fa9dc437 100644 --- a/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp @@ -6,6 +6,7 @@ #include "iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.h" +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Dialect/HAL/Target/CUDA/LLVMPasses.h" #include "iree/compiler/Dialect/HAL/Target/CUDA/libdevice.h" @@ -150,7 +151,7 @@ class CUDATargetBackend final : public TargetBackend { std::string name() const override { return "cuda"; } void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert(); mlir::registerLLVMDialectTranslation(registry); mlir::registerNVVMDialectTranslation(registry); } diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/BUILD b/iree/compiler/Dialect/HAL/Target/LLVM/BUILD index 
398b138f93be..710eb0fb2f48 100644 --- a/iree/compiler/Dialect/HAL/Target/LLVM/BUILD +++ b/iree/compiler/Dialect/HAL/Target/LLVM/BUILD @@ -37,6 +37,7 @@ cc_library( ":StaticLibraryGenerator", "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/LLVMCPU", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/HAL/Target", diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt index 7c24113f9522..f6ea0e49115d 100644 --- a/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt +++ b/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt @@ -46,6 +46,7 @@ iree_cc_library( MLIRLLVMToLLVMIRTranslation MLIRTargetLLVMIRExport iree::compiler::Codegen::Common + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::LLVMCPU iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::Utils diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp index 4cf5cd7887d4..5275258ce1eb 100644 --- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp @@ -8,6 +8,7 @@ #include +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h" #include "iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.h" @@ -110,6 +111,7 @@ class LLVMAOTTargetBackend final : public TargetBackend { void getDependentDialects(DialectRegistry ®istry) const override { mlir::registerLLVMDialectTranslation(registry); + registry.insert(); } IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget( diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp index 87987a8ec214..cdc1b2111cbf 100644 --- 
a/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp +++ b/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp @@ -62,6 +62,9 @@ class UnixLinkerTool : public LinkerTool { // Produce a Mach-O dylib file. flags.push_back("-dylib"); flags.push_back("-flat_namespace"); + flags.push_back( + "-L /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib " + "-lSystem"); // HACK: we insert libm calls. This is *not good*. // Until the MLIR LLVM lowering paths no longer introduce these, diff --git a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/BUILD b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/BUILD index 01f3323acdd7..6fd21ab87791 100644 --- a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/BUILD +++ b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/BUILD @@ -28,6 +28,7 @@ cc_library( ":SPIRVToMSL", "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/SPIRV", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/HAL/Target", diff --git a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/CMakeLists.txt index d5fecf0b9e2e..d01431a31b30 100644 --- a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/CMakeLists.txt +++ b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/CMakeLists.txt @@ -30,6 +30,7 @@ iree_cc_library( MLIRSPIRVSerialization MLIRVector iree::compiler::Codegen::Common + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::SPIRV iree::compiler::Codegen::Utils diff --git a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp index ed74c7079c0f..6d8ec736a34e 100644 --- a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp @@ -6,6 +6,7 @@ #include 
"iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.h" +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Dialect/HAL/Target/MetalSPIRV/SPIRVToMSL.h" #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h" @@ -44,7 +45,8 @@ class MetalSPIRVTargetBackend : public TargetBackend { std::string name() const override { return "metal"; } void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert(); } IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget( diff --git a/iree/compiler/Dialect/HAL/Target/ROCM/BUILD b/iree/compiler/Dialect/HAL/Target/ROCM/BUILD index 67ffe56895b1..6c1f412886bf 100644 --- a/iree/compiler/Dialect/HAL/Target/ROCM/BUILD +++ b/iree/compiler/Dialect/HAL/Target/ROCM/BUILD @@ -31,6 +31,7 @@ cc_library( ], deps = [ "//iree/compiler/Codegen:PassHeaders", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/LLVMGPU", "//iree/compiler/Dialect/HAL/Target", "//iree/compiler/Utils", diff --git a/iree/compiler/Dialect/HAL/Target/ROCM/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/ROCM/CMakeLists.txt index a57a8b868a93..041ca473dbbc 100644 --- a/iree/compiler/Dialect/HAL/Target/ROCM/CMakeLists.txt +++ b/iree/compiler/Dialect/HAL/Target/ROCM/CMakeLists.txt @@ -38,6 +38,7 @@ iree_cc_library( MLIRROCDLToLLVMIRTranslation MLIRSupport MLIRTargetLLVMIRExport + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::LLVMGPU iree::compiler::Codegen::PassHeaders iree::compiler::Dialect::HAL::Target diff --git a/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp b/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp index 08f26434316d..90aaeb32f4c2 100644 --- a/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp @@ -8,6 +8,7 @@ #include +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include 
"iree/compiler/Codegen/Passes.h" #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h" #include "iree/compiler/Utils/FlatbufferUtils.h" @@ -77,13 +78,13 @@ class ROCMTargetBackend final : public TargetBackend { void getDependentDialects(DialectRegistry ®istry) const override { mlir::registerLLVMDialectTranslation(registry); mlir::registerROCDLDialectTranslation(registry); + registry.insert(); } IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget( MLIRContext *context) const override { Builder b(context); SmallVector configItems; - ; configItems.emplace_back(b.getIdentifier("executable_targets"), getExecutableTargets(context)); diff --git a/iree/compiler/Dialect/HAL/Target/VMVX/BUILD b/iree/compiler/Dialect/HAL/Target/VMVX/BUILD index fdf19ce140c6..4582466745b0 100644 --- a/iree/compiler/Dialect/HAL/Target/VMVX/BUILD +++ b/iree/compiler/Dialect/HAL/Target/VMVX/BUILD @@ -30,6 +30,7 @@ cc_library( ], deps = [ "//iree/compiler/Codegen:PassHeaders", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Dialect/Flow/IR", "//iree/compiler/Dialect/HAL/Target", "//iree/compiler/Dialect/Modules/VMVX/IR:VMVXDialect", diff --git a/iree/compiler/Dialect/HAL/Target/VMVX/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/VMVX/CMakeLists.txt index f21e2c09ae4e..757b3fd2bad6 100644 --- a/iree/compiler/Dialect/HAL/Target/VMVX/CMakeLists.txt +++ b/iree/compiler/Dialect/HAL/Target/VMVX/CMakeLists.txt @@ -26,6 +26,7 @@ iree_cc_library( MLIRIR MLIRPass MLIRSupport + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Dialect::Flow::IR iree::compiler::Dialect::HAL::Target diff --git a/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp b/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp index 9206b7541607..8b3fa5274e0d 100644 --- a/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp @@ -6,6 +6,7 @@ #include 
"iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.h" +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h" #include "iree/compiler/Dialect/Modules/VMVX/IR/VMVXDialect.h" @@ -35,7 +36,8 @@ class VMVXTargetBackend final : public TargetBackend { std::string name() const override { return "vmvx"; } void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert(); } IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget( diff --git a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/BUILD b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/BUILD index 0345228d8572..2d398cc93b0b 100644 --- a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/BUILD +++ b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/BUILD @@ -31,6 +31,7 @@ cc_library( deps = [ "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/SPIRV", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/Flow/IR", diff --git a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/CMakeLists.txt index d7f45902413d..d653bfe7c1a9 100644 --- a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/CMakeLists.txt +++ b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/CMakeLists.txt @@ -34,6 +34,7 @@ iree_cc_library( MLIRSupport MLIRVector iree::compiler::Codegen::Common + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::SPIRV iree::compiler::Codegen::Utils diff --git a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp index 3afdcba1a76a..9af89bc04ae4 100644 --- a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp +++ 
b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp @@ -6,6 +6,7 @@ #include "iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.h" +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h" @@ -119,8 +120,8 @@ class VulkanSPIRVTargetBackend : public TargetBackend { std::string name() const override { return "vulkan"; } void getDependentDialects(DialectRegistry ®istry) const override { - registry - .insert(); + registry.insert(); } IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget( diff --git a/iree/compiler/Dialect/Shape/IR/Builders.cpp b/iree/compiler/Dialect/Shape/IR/Builders.cpp index c4557ee26aa8..8a56f16664b8 100644 --- a/iree/compiler/Dialect/Shape/IR/Builders.cpp +++ b/iree/compiler/Dialect/Shape/IR/Builders.cpp @@ -141,8 +141,8 @@ SmallVector buildOrFindDynamicDimsForValue(Location loc, Value value, // This is the first step on the path: we are going to gradually start // removing the implementation of the ShapeCarryingInterface on ops and use // the new ShapeAwareOpInterface. 
- auto dynamicDims = - IREE::Util::findDynamicDims(value, &*builder.getInsertionPoint()); + auto dynamicDims = IREE::Util::findDynamicDims(value, builder.getBlock(), + builder.getInsertionPoint()); if (dynamicDims.hasValue()) { return llvm::to_vector<4>(dynamicDims.getValue()); } diff --git a/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp b/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp index b77c2ef2029e..84a19314f86d 100644 --- a/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp +++ b/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp @@ -306,7 +306,9 @@ void ResourceDeallocaOp::getCanonicalizationPatterns( OpFoldResult ResourceSizeOp::fold(ArrayRef operands) { auto sizeAwareType = operand().getType().cast(); - return sizeAwareType.findSizeValue(operand(), *this); + Operation *op = this->getOperation(); + return sizeAwareType.findSizeValue(operand(), op->getBlock(), + Block::iterator(op)); } //===----------------------------------------------------------------------===// diff --git a/iree/compiler/Dialect/Util/IR/UtilInterfaces.td b/iree/compiler/Dialect/Util/IR/UtilInterfaces.td index e092f535b99c..fec6cd44418a 100644 --- a/iree/compiler/Dialect/Util/IR/UtilInterfaces.td +++ b/iree/compiler/Dialect/Util/IR/UtilInterfaces.td @@ -367,8 +367,9 @@ def Util_SizeAwareType : TypeInterface<"SizeAwareTypeInterface"> { let extraClassDeclaration = [{ // Walks the SSA use-def chain to find the size of the type. // Returns nullptr if the size cannot be found or if it is defined after - // |forOp|. - static Value findSizeValue(Value resourceValue, Operation *forOp); + // {|block|, |insertionPoint|}. + static Value findSizeValue(Value resourceValue, Block *block, + Block::iterator insertionPoint); // Returns an SSA value representing the byte size of |value| or nullptr // if not a sized value. 
diff --git a/iree/compiler/Dialect/Util/IR/UtilTypes.cpp b/iree/compiler/Dialect/Util/IR/UtilTypes.cpp index f1edb9240944..20d934649aab 100644 --- a/iree/compiler/Dialect/Util/IR/UtilTypes.cpp +++ b/iree/compiler/Dialect/Util/IR/UtilTypes.cpp @@ -294,34 +294,38 @@ void excludeTiedOperandAndResultIndices( // IREE::Util::SizeAwareTypeInterface //===----------------------------------------------------------------------===// -static bool isValueUsableForOp(Value value, Operation *forOp) { - if (forOp->getBlock() == nullptr) { +static bool isValueUsableForOp(Value value, Block *block, + Block::iterator insertionPoint) { + if (block == nullptr) { // Op is not in a block; can't analyze (maybe?). return false; } auto *definingBlock = value.getParentBlock(); - if (definingBlock == forOp->getBlock()) { + if (definingBlock == block) { // Defined in the same block; ensure block order. if (value.isa()) return true; - if (value.getDefiningOp()->isBeforeInBlock(forOp)) return true; + if (insertionPoint == block->end()) return true; + if (value.getDefiningOp()->isBeforeInBlock(&*insertionPoint)) { + return true; + } } else if (definingBlock->isEntryBlock()) { // Entry block always dominates - fast path for constants. return true; } else { // See if block the value is defined in dominates the forOp block. // TODO(benvanik): optimize this, it's terribly expensive to recompute. - DominanceInfo dominanceInfo(forOp->getParentOp()); - return dominanceInfo.dominates(definingBlock, forOp->getBlock()); + DominanceInfo dominanceInfo(block->getParentOp()); + return dominanceInfo.dominates(definingBlock, block); } return false; } // static -Value SizeAwareTypeInterface::findSizeValue(Value resourceValue, - Operation *forOp) { +Value SizeAwareTypeInterface::findSizeValue(Value resourceValue, Block *block, + Block::iterator insertionPoint) { // See if the value is produced by a size-aware op; we can just ask for the // size it has tied. 
Walking upward is always good as we know any size we find - // dominates |forOp|. + // dominates {|block|, |insertionPoint|}. SmallVector worklist; worklist.push_back(resourceValue); while (!worklist.empty()) { @@ -347,7 +351,8 @@ Value SizeAwareTypeInterface::findSizeValue(Value resourceValue, use.getOwner())) { auto sizeValue = sizeAwareOp.getOperandSize(use.getOperandNumber()); if (sizeValue) { - if (isValueUsableForOp(sizeValue, forOp)) return sizeValue; + if (isValueUsableForOp(sizeValue, block, insertionPoint)) + return sizeValue; } } if (auto tiedOp = @@ -369,8 +374,8 @@ Value SizeAwareTypeInterface::queryValueSize(Location loc, Value resourceValue, return {}; // Not a sized type. } if (!builder.getInsertionPoint().getNodePtr()->isKnownSentinel()) { - Operation &insertionPt = *builder.getInsertionPoint(); - auto sizeValue = sizeAwareType.findSizeValue(resourceValue, &insertionPt); + auto sizeValue = sizeAwareType.findSizeValue( + resourceValue, builder.getBlock(), builder.getInsertionPoint()); if (sizeValue) { return sizeValue; // Found in IR. } @@ -414,9 +419,10 @@ ValueRange findVariadicDynamicDims(unsigned idx, ValueRange values, return dynamicDims.slice(offset, shapedType.getNumDynamicDims()); } -Optional findDynamicDims(Value shapedValue, Operation *forOp) { +Optional findDynamicDims(Value shapedValue, Block *block, + Block::iterator insertionPoint) { // Look up the use-def chain: always safe, as any value we reach dominates - // |forOp| implicitly. + // {|block|, |insertionPoint|} implicitly. SmallVector worklist; worklist.push_back(shapedValue); while (!worklist.empty()) { @@ -432,16 +438,16 @@ Optional findDynamicDims(Value shapedValue, Operation *forOp) { } } - // Look down the use-def chain: not safe at some point because we'll move - // past where |forOp| is dominated. This is often fine for a bit, though, as - // |forOp| may be a user of |shapedValue| and be able to provide the shape - // itself. 
+ // Look down the use-def chain: not safe at some point because we'll move past + // where {|block|, |insertionPoint|} is dominated. This is often fine for a + // bit, though, as {|block|, |insertionPoint|} may be a user of |shapedValue| + // and be able to provide the shape itself. for (auto &use : shapedValue.getUses()) { if (auto shapeAwareOp = dyn_cast(use.getOwner())) { auto dynamicDims = shapeAwareOp.getOperandDynamicDims(use.getOperandNumber()); if (llvm::all_of(dynamicDims, [&](Value dim) { - return isValueUsableForOp(dim, forOp); + return isValueUsableForOp(dim, block, insertionPoint); })) { return dynamicDims; } diff --git a/iree/compiler/Dialect/Util/IR/UtilTypes.h b/iree/compiler/Dialect/Util/IR/UtilTypes.h index aec114aacded..03faf9c03690 100644 --- a/iree/compiler/Dialect/Util/IR/UtilTypes.h +++ b/iree/compiler/Dialect/Util/IR/UtilTypes.h @@ -171,8 +171,9 @@ void excludeTiedOperandAndResultIndices( // Walks the SSA use-def chain to find the dynamic dimensions of the value. // Returns None if the shape cannot be found or if it is defined after -// |forOp|. -Optional findDynamicDims(Value shapedValue, Operation *forOp); +// {|block|, |insertionPoint|}. +Optional findDynamicDims(Value shapedValue, Block *block, + Block::iterator insertionPoint); // Returns the dynamic dimensions for the value at |idx|. 
ValueRange findVariadicDynamicDims(unsigned idx, ValueRange values, diff --git a/iree/compiler/Utils/FlatbufferUtils.cpp b/iree/compiler/Utils/FlatbufferUtils.cpp index febbde9bdda3..3a00885c0b13 100644 --- a/iree/compiler/Utils/FlatbufferUtils.cpp +++ b/iree/compiler/Utils/FlatbufferUtils.cpp @@ -32,6 +32,7 @@ static SmallVector cloneBufferIntoContiguousBytes( void *result = flatcc_builder_copy_buffer(fbb, packedData.data(), packedData.size()); assert(result && "flatcc_emitter_t impl failed (non-default?)"); + (void)result; return packedData; } diff --git a/iree/hal/cuda/status_util.c b/iree/hal/cuda/status_util.c index b6a1b9480cd3..7532ecd22c71 100644 --- a/iree/hal/cuda/status_util.c +++ b/iree/hal/cuda/status_util.c @@ -26,7 +26,7 @@ iree_status_t iree_hal_cuda_result_to_status( if (syms->cuGetErrorString(result, &error_string) != CUDA_SUCCESS) { error_string = "Unknown error."; } - return iree_make_status(IREE_STATUS_INTERNAL, - "CUDA driver error '%s' (%d): %s", error_name, result, - error_string); + return iree_make_status_with_location(file, line, IREE_STATUS_INTERNAL, + "CUDA driver error '%s' (%d): %s", + error_name, result, error_string); } diff --git a/iree/hal/vulkan/status_util.c b/iree/hal/vulkan/status_util.c index 705f299ec213..e61008c44900 100644 --- a/iree/hal/vulkan/status_util.c +++ b/iree/hal/vulkan/status_util.c @@ -37,17 +37,19 @@ iree_status_t iree_hal_vulkan_result_to_status(VkResult result, // Error codes. case VK_ERROR_OUT_OF_HOST_MEMORY: // A host memory allocation has failed. - return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, - "VK_ERROR_OUT_OF_HOST_MEMORY"); + return iree_make_status_with_location(file, line, + IREE_STATUS_RESOURCE_EXHAUSTED, + "VK_ERROR_OUT_OF_HOST_MEMORY"); case VK_ERROR_OUT_OF_DEVICE_MEMORY: // A device memory allocation has failed. 
- return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, - "VK_ERROR_OUT_OF_DEVICE_MEMORY"); + return iree_make_status_with_location(file, line, + IREE_STATUS_RESOURCE_EXHAUSTED, + "VK_ERROR_OUT_OF_DEVICE_MEMORY"); case VK_ERROR_INITIALIZATION_FAILED: // Initialization of an object could not be completed for // implementation-specific reasons. - return iree_make_status(IREE_STATUS_UNAVAILABLE, - "VK_ERROR_INITIALIZATION_FAILED"); + return iree_make_status_with_location(file, line, IREE_STATUS_UNAVAILABLE, + "VK_ERROR_INITIALIZATION_FAILED"); case VK_ERROR_DEVICE_LOST: // The logical or physical device has been lost. // @@ -116,87 +118,101 @@ iree_status_t iree_hal_vulkan_result_to_status(VkResult result, // command buffer is in the pending state, or whether resources are // considered in-use by the device, a return value of // VK_ERROR_DEVICE_LOST is equivalent to VK_SUCCESS. - return iree_make_status(IREE_STATUS_INTERNAL, "VK_ERROR_DEVICE_LOST"); + return iree_make_status_with_location(file, line, IREE_STATUS_INTERNAL, + "VK_ERROR_DEVICE_LOST"); case VK_ERROR_MEMORY_MAP_FAILED: // Mapping of a memory object has failed. - return iree_make_status(IREE_STATUS_INTERNAL, - "VK_ERROR_MEMORY_MAP_FAILED"); + return iree_make_status_with_location(file, line, IREE_STATUS_INTERNAL, + "VK_ERROR_MEMORY_MAP_FAILED"); case VK_ERROR_LAYER_NOT_PRESENT: // A requested layer is not present or could not be loaded. - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "VK_ERROR_LAYER_NOT_PRESENT"); + return iree_make_status_with_location( + file, line, IREE_STATUS_UNIMPLEMENTED, "VK_ERROR_LAYER_NOT_PRESENT"); case VK_ERROR_EXTENSION_NOT_PRESENT: // A requested extension is not supported. - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "VK_ERROR_EXTENSION_NOT_PRESENT"); + return iree_make_status_with_location(file, line, + IREE_STATUS_UNIMPLEMENTED, + "VK_ERROR_EXTENSION_NOT_PRESENT"); case VK_ERROR_FEATURE_NOT_PRESENT: // A requested feature is not supported. 
- return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "VK_ERROR_FEATURE_NOT_PRESENT"); + return iree_make_status_with_location(file, line, + IREE_STATUS_UNIMPLEMENTED, + "VK_ERROR_FEATURE_NOT_PRESENT"); case VK_ERROR_INCOMPATIBLE_DRIVER: // The requested version of Vulkan is not supported by the driver or is // otherwise incompatible for implementation-specific reasons. - return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, - "VK_ERROR_INCOMPATIBLE_DRIVER"); + return iree_make_status_with_location(file, line, + IREE_STATUS_FAILED_PRECONDITION, + "VK_ERROR_INCOMPATIBLE_DRIVER"); case VK_ERROR_TOO_MANY_OBJECTS: // Too many objects of the type have already been created. - return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, - "VK_ERROR_TOO_MANY_OBJECTS"); + return iree_make_status_with_location(file, line, + IREE_STATUS_RESOURCE_EXHAUSTED, + "VK_ERROR_TOO_MANY_OBJECTS"); case VK_ERROR_FORMAT_NOT_SUPPORTED: // A requested format is not supported on this device. - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "VK_ERROR_FORMAT_NOT_SUPPORTED"); + return iree_make_status_with_location(file, line, + IREE_STATUS_UNIMPLEMENTED, + "VK_ERROR_FORMAT_NOT_SUPPORTED"); case VK_ERROR_FRAGMENTED_POOL: // A pool allocation has failed due to fragmentation of the pool’s // memory. This must only be returned if no attempt to allocate host // or device memory was made to accommodate the new allocation. - return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, - "VK_ERROR_FRAGMENTED_POOL"); + return iree_make_status_with_location(file, line, + IREE_STATUS_RESOURCE_EXHAUSTED, + "VK_ERROR_FRAGMENTED_POOL"); case VK_ERROR_OUT_OF_POOL_MEMORY: // A pool memory allocation has failed. This must only be returned if no // attempt to allocate host or device memory was made to accommodate the // new allocation. If the failure was definitely due to fragmentation of // the pool, VK_ERROR_FRAGMENTED_POOL should be returned instead. 
- return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, - "VK_ERROR_OUT_OF_POOL_MEMORY"); + return iree_make_status_with_location(file, line, + IREE_STATUS_RESOURCE_EXHAUSTED, + "VK_ERROR_OUT_OF_POOL_MEMORY"); case VK_ERROR_INVALID_EXTERNAL_HANDLE: // An external handle is not a valid handle of the specified type. - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "VK_ERROR_INVALID_EXTERNAL_HANDLE"); + return iree_make_status_with_location(file, line, + IREE_STATUS_INVALID_ARGUMENT, + "VK_ERROR_INVALID_EXTERNAL_HANDLE"); case VK_ERROR_SURFACE_LOST_KHR: // A surface is no longer available. - return iree_make_status(IREE_STATUS_UNAVAILABLE, - "VK_ERROR_SURFACE_LOST_KHR"); + return iree_make_status_with_location(file, line, IREE_STATUS_UNAVAILABLE, + "VK_ERROR_SURFACE_LOST_KHR"); case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR: // The requested window is already in use by Vulkan or another API in a // manner which prevents it from being used again. - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR"); + return iree_make_status_with_location( + file, line, IREE_STATUS_INVALID_ARGUMENT, + "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR"); case VK_ERROR_OUT_OF_DATE_KHR: // A surface has changed in such a way that it is no longer compatible // with the swapchain, and further presentation requests using the // swapchain will fail. Applications must query the new surface properties // and recreate their swapchain if they wish to continue presenting to the // surface. - return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, - "VK_ERROR_OUT_OF_DATE_KHR"); + return iree_make_status_with_location(file, line, + IREE_STATUS_FAILED_PRECONDITION, + "VK_ERROR_OUT_OF_DATE_KHR"); case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR: // The display used by a swapchain does not use the same presentable image // layout, or is incompatible in a way that prevents sharing an image. 
- return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR"); + return iree_make_status_with_location( + file, line, IREE_STATUS_INVALID_ARGUMENT, + "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR"); case VK_ERROR_VALIDATION_FAILED_EXT: // Validation layer testing failed. It is not expected that an // application would see this this error code during normal use of the // validation layers. - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "VK_ERROR_VALIDATION_FAILED_EXT"); + return iree_make_status_with_location(file, line, + IREE_STATUS_INVALID_ARGUMENT, + "VK_ERROR_VALIDATION_FAILED_EXT"); case VK_ERROR_INVALID_SHADER_NV: // One or more shaders failed to compile or link. More details are // reported back to the application when the validation layer is enabled // using the extension VK_EXT_debug_report. - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "VK_ERROR_INVALID_SHADER_NV"); + return iree_make_status_with_location(file, line, + IREE_STATUS_INVALID_ARGUMENT, + "VK_ERROR_INVALID_SHADER_NV"); case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT: // When creating an image with // VkImageDrmFormatModifierExplicitCreateInfoEXT, it is the application’s @@ -208,33 +224,37 @@ iree_status_t iree_hal_vulkan_result_to_status(VkResult result, // outside the scope of Vulkan, and therefore not described by Valid Usage // requirements). If this validation fails, then vkCreateImage returns // VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT. - return iree_make_status( - IREE_STATUS_INVALID_ARGUMENT, + return iree_make_status_with_location( + file, line, IREE_STATUS_INVALID_ARGUMENT, "VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT"); case VK_ERROR_FRAGMENTATION_EXT: // A descriptor pool creation has failed due to fragmentation. 
- return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, - "VK_ERROR_FRAGMENTATION_EXT"); + return iree_make_status_with_location(file, line, + IREE_STATUS_RESOURCE_EXHAUSTED, + "VK_ERROR_FRAGMENTATION_EXT"); case VK_ERROR_NOT_PERMITTED_EXT: // When creating a queue, the caller does not have sufficient privileges // to request to acquire a priority above the default priority // (VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT). - return iree_make_status(IREE_STATUS_PERMISSION_DENIED, - "VK_ERROR_NOT_PERMITTED_EXT"); + return iree_make_status_with_location(file, line, + IREE_STATUS_PERMISSION_DENIED, + "VK_ERROR_NOT_PERMITTED_EXT"); case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT: // A buffer creation failed because the requested address is not // available. - return iree_make_status(IREE_STATUS_OUT_OF_RANGE, - "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT"); + return iree_make_status_with_location( + file, line, IREE_STATUS_OUT_OF_RANGE, + "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT"); case VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT: // An operation on a swapchain created with // VK_FULL_SCREEN_EXCLUSIVE_APPLICATION_CONTROLLED_EXT failed as it did // not have exlusive full-screen access. This may occur due to // implementation-dependent reasons, outside of the application’s control. 
- return iree_make_status(IREE_STATUS_UNAVAILABLE, - "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"); + return iree_make_status_with_location( + file, line, IREE_STATUS_UNAVAILABLE, + "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"); default: - return iree_make_status(IREE_STATUS_UNKNOWN, "VkResult=%u", - (uint32_t)result); + return iree_make_status_with_location(file, line, IREE_STATUS_UNKNOWN, + "VkResult=%u", (uint32_t)result); } } diff --git a/iree/test/e2e/regression/BUILD b/iree/test/e2e/regression/BUILD index 77e48c3a3cda..71f6df50d603 100644 --- a/iree/test/e2e/regression/BUILD +++ b/iree/test/e2e/regression/BUILD @@ -131,7 +131,6 @@ iree_check_single_backend_test_suite( ], opt_flags = [ "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=%d N0=8" % (4 if lhs_rhs_type == "i8" else 1), - "--iree-codegen-vectorize-linalg-mmt4d", ], target_backends_and_drivers = [ ("dylib-llvm-aot", "dylib"), @@ -152,7 +151,6 @@ iree_check_single_backend_test_suite( ], opt_flags = [ "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=%d N0=8" % (4 if lhs_rhs_type == "i8" else 1), - "--iree-codegen-vectorize-linalg-mmt4d", ], target_backends_and_drivers = [ ("dylib-llvm-aot", "dylib"), diff --git a/iree/test/e2e/regression/CMakeLists.txt b/iree/test/e2e/regression/CMakeLists.txt index 795a8747bb66..309f907987df 100644 --- a/iree/test/e2e/regression/CMakeLists.txt +++ b/iree/test/e2e/regression/CMakeLists.txt @@ -176,7 +176,6 @@ iree_generated_trace_runner_test( "vmvx" OPT_FLAGS "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=4 N0=8" - "--iree-codegen-vectorize-linalg-mmt4d" ) iree_generated_trace_runner_test( @@ -197,7 +196,6 @@ iree_generated_trace_runner_test( "vmvx" OPT_FLAGS "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=1 N0=8" - "--iree-codegen-vectorize-linalg-mmt4d" ) iree_generated_trace_runner_test( @@ -216,7 +214,6 @@ iree_generated_trace_runner_test( "dylib" OPT_FLAGS "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=4 N0=8" - 
"--iree-codegen-vectorize-linalg-mmt4d" ) iree_generated_trace_runner_test( @@ -235,7 +232,6 @@ iree_generated_trace_runner_test( "dylib" OPT_FLAGS "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=1 N0=8" - "--iree-codegen-vectorize-linalg-mmt4d" ) ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/iree/test/e2e/regression/generate_e2e_matmul_tests.py b/iree/test/e2e/regression/generate_e2e_matmul_tests.py index 26aa509ed6ef..d37d9f1a1bd5 100644 --- a/iree/test/e2e/regression/generate_e2e_matmul_tests.py +++ b/iree/test/e2e/regression/generate_e2e_matmul_tests.py @@ -4,118 +4,201 @@ # Licensed under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -"""iree_generated_check_test generator for end-to-end matrix multiplication. +"""iree_generated_trace_runner_test generator for e2e matmul tests. """ import argparse -import random import os import yaml import re - - -# Returns lists of shapes as (M, K, N) tuples. -# For example (M, K, 1) is a matrix*vector product, and (M, 1, N) is an outer -# product. -def get_test_shapes(): - return { - "small": [ # Small sizes, square matrices - (x, x, x) for x in range(1, 40) - ] + [ - # Small sizes, slightly rectangular matrices - (2, 3, 4), - (8, 7, 6), - (15, 16, 17), - (14, 19, 23), - (31, 33, 32), - (25, 41, 35), - # Small sizes, involving vectors (i.e. most rectangular cases) - (10, 1, 1), - (1, 10, 1), - (1, 1, 10), - (1, 10, 10), - (10, 1, 10), - (10, 10, 1), - # Small sizes, involving other very small dimensions just above 1 - (13, 14, 2), - (3, 17, 12), - (21, 4, 18), - # Medium sizes, square matrices - (100, 100, 100), - # Medium sizes, slightly rectangular matrices - (101, 102, 103), - # Medium sizes, involving vectors (i.e. 
most rectangular cases) - (10000, 1, 1), - (1, 10000, 1), - (1, 1, 10000), - (1, 1000, 1000), - (1000, 1, 1000), - (1000, 1000, 1), - # Medium sizes, involving other very small dimensions just above 1 - (1300, 1300, 2), - (1300, 1300, 3), - (1300, 1300, 4), - ], - "large": [ - # Large sizes, powers of two - (256, 256, 512), - (512, 512, 128), - (1024, 512, 512), - (512, 1024, 512), - # Large sizes, powers of two minus one - (127, 63, 511), - # Large sizes, powers of two plus one - (129, 65, 513), - # Large sizes, misc. - (200, 300, 400), - (123, 456, 789), - (500, 500, 50), - # Be conservative in adding larger shapes. They can result in - # high latency tests. If you have to, consider splitting them - # out in a way that constrains the latency impact, e.g. by - # running on fewer backends/drivers or with fewer generators - # (see get_test_generators). - ] - } - - -# Returns lists of 'generators', which are tuples of the form -# (lhs_generator, rhs_generator, acc_generator, dynamicity) -# The first 3 entries specify how to generate test input data. -# The dynamicity entry chooses between static, dynamic or mixed shapes. -# -# TODO (Issue #7431): turn into enum and dataclass. -def get_test_generators(): - return { - "small": [ - # Generators using simple matrices for ease of numerical debugging. - # They don't add significant test coverage (all bugs are hit by - # tests using random matrices anyway). They are only here to make - # the bulk of our debugging easier. - ("identity", "identity", "zero", "dynamic"), - ("random", "identity", "zero", "dynamic"), - ("identity", "random", "zero", "dynamic"), - ("identity", "identity", "random", "dynamic"), - # Generators using general random matrices - ("random", "random", "random", "dynamic"), - ("random", "random", "random", "static"), - # TODO: enable 'mixed' testcases. For now they cause iree-opt - # errors. 
- #("random", "random", "random", "mixed"), - ], - "large": [ - # Fewer generators are used for large shapes, to limit the - # latency impact. Most bugs are going to be caught on small - # shapes anyway. - ("random", "random", "random", "dynamic"), - ("random", "random", "random", "static"), - ] - } - - -# Generates a name for a test function in the generated MLIR code. -def function_name(lhs_rhs_type, accum_type, shape, gen): - return f"{lhs_rhs_type}_{gen[3]}_{gen[0]}_{shape[0]}x{shape[1]}_times_{gen[1]}_{shape[1]}x{shape[2]}_plus_{gen[2]}_{accum_type}" +import enum +import dataclasses +import typing + + +# Data type of matrix entries. The string values must match MLIR data types. +# This is a superset of the values accepted for the --lhs_rhs_types= flag, +# as this also includes accumulator-specific types like i32. +@enum.unique +class MatrixElemTypeId(enum.Enum): + I8 = "i8" + I32 = "i32" + F32 = "f32" + + +# Enumerates of the collections of shapes that we can generate tests for. +# The values are the accepted values for the --shapes= flag. +@enum.unique +class ShapesId(enum.Enum): + SMALL = "small" + LARGE = "large" + + +# Enumerates ways to construct MLIR tensor types. +@enum.unique +class Dynamicity(enum.Enum): + DYNAMIC = "dynamic" # Use '?' everywhere. Example: tensor. + STATIC = "static" # Use fixed values everywhere. Example: tensor<4x6xf32>. + MIXED = "mixed" # Randomly mix '?' and values. Example: tensor. + + +# Enumerates ways to initialize matrix buffer contents. +@enum.unique +class MatrixGenerator(enum.Enum): + ZERO = "zero" # Fill with zeros + IDENTITY = "identity" # Make an identity matrix (generalized to any shape). + RANDOM = "random" # Fill with (deterministic) pseudorandom values. + + +# Describes the shape of a matrix multiplication in the usual convention: +# the LHS is {m}x{k}, the RHS is {k}x{n}, the accumulator/result is {m}x{n}. 
+@dataclasses.dataclass +class TestShape: + m: int + k: int + n: int + + +# Describes how to construct MLIR tensor types and how to initialize buffer +# contents for a test case (for an already given TestShape, and already given +# matrix element data types). +@dataclasses.dataclass +class TestGenerator: + lhs: MatrixGenerator + rhs: MatrixGenerator + acc: MatrixGenerator + dynamicity: Dynamicity + + +# Returns the list of TestShape's to use for the collection of shapes +# identified by shapes_id. +def get_test_shapes(shapes_id: ShapesId): + # Notes: + # 1. Be conservative in adding more shapes, as that can include both the + # build and execution latency of tests. The build latency is nearly the + # same for all shapes, while execution latency grows cubicly i.e. + # linearly with m*k*n. + # 2. Some shapes are commented out: they used to be tested but have been + # disabled to improve the trade-off between test coverage and build + # latency. + if shapes_id == ShapesId.SMALL: + return [ # Small sizes, square matrices + # was range(1, 40) before trimming. The choice of 18 is so that we + # exercise a case just above 16, as 16 will be a common kernel width. + TestShape(m=x, k=x, n=x) for x in range(1, 18) + ] + [ + # Small sizes, slightly rectangular matrices + TestShape(m=2, k=3, n=4), + #TestShape(m=8, k=7, n=6), + #TestShape(m=15, k=16, n=17), + TestShape(m=14, k=19, n=23), + #TestShape(m=31, k=33, n=32), + TestShape(m=25, k=41, n=35), + # Small sizes, involving vectors (i.e. 
most rectangular cases) + TestShape(m=10, k=1, n=1), + TestShape(m=1, k=10, n=1), + TestShape(m=1, k=1, n=10), + #TestShape(m=1, k=10, n=10), + #TestShape(m=10, k=1, n=10), + #TestShape(m=10, k=10, n=1), + # Small sizes, involving other very small dimensions just above 1 + TestShape(m=13, k=14, n=2), + TestShape(m=3, k=17, n=12), + TestShape(m=21, k=4, n=18), + # Medium sizes, square matrices + #TestShape(m=100, k=100, n=100), + # Medium sizes, slightly rectangular matrices + TestShape(m=101, k=102, n=103), + # Medium sizes, involving vectors (i.e. most rectangular cases) + TestShape(m=10000, k=1, n=1), + TestShape(m=1, k=10000, n=1), + TestShape(m=1, k=1, n=10000), + #TestShape(m=1, k=1000, n=1000), + #TestShape(m=1000, k=1, n=1000), + #TestShape(m=1000, k=1000, n=1), + # Medium sizes, involving other very small dimensions just above 1 + TestShape(m=1300, k=1300, n=2), + #TestShape(m=1300, k=1300, n=3), + #TestShape(m=1300, k=1300, n=4), + ] + if shapes_id == ShapesId.LARGE: + return [ + # Large sizes, powers of two + TestShape(m=256, k=256, n=512), + #TestShape(m=512, k=512, n=128), + #TestShape(m=1024, k=512, n=512), + #TestShape(m=512, k=1024, n=512), + # Large sizes, powers of two minus one + TestShape(m=127, k=63, n=511), + # Large sizes, powers of two plus one + TestShape(m=129, k=65, n=513), + # Large sizes, misc. + #TestShape(m=200, k=300, n=400), + TestShape(m=123, k=456, n=789), + #TestShape(m=500, k=500, n=50), + # Be conservative in adding larger shapes. They can result in + # high latency tests. If you have to, consider splitting them + # out in a way that constrains the latency impact, e.g. by + # running on fewer backends/drivers or with fewer generators + # (see get_test_generators). + ] + raise ValueError(shapes_id) + + +# Returns the list of TestGenerator's to use for the collection of shapes +# identified by shapes_id. 
+def get_test_generators(shapes_id: ShapesId): + if shapes_id == ShapesId.SMALL: + return [ + # Generators using simple matrices for ease of numerical debugging. + # They don't add significant test coverage (all bugs are hit by + # tests using random matrices anyway). They are only here to make + # the bulk of our debugging easier. + TestGenerator(lhs=MatrixGenerator.IDENTITY, + rhs=MatrixGenerator.IDENTITY, + acc=MatrixGenerator.ZERO, + dynamicity=Dynamicity.DYNAMIC), + TestGenerator(lhs=MatrixGenerator.RANDOM, + rhs=MatrixGenerator.IDENTITY, + acc=MatrixGenerator.ZERO, + dynamicity=Dynamicity.DYNAMIC), + TestGenerator(lhs=MatrixGenerator.IDENTITY, + rhs=MatrixGenerator.RANDOM, + acc=MatrixGenerator.ZERO, + dynamicity=Dynamicity.DYNAMIC), + TestGenerator(lhs=MatrixGenerator.IDENTITY, + rhs=MatrixGenerator.IDENTITY, + acc=MatrixGenerator.RANDOM, + dynamicity=Dynamicity.DYNAMIC), + # Generators using general random matrices + TestGenerator(lhs=MatrixGenerator.RANDOM, + rhs=MatrixGenerator.RANDOM, + acc=MatrixGenerator.RANDOM, + dynamicity=Dynamicity.DYNAMIC), + TestGenerator(lhs=MatrixGenerator.RANDOM, + rhs=MatrixGenerator.RANDOM, + acc=MatrixGenerator.RANDOM, + dynamicity=Dynamicity.STATIC), + TestGenerator(lhs=MatrixGenerator.RANDOM, + rhs=MatrixGenerator.RANDOM, + acc=MatrixGenerator.RANDOM, + dynamicity=Dynamicity.MIXED), + ] + if shapes_id == ShapesId.LARGE: + return [ + # Fewer generators are used for large shapes, to limit the + # latency impact. Most bugs are going to be caught on small + # shapes anyway. + TestGenerator(lhs=MatrixGenerator.RANDOM, + rhs=MatrixGenerator.RANDOM, + acc=MatrixGenerator.RANDOM, + dynamicity=Dynamicity.DYNAMIC), + TestGenerator(lhs=MatrixGenerator.RANDOM, + rhs=MatrixGenerator.RANDOM, + acc=MatrixGenerator.RANDOM, + dynamicity=Dynamicity.STATIC), + ] + raise ValueError(shapes_id) # Intentionally fixed seed! 
We want full reproducibility here, both across runs @@ -125,43 +208,120 @@ def function_name(lhs_rhs_type, accum_type, shape, gen): local_pseudorandom_state = 1 +# A static size value, i.e. a size value that could appear in a MLIR type +# such as 'tensor'. None means a dynamic size, similar to '?' in MLIR. +@dataclasses.dataclass +class DimSize: + value: typing.Optional[int] + + # Generates a compile-time MLIR size value, i.e. either a fixed positive integer -# or a '?' depending on dynamicity. -def static_size(x, dynamicity): - if dynamicity == "dynamic": - return "?" - elif dynamicity == "static": - return x - elif dynamicity == "mixed": +# or None (which maps to MLIR '?') depending on dynamicity. +def static_size(x: int, dynamicity: Dynamicity): + if dynamicity == Dynamicity.DYNAMIC: + return DimSize(None) + elif dynamicity == Dynamicity.STATIC: + return DimSize(x) + elif dynamicity == Dynamicity.MIXED: global local_pseudorandom_state # Same as C++ std::minstd_rand. # Using a local pseudorandom generator implementation ensures that it's # completely reproducible, across runs and across machines. local_pseudorandom_state = (local_pseudorandom_state * 48271) % 2147483647 - return x if local_pseudorandom_state > 1073741824 else "?" + return DimSize(x if local_pseudorandom_state > 1073741824 else None) else: raise ValueError(dynamicity) +# Stringification used for generating MLIR types, e.g. tensor. +def int_or_question_mark(s: DimSize): + return s.value or "?" + + +# Stringification used for generating alphanumeric identifiers, e.g. +# func @somefunction_DYNxDYNxf32, where we can't use "?" characters. +def int_or_DYN(s: DimSize): + return s.value or "DYN" + + +# Describes the fully resolved static dimensions of all 3 input matrices, +# LHS, RHS, and Accumulator, in a testcase. +# Each value is a string, which may either represent a positive integer such as "123", +# or a "?" string, meaning a dynamic dimension as in MLIR. 
+# These string values are used to generate MLIR function names and tensor shapes. +@dataclasses.dataclass +class TestInputMatricesStaticShapes: + lhs_rows: DimSize + lhs_cols: DimSize + rhs_rows: DimSize + rhs_cols: DimSize + acc_rows: DimSize + acc_cols: DimSize + + +# Helper for generate_function. Generates TestInputMatricesStaticShapes, i.e. +# converts from the runtime shape dimensions in TestShape and given dynamicity to +# the set of static shapes to be used in a test function's input tensors. +def generate_static_shapes(shape: TestShape, dynamicity: Dynamicity): + return TestInputMatricesStaticShapes( + lhs_rows=static_size(shape.m, dynamicity), + lhs_cols=static_size(shape.k, dynamicity), + rhs_rows=static_size(shape.k, dynamicity), + rhs_cols=static_size(shape.n, dynamicity), + acc_rows=static_size(shape.m, dynamicity), + acc_cols=static_size(shape.n, dynamicity), + ) + + +# Helper for generate_function. +# Generates a name for a test function in the generated MLIR code. +def generate_function_name(lhs_rhs_type: MatrixElemTypeId, + acc_type: MatrixElemTypeId, + static_shapes: TestInputMatricesStaticShapes): + input_t = lhs_rhs_type.value + acc_t = acc_type.value + lhs_m = int_or_DYN(static_shapes.lhs_rows) + lhs_k = int_or_DYN(static_shapes.lhs_cols) + rhs_k = int_or_DYN(static_shapes.rhs_rows) + rhs_n = int_or_DYN(static_shapes.rhs_cols) + acc_m = int_or_DYN(static_shapes.acc_rows) + acc_n = int_or_DYN(static_shapes.acc_cols) + return f"matmul_{lhs_m}x{lhs_k}x{input_t}_times_{rhs_k}x{rhs_n}x{input_t}_into_{acc_m}x{acc_n}x{acc_t}" + + +# Represents a generated test function. +@dataclasses.dataclass +class MLIRFunction: + name: str + definition: str + + # Generates a test function in the generated MLIR code. # The generated function will take the same arguments as linalg.matmul and # will just call linalg.matmul with them, returning its result. 
-def generate_function(func_name, lhs_rhs_type, accum_type, shape, gen): - (m, k, n) = shape - lhs_m = static_size(m, gen[3]) - lhs_k = static_size(k, gen[3]) - rhs_k = static_size(k, gen[3]) - rhs_n = static_size(n, gen[3]) - acc_m = static_size(m, gen[3]) - acc_n = static_size(n, gen[3]) - lhs_tensor_type = f"tensor<{lhs_m}x{lhs_k}x{lhs_rhs_type}>" - rhs_tensor_type = f"tensor<{rhs_k}x{rhs_n}x{lhs_rhs_type}>" - acc_tensor_type = f"tensor<{acc_m}x{acc_n}x{accum_type}>" - return ( +def generate_function(lhs_rhs_type: MatrixElemTypeId, + acc_type: MatrixElemTypeId, shape: TestShape, + dynamicity: Dynamicity): + static_shapes = generate_static_shapes(shape, dynamicity) + func_name = generate_function_name(lhs_rhs_type, acc_type, static_shapes) + lhs_m = int_or_question_mark(static_shapes.lhs_rows) + lhs_k = int_or_question_mark(static_shapes.lhs_cols) + rhs_k = int_or_question_mark(static_shapes.rhs_rows) + rhs_n = int_or_question_mark(static_shapes.rhs_cols) + acc_m = int_or_question_mark(static_shapes.acc_rows) + acc_n = int_or_question_mark(static_shapes.acc_cols) + lhs_tensor_type = f"tensor<{lhs_m}x{lhs_k}x{lhs_rhs_type.value}>" + rhs_tensor_type = f"tensor<{rhs_k}x{rhs_n}x{lhs_rhs_type.value}>" + acc_tensor_type = f"tensor<{acc_m}x{acc_n}x{acc_type.value}>" + func_definition = ( f"func @{func_name}(%lhs: {lhs_tensor_type}, %rhs: {rhs_tensor_type}, %acc: {acc_tensor_type}) -> {acc_tensor_type} {{\n" f" %result = linalg.matmul ins(%lhs, %rhs: {lhs_tensor_type}, {rhs_tensor_type}) outs(%acc: {acc_tensor_type}) -> {acc_tensor_type}\n" f" return %result: {acc_tensor_type}\n" f"}}\n") + return MLIRFunction( + name=func_name, + definition=func_definition, + ) # Intentionally fixed seed! We want full reproducibility here, both across runs @@ -172,12 +332,12 @@ def generate_function(func_name, lhs_rhs_type, accum_type, shape, gen): # Generates a contents_generator tag to use in the output trace. 
-def contents_generator_tag(generator): - if generator == "zero": +def contents_generator_tag(generator: MatrixGenerator): + if generator == MatrixGenerator.ZERO: return "" - elif generator == "identity": + elif generator == MatrixGenerator.IDENTITY: return "!tag:iree:identity_matrix" - elif generator == "random": + elif generator == MatrixGenerator.RANDOM: global pseudorandom_generator_seed pseudorandom_generator_seed = pseudorandom_generator_seed + 1 return f"!tag:iree:fully_specified_pseudorandom {pseudorandom_generator_seed}" @@ -187,11 +347,13 @@ def contents_generator_tag(generator): # Generate a matrix function argument in the output trace, as a dictionary # to be passed to yaml.dump. -def generate_trace_matrix_arg(matrix_shape, element_type, generator): +def generate_trace_matrix_arg(matrix_shape: list, + element_type: MatrixElemTypeId, + generator: MatrixGenerator): result = { "type": "hal.buffer_view", "shape": matrix_shape, - "element_type": element_type, + "element_type": element_type.value, } generator_tag = contents_generator_tag(generator) if generator_tag: @@ -201,12 +363,14 @@ def generate_trace_matrix_arg(matrix_shape, element_type, generator): # Generates the output trace for a testcase i.e. a single test function call, # as a dictionary to be passed to yaml.dump. 
-def generate_trace(func_name, lhs_rhs_type, acc_type, shape, gen): - (m, k, n) = shape - lhs_arg = generate_trace_matrix_arg([m, k], lhs_rhs_type, gen[0]) - rhs_arg = generate_trace_matrix_arg([k, n], lhs_rhs_type, gen[1]) - acc_arg = generate_trace_matrix_arg([m, n], acc_type, gen[2]) - result_arg = generate_trace_matrix_arg([m, n], acc_type, "zero") +def generate_trace(func_name: str, lhs_rhs_type: MatrixElemTypeId, + acc_type: MatrixElemTypeId, shape: TestShape, + gen: TestGenerator): + lhs_arg = generate_trace_matrix_arg([shape.m, shape.k], lhs_rhs_type, gen.lhs) + rhs_arg = generate_trace_matrix_arg([shape.k, shape.n], lhs_rhs_type, gen.rhs) + acc_arg = generate_trace_matrix_arg([shape.m, shape.n], acc_type, gen.acc) + result_arg = generate_trace_matrix_arg([shape.m, shape.n], acc_type, + MatrixGenerator.ZERO) return { "type": "call", "function": "module." + func_name, @@ -220,25 +384,24 @@ def generate_trace(func_name, lhs_rhs_type, acc_type, shape, gen): # Generates all output files' contents as strings. -def generate(args): - functions = {} +def generate(lhs_rhs_type: MatrixElemTypeId, acc_type: MatrixElemTypeId, + shapes_id: ShapesId): + function_definitions = {} traces = [] - lhs_rhs_type = args.lhs_rhs_type - accum_type = 'i32' if lhs_rhs_type == 'i8' else lhs_rhs_type - for shape in get_test_shapes()[args.shapes]: - for gen in get_test_generators()[args.shapes]: - func_name = function_name(lhs_rhs_type, accum_type, shape, gen) + for shape in get_test_shapes(shapes_id): + for gen in get_test_generators(shapes_id): + function = generate_function(lhs_rhs_type, acc_type, shape, + gen.dynamicity) # Different testcases may differ only by runtime parameters but # share the same code. For example, dynamic-shapes testcases # share the same code involing tensor even though the runtime # value in the trace are different. That's why we call # generate_function conditionally, and generate_trace unconditionally. 
- if func_name not in functions: - functions[func_name] = generate_function(func_name, lhs_rhs_type, - accum_type, shape, gen) + if function.name not in function_definitions: + function_definitions[function.name] = function.definition traces.append( - generate_trace(func_name, lhs_rhs_type, accum_type, shape, gen)) - return (functions, traces) + generate_trace(function.name, lhs_rhs_type, acc_type, shape, gen)) + return (function_definitions, traces) def parse_arguments(): @@ -258,7 +421,7 @@ def parse_arguments(): required=True) parser.add_argument("--shapes", type=str, - choices=["small", "large"], + choices=[s.value for s in ShapesId], help="Collection of matrix shapes to test", required=True) parser.add_argument( @@ -271,10 +434,10 @@ def parse_arguments(): return parser.parse_args() -def write_code_file(functions, filename): +def write_code_file(function_definitions, filename): with open(filename, "w") as file: - for funcname in functions: - file.write(functions[funcname] + "\n") + for funcname in function_definitions: + file.write(function_definitions[funcname] + "\n") def write_trace_file(traces, filename, module_path): @@ -310,9 +473,23 @@ def write_trace_file(traces, filename, module_path): file.write(processed_yaml) +# For now, the accumulator type can always be inferred from the input LHS/RHS +# type, so we do that. That is temporary: eventually there will be cases +# where the same input types are used with different accumulator types, e.g. +# f16 inputs with both f16 and f32 accumulator. 
+def infer_acc_type(lhs_rhs_type: MatrixElemTypeId): + if lhs_rhs_type == MatrixElemTypeId.I8: + return MatrixElemTypeId.I32 + else: + return lhs_rhs_type + + def main(args): - (functions, traces) = generate(args) - write_code_file(functions, args.output_code) + lhs_rhs_type = MatrixElemTypeId(args.lhs_rhs_type) + acc_type = infer_acc_type(lhs_rhs_type) + shapes_id = ShapesId(args.shapes) + (function_definitions, traces) = generate(lhs_rhs_type, acc_type, shapes_id) + write_code_file(function_definitions, args.output_code) write_trace_file(traces, args.output_trace, args.module_path) diff --git a/iree/test/e2e/regression/lowering_config.mlir b/iree/test/e2e/regression/lowering_config.mlir index a70c4910f6e5..17b401fec304 100644 --- a/iree/test/e2e/regression/lowering_config.mlir +++ b/iree/test/e2e/regression/lowering_config.mlir @@ -1,11 +1,17 @@ -#config1 = {tileSizes = [[32, 32, 32]], passPipeline = 1 : i32} -#config2 = {tileSizes = [[64, 64, 64]], passPipeline = 1 : i32} +#compilation0 = #iree_codegen.compilation.info< + #iree_codegen.lowering.config, + #iree_codegen.translation.info<"CPUVectorization", workload_per_wg = [32, 32]>, + workgroup_size = []> +#compilation1 = #iree_codegen.compilation.info< + #iree_codegen.lowering.config, + #iree_codegen.translation.info<"CPUVectorization", workload_per_wg = [64, 64]>, + workgroup_size = []> func @lowering_config_test() { %a = util.unfoldable_constant dense<1.0> : tensor<128x256xf32> %b = util.unfoldable_constant dense<2.0> : tensor<256x512xf32> %c = util.unfoldable_constant dense<2.0> : tensor<256x1024xf32> - %d = "mhlo.dot"(%a, %b) {lowering.config = #config1} : (tensor<128x256xf32>, tensor<256x512xf32>) -> tensor<128x512xf32> - %e = "mhlo.dot"(%a, %c) {lowering.config = #config2} : (tensor<128x256xf32>, tensor<256x1024xf32>) -> tensor<128x1024xf32> + %d = "mhlo.dot"(%a, %b) {compilation.info = #compilation0} : (tensor<128x256xf32>, tensor<256x512xf32>) -> tensor<128x512xf32> + %e = "mhlo.dot"(%a, %c) 
{compilation.info = #compilation1} : (tensor<128x256xf32>, tensor<256x1024xf32>) -> tensor<128x1024xf32> check.expect_almost_eq_const(%d, dense<512.0> : tensor<128x512xf32>) : tensor<128x512xf32> check.expect_almost_eq_const(%e, dense<512.0> : tensor<128x1024xf32>) : tensor<128x1024xf32> return diff --git a/iree/tools/BUILD b/iree/tools/BUILD index 2723b386a396..9a68ab2a6b1a 100644 --- a/iree/tools/BUILD +++ b/iree/tools/BUILD @@ -100,6 +100,7 @@ cc_library( deps = [ "//iree/compiler/Bindings/Native/Transforms", "//iree/compiler/Bindings/TFLite/Transforms", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Dialect/Flow/IR", "//iree/compiler/Dialect/Flow/Transforms", "//iree/compiler/Dialect/HAL/IR:HALDialect", diff --git a/iree/tools/init_iree_dialects.h b/iree/tools/init_iree_dialects.h index b37493471664..184af7755894 100644 --- a/iree/tools/init_iree_dialects.h +++ b/iree/tools/init_iree_dialects.h @@ -14,6 +14,7 @@ #include "iree-dialects/Dialect/IREE/IREEDialect.h" #include "iree-dialects/Dialect/IREEPyDM/IR/Dialect.h" +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Dialect/Flow/IR/FlowDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtDialect.h" @@ -32,7 +33,8 @@ namespace iree_compiler { // Add all the IREE dialects to the provided registry. inline void registerIreeDialects(DialectRegistry ®istry) { // clang-format off - registry.inserti32[index_reg]); + EMIT_TYPE_NAME(type_def); break; } @@ -1068,6 +1069,7 @@ iree_status_t iree_vm_bytecode_disasm_op( IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " : ")); EMIT_REF_REG_NAME(false_value_reg); EMIT_OPTIONAL_VALUE_REF(®s->ref[false_value_reg]); + EMIT_TYPE_NAME(type_def); break; }