diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4240cc753cc..2dd1695ba311 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,13 +4,7 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-cmake_minimum_required(VERSION 3.16.3)
-
-# Do not set MSVC warning flags like /W3 by default (since 3.15):
-#   https://cmake.org/cmake/help/v3.15/policy/CMP0092.html
-if(POLICY CMP0092)
-  cmake_policy(SET CMP0092 NEW)
-endif()
+cmake_minimum_required(VERSION 3.16.3...3.21)
 
 # LLVM requires CMP0116 for tblgen: https://reviews.llvm.org/D101083
 # CMP0116: Ninja generators transform `DEPFILE`s from `add_custom_command()`
@@ -37,6 +31,7 @@ endif()
 
 option(IREE_ENABLE_RUNTIME_TRACING "Enables instrumented runtime tracing." OFF)
 option(IREE_ENABLE_COMPILER_TRACING "Enables instrumented compiler tracing." OFF)
+option(IREE_ENABLE_THREADING "Builds IREE in with thread library support." ON)
 
 option(IREE_BUILD_COMPILER "Builds the IREE compiler." ON)
 option(IREE_BUILD_TESTS "Builds IREE unit tests." ON)
@@ -46,10 +41,9 @@ option(IREE_BUILD_SAMPLES "Builds IREE sample projects." ON)
 option(IREE_BUILD_TRACY "Builds tracy server tools." OFF)
 
 option(IREE_BUILD_TENSORFLOW_ALL "Builds all TensorFlow compiler frontends." OFF)
-option(IREE_BUILD_TENSORFLOW_COMPILER "Builds TensorFlow compiler frontend." OFF)
-option(IREE_BUILD_TFLITE_COMPILER "Builds the TFLite compiler frontend." OFF)
-option(IREE_BUILD_XLA_COMPILER "Builds TensorFlow XLA compiler frontend." OFF)
-option(IREE_ENABLE_THREADING "Builds IREE in with thread library support." ON)
+option(IREE_BUILD_TENSORFLOW_COMPILER "Builds TensorFlow compiler frontend." "${IREE_BUILD_TENSORFLOW_ALL}")
+option(IREE_BUILD_TFLITE_COMPILER "Builds the TFLite compiler frontend." "${IREE_BUILD_TENSORFLOW_ALL}")
+option(IREE_BUILD_XLA_COMPILER "Builds TensorFlow XLA compiler frontend." "${IREE_BUILD_TENSORFLOW_ALL}")
 
 set(IREE_HAL_DRIVERS_TO_BUILD "all"
   CACHE STRING "Semicolon-separated list of HAL drivers to build, or \"all\".")
@@ -72,15 +66,12 @@ if(${IREE_BUILD_TENSORFLOW_ALL} OR
   set(IREE_ENABLE_TENSORFLOW ON)
 endif()
 
+
 option(IREE_BUILD_BINDINGS_TFLITE "Builds the IREE TFLite C API compatibility shim" ON)
 option(IREE_BUILD_BINDINGS_TFLITE_JAVA "Builds the IREE TFLite Java bindings with the C API compatibility shim" ON)
 
 # Default python bindings to enabled for some features.
-if(${IREE_ENABLE_TENSORFLOW})
-  option(IREE_BUILD_PYTHON_BINDINGS "Builds the IREE python bindings" ON)
-else()
-  option(IREE_BUILD_PYTHON_BINDINGS "Builds the IREE python bindings" OFF)
-endif()
+option(IREE_BUILD_PYTHON_BINDINGS "Builds the IREE python bindings" "${IREE_ENABLE_TENSORFLOW}")
 
 #-------------------------------------------------------------------------------
 # Experimental project flags
@@ -440,17 +431,17 @@ if(IREE_ENABLE_THREADING)
   add_subdirectory(third_party/cpuinfo EXCLUDE_FROM_ALL)
 endif()
 
-iree_set_flatcc_cmake_options()
 add_subdirectory(build_tools/third_party/flatcc EXCLUDE_FROM_ALL)
-add_subdirectory(third_party/flatcc EXCLUDE_FROM_ALL)
 
 add_subdirectory(third_party/vulkan_headers EXCLUDE_FROM_ALL)
 
 # TODO(scotttodd): Iterate some more and find a better place for this.
 if (NOT CMAKE_CROSSCOMPILING)
-  install(TARGETS iree-flatcc-cli
-          COMPONENT iree-flatcc-cli
-          RUNTIME DESTINATION bin)
+  install(
+    TARGETS iree-flatcc-cli
+    COMPONENT iree-flatcc-cli
+    RUNTIME DESTINATION bin
+  )
 endif()
 
 if(IREE_BUILD_COMPILER)
diff --git a/benchmarks/TFLite/CMakeLists.txt b/benchmarks/TFLite/CMakeLists.txt
index e78feaff8524..bfa27c357932 100644
--- a/benchmarks/TFLite/CMakeLists.txt
+++ b/benchmarks/TFLite/CMakeLists.txt
@@ -7,37 +7,52 @@
 
 ################################################################################
 #                                                                              #
-# Benchmark models for Tosa                                                    #
+# Benchmark models from TFLite                                                 #
 #                                                                              #
-# Each module specification should be a list that contains the following       #
-# fields: MODULE_NAME, MODULE_TAGS, MLIR_SOURCE, ENTRY_FUNCTION,               #
-# FUNCTION_INPUTS. See iree_mlir_benchmark_suite definition for details about  #
-# these fields.                                                                #
+# Each module specification should be a list containing alternating keys and   #
+# values. The fields are: NAME, TAGS, MLIR_SOURCE, ENTRY_FUNCTION, and         #
+# FUNCTION_INPUTS. See the iree_mlir_benchmark_suite definition for details    #
+# about these fields.                                                          #
 #                                                                              #
 ################################################################################
 
 set(DEEPLABV3_FP32_MODULE
-  "DeepLabV3"                     # MODULE_NAME
-  "fp32"                          # MODULE_TAGS
-  "https://storage.googleapis.com/iree-model-artifacts/DeepLabV3-2bcafb1.tar.gz" # MLIR_SOURCE
-  "main"                          # ENTRY_FUNCTION
-  "1x257x257x3xf32"               # FUNCTION_INPUTS
+  NAME
+    "DeepLabV3"
+  TAGS
+    "fp32"
+  MLIR_SOURCE
+    "https://storage.googleapis.com/iree-model-artifacts/DeepLabV3-2bcafb1.tar.gz"
+  ENTRY_FUNCTION
+    "main"
+  FUNCTION_INPUTS
+    "1x257x257x3xf32"
 )
 
 set(MOBILESSD_FP32_MODULE
-  "MobileSSD"                     # MODULE_NAME
-  "fp32"                          # MODULE_TAGS
-  "https://storage.googleapis.com/iree-model-artifacts/MobileSSD-2bcafb1.tar.gz" # MLIR_SOURCE
-  "main"                          # ENTRY_FUNCTION
-  "1x320x320x3xf32"               # FUNCTION_INPUTS
+  NAME
+    "MobileSSD"
+  TAGS
+    "fp32"
+  MLIR_SOURCE
+    "https://storage.googleapis.com/iree-model-artifacts/MobileSSD-2bcafb1.tar.gz"
+  ENTRY_FUNCTION
+    "main"
+  FUNCTION_INPUTS
+    "1x320x320x3xf32"
 )
 
 set(POSENET_FP32_MODULE
-  "PoseNet"                       # MODULE_NAME
-  "fp32"                          # MODULE_TAGS
-  "https://storage.googleapis.com/iree-model-artifacts/PoseNet-2bcafb1.tar.gz" # MLIR_SOURCE
-  "main"                          # ENTRY_FUNCTION
-  "1x353x257x3xf32"               # FUNCTION_INPUTS
+  NAME
+    "PoseNet"
+  TAGS
+    "fp32"
+  MLIR_SOURCE
+    "https://storage.googleapis.com/iree-model-artifacts/PoseNet-2bcafb1.tar.gz"
+  ENTRY_FUNCTION
+    "main"
+  FUNCTION_INPUTS
+    "1x353x257x3xf32"
 )
 
 ################################################################################
@@ -53,9 +68,9 @@ set(POSENET_FP32_MODULE
 # CPU, Dylib-Sync, big/little-core, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${DEEPLABV3_FP32_MODULE}
-    ${MOBILESSD_FP32_MODULE}
-    ${POSENET_FP32_MODULE}
+    "${DEEPLABV3_FP32_MODULE}"
+    "${MOBILESSD_FP32_MODULE}"
+    "${POSENET_FP32_MODULE}"
 
   BENCHMARK_MODES
     "big-core,full-inference"
@@ -77,9 +92,9 @@ iree_mlir_benchmark_suite(
 # CPU, Dylib, 1-thread, big/little-core, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${DEEPLABV3_FP32_MODULE}
-    ${MOBILESSD_FP32_MODULE}
-    ${POSENET_FP32_MODULE}
+    "${DEEPLABV3_FP32_MODULE}"
+    "${MOBILESSD_FP32_MODULE}"
+    "${POSENET_FP32_MODULE}"
 
   BENCHMARK_MODES
     "1-thread,big-core,full-inference"
@@ -102,9 +117,9 @@ iree_mlir_benchmark_suite(
 # GPU, Vulkan, Adreno, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${DEEPLABV3_FP32_MODULE}
-    ${MOBILESSD_FP32_MODULE}
-    ${POSENET_FP32_MODULE}
+    "${DEEPLABV3_FP32_MODULE}"
+    "${MOBILESSD_FP32_MODULE}"
+    "${POSENET_FP32_MODULE}"
 
   BENCHMARK_MODES
     "full-inference"
@@ -124,9 +139,9 @@ iree_mlir_benchmark_suite(
 # GPU, Vulkan, Mali, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${DEEPLABV3_FP32_MODULE}
-    ${MOBILESSD_FP32_MODULE}
-    ${POSENET_FP32_MODULE}
+    "${DEEPLABV3_FP32_MODULE}"
+    "${MOBILESSD_FP32_MODULE}"
+    "${POSENET_FP32_MODULE}"
 
   BENCHMARK_MODES
     "full-inference"
@@ -146,9 +161,9 @@ iree_mlir_benchmark_suite(
 # GPU, Vulkan, Mali, kernel-execution
 iree_mlir_benchmark_suite(
   MODULES
-    ${DEEPLABV3_FP32_MODULE}
-    ${MOBILESSD_FP32_MODULE}
-    ${POSENET_FP32_MODULE}
+    "${DEEPLABV3_FP32_MODULE}"
+    "${MOBILESSD_FP32_MODULE}"
+    "${POSENET_FP32_MODULE}"
 
   BENCHMARK_MODES
     "kernel-execution"
diff --git a/benchmarks/TensorFlow/CMakeLists.txt b/benchmarks/TensorFlow/CMakeLists.txt
index 48c79b3ebdb6..827b11b41474 100644
--- a/benchmarks/TensorFlow/CMakeLists.txt
+++ b/benchmarks/TensorFlow/CMakeLists.txt
@@ -7,49 +7,69 @@
 
 ################################################################################
 #                                                                              #
-# Benchmark models                                                             #
+# Benchmark models from TensorFlow                                             #
 #                                                                              #
-# Each module specification should be a list that contains the following       #
-# fields: MODULE_NAME, MODULE_TAGS, MLIR_SOURCE, ENTRY_FUNCTION,               #
-# FUNCTION_INPUTS. See iree_mlir_benchmark_suite definition for details about  #
-# these fields.                                                                #
+# Each module specification should be a list containing alternating keys and   #
+# values. The fields are: NAME, TAGS, MLIR_SOURCE, ENTRY_FUNCTION, and         #
+# FUNCTION_INPUTS. See the iree_mlir_benchmark_suite definition for details    #
+# about these fields.                                                          #
 #                                                                              #
 ################################################################################
 
 set(MOBILEBERT_FP16_MODULE
-  "MobileBertSquad"               # MODULE_NAME
-  "fp16"                          # MODULE_TAGS
+  NAME
+    "MobileBertSquad"
+  TAGS
+    "fp16"
   # This uses the same input MLIR source as fp32 to save download time.
   # It requires users to have "--iree-flow-demote-f32-to-f16".
-  "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-89edfa50d.tar.gz" # MLIR_SOURCE
-  "serving_default"               # ENTRY_FUNCTION
+  MLIR_SOURCE
+    "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-89edfa50d.tar.gz"
+  ENTRY_FUNCTION
+    "serving_default"
   # The conversion done by "--iree-flow-demote-f32-to-f16" won't change the
   # original input signature.
-  "1x384xi32,1x384xi32,1x384xi32" # FUNCTION_INPUTS
+  FUNCTION_INPUTS
+    "1x384xi32,1x384xi32,1x384xi32"
 )
 
 set(MOBILEBERT_FP32_MODULE
-  "MobileBertSquad"               # MODULE_NAME
-  "fp32"                          # MODULE_TAGS
-  "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-89edfa50d.tar.gz" # MLIR_SOURCE
-  "serving_default"               # ENTRY_FUNCTION
-  "1x384xi32,1x384xi32,1x384xi32" # FUNCTION_INPUTS
+  NAME
+    "MobileBertSquad"
+  TAGS
+    "fp32"
+  MLIR_SOURCE
+    "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-89edfa50d.tar.gz"
+  ENTRY_FUNCTION
+    "serving_default"
+  FUNCTION_INPUTS
+    "1x384xi32,1x384xi32,1x384xi32"
 )
 
 set(MOBILENET_V2_MODULE
-  "MobileNetV2"     # MODULE_NAME
-  "fp32,imagenet"   # MODULE_TAGS
-  "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-89edfa50d.tar.gz" # MLIR_SOURCE
-  "call"            # ENTRY_FUNCTION
-  "1x224x224x3xf32" # FUNCTION_INPUTS
+  NAME
+    "MobileNetV2"
+  TAGS
+    "fp32,imagenet"
+  MLIR_SOURCE
+    "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-89edfa50d.tar.gz"
+  ENTRY_FUNCTION
+    "call"
+  FUNCTION_INPUTS
+    "1x224x224x3xf32"
 )
 
 set(MOBILENET_V3SMALL_MODULE
-  "MobileNetV3Small" # MODULE_NAME
-  "fp32,imagenet"    # MODULE_TAGS
-  "https://storage.googleapis.com/iree-model-artifacts/MobileNetV3Small-89edfa50d.tar.gz" # MLIR_SOURCE
-  "call"             # ENTRY_FUNCTION
-  "1x224x224x3xf32"  # FUNCTION_INPUTS
+  NAME
+    "MobileNetV3Small"
+  TAGS
+    "fp32,imagenet"
+  MLIR_SOURCE
+    "https://storage.googleapis.com/iree-model-artifacts/MobileNetV3Small-89edfa50d.tar.gz"
+  ENTRY_FUNCTION
+    "call"
+  FUNCTION_INPUTS
+    "1x224x224x3xf32"
 )
 
 ################################################################################
@@ -65,8 +85,8 @@ set(MOBILENET_V3SMALL_MODULE
 # CPU, VMVX, 3-thread, little-core, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILENET_V2_MODULE}
-    ${MOBILENET_V3SMALL_MODULE}
+    "${MOBILENET_V2_MODULE}"
+    "${MOBILENET_V3SMALL_MODULE}"
 
   BENCHMARK_MODES
     "3-thread,little-core,full-inference"
@@ -86,8 +106,8 @@ iree_mlir_benchmark_suite(
 # CPU, Dylib-Sync, big/little-core, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILENET_V2_MODULE}
-    ${MOBILENET_V3SMALL_MODULE}
+    "${MOBILENET_V2_MODULE}"
+    "${MOBILENET_V3SMALL_MODULE}"
 
   BENCHMARK_MODES
     "big-core,full-inference"
@@ -109,8 +129,8 @@ iree_mlir_benchmark_suite(
 # CPU, Dylib, 1-thread, big/little-core, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILENET_V2_MODULE}
-    ${MOBILENET_V3SMALL_MODULE}
+    "${MOBILENET_V2_MODULE}"
+    "${MOBILENET_V3SMALL_MODULE}"
 
   BENCHMARK_MODES
     "1-thread,big-core,full-inference"
@@ -133,8 +153,8 @@ iree_mlir_benchmark_suite(
 # CPU, Dylib, 3-thread, big/little-core, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILENET_V2_MODULE}
-    ${MOBILENET_V3SMALL_MODULE}
+    "${MOBILENET_V2_MODULE}"
+    "${MOBILENET_V3SMALL_MODULE}"
 
   BENCHMARK_MODES
     "3-thread,big-core,full-inference"
@@ -157,9 +177,9 @@ iree_mlir_benchmark_suite(
 # GPU, Vulkan, Adreno, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILEBERT_FP32_MODULE}
-    ${MOBILENET_V2_MODULE}
-    ${MOBILENET_V3SMALL_MODULE}
+    "${MOBILEBERT_FP32_MODULE}"
+    "${MOBILENET_V2_MODULE}"
+    "${MOBILENET_V3SMALL_MODULE}"
 
   BENCHMARK_MODES
     "full-inference"
@@ -179,8 +199,8 @@ iree_mlir_benchmark_suite(
 # GPU, Vulkan, Adreno, kernel-execution
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILENET_V2_MODULE}
-    ${MOBILENET_V3SMALL_MODULE}
+    "${MOBILENET_V2_MODULE}"
+    "${MOBILENET_V3SMALL_MODULE}"
 
   BENCHMARK_MODES
     "kernel-execution"
@@ -203,9 +223,9 @@ iree_mlir_benchmark_suite(
 # GPU, Vulkan, Mali, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILEBERT_FP32_MODULE}
-    ${MOBILENET_V2_MODULE}
-    ${MOBILENET_V3SMALL_MODULE}
+    "${MOBILEBERT_FP32_MODULE}"
+    "${MOBILENET_V2_MODULE}"
+    "${MOBILENET_V3SMALL_MODULE}"
 
   BENCHMARK_MODES
     "full-inference"
@@ -225,8 +245,8 @@ iree_mlir_benchmark_suite(
 # GPU, Vulkan, Mali, kernel-execution
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILENET_V2_MODULE}
-    ${MOBILENET_V3SMALL_MODULE}
+    "${MOBILENET_V2_MODULE}"
+    "${MOBILENET_V3SMALL_MODULE}"
 
   BENCHMARK_MODES
     "kernel-execution"
@@ -249,7 +269,7 @@ iree_mlir_benchmark_suite(
 # GPU, Vulkan, Mali, kernel-execution
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILEBERT_FP16_MODULE}
+    "${MOBILEBERT_FP16_MODULE}"
 
   BENCHMARK_MODES
     "kernel-execution"
@@ -273,7 +293,7 @@ iree_mlir_benchmark_suite(
 # GPU, Vulkan, Mali, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILEBERT_FP16_MODULE}
+    "${MOBILEBERT_FP16_MODULE}"
 
   BENCHMARK_MODES
     "full-inference"
@@ -303,7 +323,7 @@ iree_mlir_benchmark_suite(
 # CPU, Dylib-Sync, big/little-core, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILEBERT_FP32_MODULE}
+    "${MOBILEBERT_FP32_MODULE}"
 
   BENCHMARK_MODES
     "big-core,full-inference"
@@ -325,7 +345,7 @@ iree_mlir_benchmark_suite(
 # CPU, Dylib, 1-thread, big/little-core, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILEBERT_FP32_MODULE}
+    "${MOBILEBERT_FP32_MODULE}"
 
   BENCHMARK_MODES
     "1-thread,big-core,full-inference"
@@ -348,7 +368,7 @@ iree_mlir_benchmark_suite(
 # CPU, Dylib, 3-thread, big/little-core, full-inference
 iree_mlir_benchmark_suite(
   MODULES
-    ${MOBILEBERT_FP32_MODULE}
+    "${MOBILEBERT_FP32_MODULE}"
 
   BENCHMARK_MODES
     "3-thread,big-core,full-inference"
diff --git a/build_tools/cmake/build_android.sh b/build_tools/cmake/build_android.sh
index 12d1bd1175ae..dcf07f48862a 100755
--- a/build_tools/cmake/build_android.sh
+++ b/build_tools/cmake/build_android.sh
@@ -45,6 +45,7 @@ cd build-host
 # Configure, build, install.
 "${CMAKE_BIN?}" -G Ninja .. \
   -DCMAKE_INSTALL_PREFIX=./install \
+  -DIREE_ENABLE_ASSERTIONS=ON \
   -DIREE_BUILD_COMPILER=ON \
   -DIREE_BUILD_TESTS=OFF \
   -DIREE_BUILD_BENCHMARKS=ON \
@@ -74,6 +75,7 @@ cd build-android
   -DANDROID_ABI="${ANDROID_ABI?}" \
   -DANDROID_PLATFORM=android-29 \
   -DIREE_HOST_BINARY_ROOT=$PWD/../build-host/install \
+  -DIREE_ENABLE_ASSERTIONS=ON \
   -DIREE_BUILD_COMPILER=OFF \
   -DIREE_BUILD_TESTS=ON \
   -DIREE_BUILD_SAMPLES=OFF
diff --git a/build_tools/cmake/build_riscv.sh b/build_tools/cmake/build_riscv.sh
index bf085acec339..4bc9ae4d5bc0 100755
--- a/build_tools/cmake/build_riscv.sh
+++ b/build_tools/cmake/build_riscv.sh
@@ -40,6 +40,7 @@ fi
   -DCMAKE_INSTALL_PREFIX="${BUILD_HOST_DIR?}/install" \
   -DCMAKE_C_COMPILER=clang \
   -DCMAKE_CXX_COMPILER=clang++ \
+  -DIREE_ENABLE_ASSERTIONS=ON \
   -DIREE_BUILD_COMPILER=ON \
   -DIREE_BUILD_TESTS=OFF \
   -DIREE_BUILD_SAMPLES=OFF \
@@ -66,6 +67,7 @@ args=(
   -DCMAKE_TOOLCHAIN_FILE="$(realpath ${ROOT_DIR?}/build_tools/cmake/riscv.toolchain.cmake)"
   -DIREE_HOST_BINARY_ROOT="$(realpath ${BUILD_HOST_DIR?}/install)"
   -DRISCV_CPU="${RISCV_CONFIG?}"
+  -DIREE_ENABLE_ASSERTIONS=ON
   -DIREE_BUILD_COMPILER=OFF
   -DIREE_BUILD_SAMPLES=ON
 )
diff --git a/build_tools/cmake/iree_macros.cmake b/build_tools/cmake/iree_macros.cmake
index 3dccb5af0828..aebbf9f15c16 100644
--- a/build_tools/cmake/iree_macros.cmake
+++ b/build_tools/cmake/iree_macros.cmake
@@ -284,7 +284,7 @@ endfunction()
 # Adds test environment variable properties based on the current build options.
 #
 # Parameters:
-# TEST_NAME: the test name, e.g. iree/base:math_test
+#   TEST_NAME: the test name, e.g. iree/base:math_test
 function(iree_add_test_environment_properties TEST_NAME)
   # IREE_*_DISABLE environment variables may used to skip test cases which
   # require both a compiler target backend and compatible runtime HAL driver.
@@ -303,3 +303,51 @@ function(iree_add_test_environment_properties TEST_NAME)
     set_property(TEST ${TEST_NAME} APPEND PROPERTY ENVIRONMENT "IREE_LLVMAOT_DISABLE=1")
   endif()
 endfunction()
+
+# iree_check_defined
+#
+# A lightweight way to check that all the given variables are defined. Useful
+# in cases like checking that a function has been passed all required arguments.
+# Emits a generic SEND_ERROR per undefined variable (no usage-specific message).
+# Implemented as a function so the loop variable does not leak into the caller.
+# Variable names should be passed directly without quoting or dereferencing.
+# Example:
+#   iree_check_defined(_SOME_VAR _AND_ANOTHER_VAR)
+function(iree_check_defined)
+  foreach(_VAR ${ARGN})
+    if(NOT DEFINED "${_VAR}")
+      message(SEND_ERROR "${_VAR} is not defined")
+    endif()
+  endforeach()
+endfunction()
+
+# iree_validate_required_arguments
+#
+# Sanity check meant to run right after cmake_parse_arguments(). Reports, via
+# SEND_ERROR, leftover unparsed arguments, keywords that were given no values,
+# and required keywords whose parsed variable is not defined.
+#
+# Parameters:
+#   PREFIX: prefix of the variables created by cmake_parse_arguments
+#   REQUIRED_ONE_VALUE_KEYWORDS: list of required single-value keywords
+#   REQUIRED_MULTI_VALUE_KEYWORDS: list of required multi-value keywords
+function(iree_validate_required_arguments
+         PREFIX
+         REQUIRED_ONE_VALUE_KEYWORDS
+         REQUIRED_MULTI_VALUE_KEYWORDS)
+  if(DEFINED ${PREFIX}_UNPARSED_ARGUMENTS)
+    message(SEND_ERROR "Unparsed argument(s): '${${PREFIX}_UNPARSED_ARGUMENTS}'")
+  endif()
+  if(DEFINED ${PREFIX}_KEYWORDS_MISSING_VALUES)
+    message(SEND_ERROR
+            "No values for field(s) '${${PREFIX}_KEYWORDS_MISSING_VALUES}'")
+  endif()
+
+  # Both keyword kinds are checked the same way: the parsed variable must exist.
+  foreach(_KEYWORD IN LISTS REQUIRED_ONE_VALUE_KEYWORDS
+                            REQUIRED_MULTI_VALUE_KEYWORDS)
+    if(NOT DEFINED ${PREFIX}_${_KEYWORD})
+      message(SEND_ERROR "Missing required argument ${_KEYWORD}")
+    endif()
+  endforeach()
+endfunction()
diff --git a/build_tools/cmake/iree_mlir_benchmark_suite.cmake b/build_tools/cmake/iree_mlir_benchmark_suite.cmake
index 8028809c8ce0..f2971bd99fc1 100644
--- a/build_tools/cmake/iree_mlir_benchmark_suite.cmake
+++ b/build_tools/cmake/iree_mlir_benchmark_suite.cmake
@@ -71,62 +71,44 @@ function(iree_mlir_benchmark_suite)
     "BENCHMARK_MODES;MODULES;TRANSLATION_FLAGS;RUNTIME_FLAGS"
   )
 
-  # All fields' names for each module.
-  set(_FIELD_NAMES "_MODULE_NAME" "_MODULE_TAGS"
-                   "_MLIR_SOURCE" "_ENTRY_FUNCTION" "_FUNCTION_INPUTS")
-  list(LENGTH _FIELD_NAMES _FIELD_COUNT)
-  math(EXPR _MAX_FIELD_INDEX "${_FIELD_COUNT} - 1")
-
-  # Make sure we have some multiple of six elements.
-  list(LENGTH _RULE_MODULES _MODULE_TOTAL_ELEMENT_COUNT)
-  math(EXPR _MODULE_COUNT
-       "${_MODULE_TOTAL_ELEMENT_COUNT} / ${_FIELD_COUNT}")
-  math(EXPR _MODULE_ELEMENT_REMAINDER
-       "${_MODULE_TOTAL_ELEMENT_COUNT} % ${_FIELD_COUNT}")
-  if(NOT ${_MODULE_ELEMENT_REMAINDER} EQUAL 0)
-    message(SEND_ERROR "MODULES expected to have some multiple of six "
-                       "elements; some module has missing/redundant fields.")
-  endif()
+  iree_validate_required_arguments(
+    _RULE
+    "DRIVER;TARGET_BACKEND;TARGET_ARCHITECTURE"
+    "BENCHMARK_MODES;MODULES"
+  )
 
-  # Loop over all modules to create targets.
-  math(EXPR _MAX_MODULE_INDEX "${_MODULE_COUNT} - 1")
-  foreach(_MODULE_INDEX RANGE 0 "${_MAX_MODULE_INDEX}")
-    # Loop over all elements for the current module and assign them to the
-    # corresponding field names for later use.
-    foreach(_FIELD_INDEX RANGE 0 "${_MAX_FIELD_INDEX}")
-      list(GET _FIELD_NAMES ${_FIELD_INDEX} _FIELD_NAME)
-      math(EXPR _INDEX "${_MODULE_INDEX} * ${_FIELD_COUNT} + ${_FIELD_INDEX}")
-      list(GET _RULE_MODULES ${_INDEX} ${_FIELD_NAME})
-    endforeach()
-
-    # Use the last directory's name as the category.
-    get_filename_component(_CATEGORY "${CMAKE_CURRENT_SOURCE_DIR}" NAME)
+  foreach(_MODULE IN LISTS _RULE_MODULES)
+    cmake_parse_arguments(
+      _MODULE
+      ""
+      "NAME;TAGS;MLIR_SOURCE;ENTRY_FUNCTION;FUNCTION_INPUTS"
+      ""
+      ${_MODULE}
+    )
+    iree_validate_required_arguments(
+      _MODULE
+      "NAME;TAGS;MLIR_SOURCE;ENTRY_FUNCTION;FUNCTION_INPUTS"
+      ""
+    )
 
-    # Generate all benchmarks to the root build directory. This helps for
-    # discovering them and execute them on devices.
+    get_filename_component(_CATEGORY "${CMAKE_CURRENT_SOURCE_DIR}" NAME)
     set(_ROOT_ARTIFACTS_DIR "${IREE_BINARY_DIR}/benchmark_suites/${_CATEGORY}")
     set(_VMFB_ARTIFACTS_DIR "${_ROOT_ARTIFACTS_DIR}/vmfb")
 
-    # The source file used to generate benchmark artifacts.
-    set(_SOURCE_FILE "${_MLIR_SOURCE}")
     # The CMake target's name if we need to download from the web.
     set(_DOWNLOAD_TARGET_NAME "")
 
-    # If the source file is from the web, create a custom command to download it.
-    # And wrap that with a custom target so later we can use for dependency.
+    # If the source file is from the web, create a custom command to download
+    # it, wrapped in a custom target that later serves as a dependency.
     #
     # Note: We actually should not do this; instead, we should directly compile
     # from the initial source (i.e., TensorFlow Python models). But that is
-    # tangled with the pending Python testing infrastructure revamp so we'd prefer
-    # to not do that right now.
-    if("${_MLIR_SOURCE}" MATCHES "^https?://")
+    # tangled with the pending Python testing infrastructure revamp so we'd
+    # prefer to not do that right now.
+    if("${_MODULE_MLIR_SOURCE}" MATCHES "^https?://")
       # Update the source file to the downloaded-to place.
-      string(REPLACE "/" ";" _SOURCE_URL_SEGMENTS "${_MLIR_SOURCE}")
-      # TODO: we can do `list(POP_BACK _SOURCE_URL_SEGMENTS _LAST_URL_SEGMENT)`
-      # after migrating to CMake 3.15.
-      list(LENGTH _SOURCE_URL_SEGMENTS _URL_SEGMENT_COUNT)
-      math(EXPR _SEGMENT_LAST_INDEX "${_URL_SEGMENT_COUNT} - 1")
-      list(GET _SOURCE_URL_SEGMENTS ${_SEGMENT_LAST_INDEX} _LAST_URL_SEGMENT)
+      string(REPLACE "/" ";" _SOURCE_URL_SEGMENTS "${_MODULE_MLIR_SOURCE}")
+      list(POP_BACK _SOURCE_URL_SEGMENTS _LAST_URL_SEGMENT)
       set(_DOWNLOAD_TARGET_NAME "iree-download-benchmark-source-${_LAST_URL_SEGMENT}")
 
       string(REPLACE "tar.gz" "mlir" _FILE_NAME "${_LAST_URL_SEGMENT}")
@@ -137,10 +119,10 @@ function(iree_mlir_benchmark_suite)
           OUTPUT "${_SOURCE_FILE}"
           COMMAND
             "${Python3_EXECUTABLE}" "${IREE_ROOT_DIR}/scripts/download_file.py"
-            "${_MLIR_SOURCE}" -o "${_ROOT_ARTIFACTS_DIR}"
+            "${_MODULE_MLIR_SOURCE}" -o "${_ROOT_ARTIFACTS_DIR}"
           DEPENDS
             "${IREE_ROOT_DIR}/scripts/download_file.py"
-          COMMENT "Downloading ${_MLIR_SOURCE}"
+          COMMENT "Downloading ${_MODULE_MLIR_SOURCE}"
         )
         add_custom_target("${_DOWNLOAD_TARGET_NAME}"
           DEPENDS "${_SOURCE_FILE}"
@@ -160,8 +142,8 @@ function(iree_mlir_benchmark_suite)
       string(REPLACE "," "-" _TAGS "${_MODULE_TAGS}")
       string(REPLACE "," "-" _MODE "${_BENCHMARK_MODE}")
       list(APPEND _COMMON_NAME_SEGMENTS
-           "${_TAGS}" "${_MODE}" "${_RULE_TARGET_BACKEND}"
-           "${_RULE_TARGET_ARCHITECTURE}")
+            "${_TAGS}" "${_MODE}" "${_RULE_TARGET_BACKEND}"
+            "${_RULE_TARGET_ARCHITECTURE}")
 
       # The full list of translation flags.
       set(_TRANSLATION_ARGS "--iree-mlir-to-vm-bytecode-module")
@@ -220,8 +202,8 @@ function(iree_mlir_benchmark_suite)
           "${Python3_EXECUTABLE}" "${IREE_ROOT_DIR}/scripts/generate_flagfile.py"
             --module_file="../../vmfb/compiled-${_VMFB_HASH}.vmfb"
             --driver=${_RULE_DRIVER}
-            --entry_function=${_ENTRY_FUNCTION}
-            --function_inputs=${_FUNCTION_INPUTS}
+            --entry_function=${_MODULE_ENTRY_FUNCTION}
+            --function_inputs=${_MODULE_FUNCTION_INPUTS}
             "${_ADDITIONAL_ARGS_CL}"
             -o "${_FLAG_FILE}"
         DEPENDS
@@ -241,5 +223,6 @@ function(iree_mlir_benchmark_suite)
       # Mark dependency so that we have one target to drive them all.
       add_dependencies(iree-benchmark-suites "${_FLAGFILE_GEN_TARGET_NAME}")
     endforeach(_BENCHMARK_MODE IN LISTS _RULE_BENCHMARK_MODES)
-  endforeach(_MODULE_INDEX RANGE 0 "${_MAX_MODULE_INDEX}")
-endfunction()
+
+  endforeach(_MODULE IN LISTS _RULE_MODULES)
+endfunction(iree_mlir_benchmark_suite)
diff --git a/build_tools/cmake/iree_run_binary_test.cmake b/build_tools/cmake/iree_run_binary_test.cmake
index c233760b1927..5d689004e749 100644
--- a/build_tools/cmake/iree_run_binary_test.cmake
+++ b/build_tools/cmake/iree_run_binary_test.cmake
@@ -74,8 +74,7 @@ function(iree_run_binary_test)
   endif()
 
   if(ANDROID)
-    set(_ANDROID_REL_DIR "${_PACKAGE_PATH}/${_RULE_NAME}")
-    set(_ANDROID_ABS_DIR "/data/local/tmp/${_ANDROID_REL_DIR}")
+    set(_ANDROID_ABS_DIR "/data/local/tmp/${_PACKAGE_PATH}/${_RULE_NAME}")
   endif()
 
   if (DEFINED _RULE_TEST_INPUT_FILE_ARG)
@@ -92,9 +91,6 @@ function(iree_run_binary_test)
   string(REGEX REPLACE "^::" "${_PACKAGE_NS}::" _TEST_BINARY_TARGET ${_RULE_TEST_BINARY})
 
   if(ANDROID)
-    set(_ANDROID_REL_DIR "${_PACKAGE_PATH}/${_RULE_NAME}")
-    set(_ANDROID_ABS_DIR "/data/local/tmp/${_ANDROID_REL_DIR}")
-
     # Define a custom target for pushing and running the test on Android device.
     set(_TEST_NAME ${_TEST_NAME}_on_android_device)
     add_test(
@@ -102,7 +98,7 @@ function(iree_run_binary_test)
         ${_TEST_NAME}
       COMMAND
         "${CMAKE_SOURCE_DIR}/build_tools/cmake/run_android_test.${IREE_HOST_SCRIPT_EXT}"
-        "${_ANDROID_REL_DIR}/$<TARGET_FILE_NAME:${_TEST_BINARY_TARGET}>"
+        "${_ANDROID_ABS_DIR}/$<TARGET_FILE_NAME:${_TEST_BINARY_TARGET}>"
         ${_RULE_ARGS}
     )
     # Use environment variables to instruct the script to push artifacts
diff --git a/build_tools/cmake/iree_third_party_cmake_options.cmake b/build_tools/cmake/iree_third_party_cmake_options.cmake
index 2efba4e42db9..37fd9da2508b 100644
--- a/build_tools/cmake/iree_third_party_cmake_options.cmake
+++ b/build_tools/cmake/iree_third_party_cmake_options.cmake
@@ -17,19 +17,6 @@ macro(iree_set_cpuinfo_cmake_options)
   set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "" FORCE)
 endmacro()
 
-macro(iree_set_flatcc_cmake_options)
-  set(FLATCC_TEST OFF CACHE BOOL "" FORCE)
-  set(FLATCC_CXX_TEST OFF CACHE BOOL "" FORCE)
-  set(FLATCC_REFLECTION OFF CACHE BOOL "" FORCE)
-  set(FLATCC_ALLOW_WERROR OFF CACHE BOOL "" FORCE)
-
-  if(CMAKE_CROSSCOMPILING)
-    set(FLATCC_RTONLY ON CACHE BOOL "" FORCE)
-  else()
-    set(FLATCC_RTONLY OFF CACHE BOOL "" FORCE)
-  endif()
-endmacro()
-
 macro(iree_set_googletest_cmake_options)
   set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
   set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
diff --git a/build_tools/cmake/rebuild.sh b/build_tools/cmake/rebuild.sh
index 2e202457b967..68e94200f089 100755
--- a/build_tools/cmake/rebuild.sh
+++ b/build_tools/cmake/rebuild.sh
@@ -41,6 +41,13 @@ CMAKE_ARGS=(
   # Enable building the python bindings on CI. Most heavy targets are gated on
   # IREE_ENABLE_TENSORFLOW, so what's left here should be fast.
   "-DIREE_BUILD_PYTHON_BINDINGS=ON"
+
+  # Enable assertions. We wouldn't want to be testing *only* with assertions
+  # enabled, but at the moment only certain CI builds are using this script,
+  # e.g. ASan builds are not using this, so by enabling assertions here, we
+  # get a reasonable mix of {builds with asserts, builds with other features
+  # such as ASan but without asserts}.
+  "-DIREE_ENABLE_ASSERTIONS=ON"
 )
 
 "$CMAKE_BIN" "${CMAKE_ARGS[@]?}" ..
diff --git a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh
index 3c6a6158b02d..1d9512db0484 100755
--- a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh
@@ -100,6 +100,9 @@ label_exclude_regex="($(IFS="|" ; echo "${label_exclude_args[*]?}"))"
 # These tests currently have asan failures
 # TODO(#5715): Fix these
 declare -a excluded_tests=(
+  "iree/base/internal/file_io_test"
+  "iree/samples/static_library/static_library_demo_test"
+  "bindings/tflite/smoke_test"
   "iree/hal/cts/allocator_test"
   "iree/hal/cts/buffer_mapping_test"
   "iree/hal/cts/command_buffer_test"
@@ -107,10 +110,9 @@ declare -a excluded_tests=(
   "iree/hal/cts/driver_test"
   "iree/hal/cts/event_test"
   "iree/hal/cts/executable_layout_test"
-  "iree/hal/cts/semaphore_test"
   "iree/hal/cts/semaphore_submission_test"
+  "iree/hal/cts/semaphore_test"
   "iree/modules/check/check_test"
-  "bindings/tflite/smoke_test"
   "iree/samples/simple_embedding/simple_embedding_vulkan_test"
 )
 
diff --git a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh
index c4e0084c4603..4af8334c0970 100755
--- a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh
@@ -25,7 +25,7 @@ docker_setup
 
 docker run "${DOCKER_RUN_ARGS[@]?}" \
   gcr.io/iree-oss/cmake-swiftshader@sha256:031aded9cd66d30fcfa4dabea05a69721f33239516bc2e10ca216afd9ae4c012 \
-  build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build.sh
+  build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh
 
 # Kokoro will rsync this entire directory back to the executor orchestrating the
 # build which takes forever and is totally useless.
diff --git a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg
index de6fb31e1e2e..73224e5e83f2 100644
--- a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg
+++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg
@@ -6,4 +6,4 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build_kokoro.sh"
+build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh"
diff --git a/build_tools/third_party/flatcc/CMakeLists.txt b/build_tools/third_party/flatcc/CMakeLists.txt
index f707b567f03b..74caf63763c1 100644
--- a/build_tools/third_party/flatcc/CMakeLists.txt
+++ b/build_tools/third_party/flatcc/CMakeLists.txt
@@ -85,14 +85,35 @@ external_cc_library(
 if(NOT CMAKE_CROSSCOMPILING)
   add_executable(iree-flatcc-cli
     "${FLATCC_ROOT}/src/cli/flatcc_cli.c"
+    "${FLATCC_ROOT}/external/hash/cmetrohash64.c"
+    "${FLATCC_ROOT}/external/hash/str_set.c"
+    "${FLATCC_ROOT}/external/hash/ptr_set.c"
+    "${FLATCC_ROOT}/src/compiler/hash_tables/symbol_table.c"
+    "${FLATCC_ROOT}/src/compiler/hash_tables/scope_table.c"
+    "${FLATCC_ROOT}/src/compiler/hash_tables/name_table.c"
+    "${FLATCC_ROOT}/src/compiler/hash_tables/schema_table.c"
+    "${FLATCC_ROOT}/src/compiler/hash_tables/value_set.c"
+    "${FLATCC_ROOT}/src/compiler/fileio.c"
+    "${FLATCC_ROOT}/src/compiler/parser.c"
+    "${FLATCC_ROOT}/src/compiler/semantics.c"
+    "${FLATCC_ROOT}/src/compiler/coerce.c"
+    "${FLATCC_ROOT}/src/compiler/codegen_schema.c"
+    "${FLATCC_ROOT}/src/compiler/flatcc.c"
+    "${FLATCC_ROOT}/src/compiler/codegen_c.c"
+    "${FLATCC_ROOT}/src/compiler/codegen_c_reader.c"
+    "${FLATCC_ROOT}/src/compiler/codegen_c_sort.c"
+    "${FLATCC_ROOT}/src/compiler/codegen_c_builder.c"
+    "${FLATCC_ROOT}/src/compiler/codegen_c_verifier.c"
+    "${FLATCC_ROOT}/src/compiler/codegen_c_sorter.c"
+    "${FLATCC_ROOT}/src/compiler/codegen_c_json_parser.c"
+    "${FLATCC_ROOT}/src/compiler/codegen_c_json_printer.c"
+    "${FLATCC_ROOT}/src/runtime/builder.c"
+    "${FLATCC_ROOT}/src/runtime/emitter.c"
+    "${FLATCC_ROOT}/src/runtime/refmap.c"
   )
-
-  target_link_libraries(iree-flatcc-cli
-    flatcc
-  )
-
   target_include_directories(iree-flatcc-cli SYSTEM
     PUBLIC
+      "${FLATCC_ROOT}/external"
       "${FLATCC_ROOT}/include"
       "${FLATCC_ROOT}/config"
   )
diff --git a/docs/website/docs/building-from-source/optional-features.md b/docs/website/docs/building-from-source/optional-features.md
index 216f63e8f6df..b29458608923 100644
--- a/docs/website/docs/building-from-source/optional-features.md
+++ b/docs/website/docs/building-from-source/optional-features.md
@@ -79,7 +79,8 @@ package manager ([about](https://docs.python.org/3/library/venv.html),
     python -m pip install -r bindings\python\build_requirements.txt
     ```
 
-When done, close your shell or run `deactivate`.
+When you are done with the venv, you can close it by closing your shell
+or running `deactivate`.
 
 ### Usage
 
diff --git a/integrations/tensorflow/CMakeLists.txt b/integrations/tensorflow/CMakeLists.txt
index 777bf753b35b..71c188ee396d 100644
--- a/integrations/tensorflow/CMakeLists.txt
+++ b/integrations/tensorflow/CMakeLists.txt
@@ -8,57 +8,10 @@
 # dependent code under this directory tree. The CMake support is limited to
 # compiler binaries and python bindings.
 #
-# Bazel is a beast that likes to be the center of the universe. There is some
-# fragility in delegating to it in this fashion.
-#
 # If this directory is included, then building TensorFlow is assumed (the
 # config option happens at the higher level).
 
-set(_bazel_targets)
-set(_executable_paths)
-
-set(IREE_TF_TOOLS_ROOT
-    "${CMAKE_SOURCE_DIR}/integrations/tensorflow/bazel-bin/iree_tf_compiler"
-    CACHE STRING "Root directory for IREE TensorFlow integration binaries")
-
-
-if(${IREE_BUILD_TENSORFLOW_COMPILER} OR ${IREE_BUILD_TENSORFLOW_ALL})
-  add_executable(iree_tf_compiler_iree-import-tf IMPORTED GLOBAL)
-  set_property(TARGET iree_tf_compiler_iree-import-tf
-    PROPERTY IMPORTED_LOCATION
-        "${IREE_TF_TOOLS_ROOT}/iree-import-tf"
-  )
-endif()
-
-if(${IREE_BUILD_TFLITE_COMPILER} OR ${IREE_BUILD_TENSORFLOW_ALL})
-  add_executable(iree_tf_compiler_iree-import-tflite IMPORTED GLOBAL)
-  set_property(TARGET iree_tf_compiler_iree-import-tflite
-    PROPERTY IMPORTED_LOCATION
-        "${IREE_TF_TOOLS_ROOT}/iree-import-tflite"
-  )
-endif()
-
-if(${IREE_BUILD_XLA_COMPILER} OR ${IREE_BUILD_TENSORFLOW_ALL})
-  add_executable(iree_tf_compiler_iree-import-xla IMPORTED GLOBAL)
-  set_property(TARGET iree_tf_compiler_iree-import-xla
-    PROPERTY IMPORTED_LOCATION
-        "${IREE_TF_TOOLS_ROOT}/iree-import-xla"
-  )
-endif()
-
-if(${IREE_BUILD_TESTS})
-  add_executable(iree_tf_compiler_iree-tf-opt IMPORTED GLOBAL)
-  set_property(TARGET iree_tf_compiler_iree-tf-opt
-    PROPERTY IMPORTED_LOCATION
-        "${IREE_TF_TOOLS_ROOT}/iree-tf-opt"
-  )
-
-  add_executable(iree_tf_compiler_iree-opt-tflite IMPORTED GLOBAL)
-  set_property(TARGET iree_tf_compiler_iree-opt-tflite
-    PROPERTY IMPORTED_LOCATION
-        "${IREE_TF_TOOLS_ROOT}/iree-opt-tflite"
-  )
-endif()
+add_subdirectory(iree_tf_compiler)
 
 if(${IREE_BUILD_PYTHON_BINDINGS})
   add_subdirectory(bindings/python)
diff --git a/integrations/tensorflow/bindings/python/iree/tools/tf/CMakeLists.txt b/integrations/tensorflow/bindings/python/iree/tools/tf/CMakeLists.txt
index 0cd23102a47d..77fd6c117012 100644
--- a/integrations/tensorflow/bindings/python/iree/tools/tf/CMakeLists.txt
+++ b/integrations/tensorflow/bindings/python/iree/tools/tf/CMakeLists.txt
@@ -14,12 +14,12 @@ iree_py_library(
     tf
   SRCS ${_srcs}
   DEPS
-  iree_tf_compiler_iree-import-tf
+  integrations::tensorflow::iree_tf_compiler::iree-import-tf
 )
 
 iree_symlink_tool(
   TARGET tf
-  FROM_TOOL_TARGET iree_tf_compiler_iree-import-tf
+  FROM_TOOL_TARGET integrations::tensorflow::iree_tf_compiler::iree-import-tf
   TO_EXE_NAME iree-import-tf
 )
 
@@ -29,13 +29,13 @@ iree_py_install_package(
   MODULE_PATH iree/tools/tf
   FILES_MATCHING ${_srcs}
   DEPS
-    iree_tf_compiler_iree-import-tf
+    integrations::tensorflow::iree_tf_compiler::iree-import-tf
 )
 
 # Since imported, need to resolve the TARGET_FILE ourselves instead of
 # install TARGETS form.
 install(
-  PROGRAMS "$<TARGET_FILE:iree_tf_compiler_iree-import-tf>"
+  PROGRAMS "$<TARGET_FILE:integrations::tensorflow::iree_tf_compiler::iree-import-tf>"
   DESTINATION "${PY_INSTALL_MODULE_DIR}"
   COMPONENT "${PY_INSTALL_COMPONENT}"
 )
diff --git a/integrations/tensorflow/bindings/python/iree/tools/tflite/CMakeLists.txt b/integrations/tensorflow/bindings/python/iree/tools/tflite/CMakeLists.txt
index 69d301f7ab27..d5cbb650ea71 100644
--- a/integrations/tensorflow/bindings/python/iree/tools/tflite/CMakeLists.txt
+++ b/integrations/tensorflow/bindings/python/iree/tools/tflite/CMakeLists.txt
@@ -14,12 +14,12 @@ iree_py_library(
     tflite
   SRCS ${_srcs}
   DEPS
-    iree_tf_compiler_iree-import-tflite
+    integrations::tensorflow::iree_tf_compiler::iree-import-tflite
 )
 
 iree_symlink_tool(
   TARGET tflite
-  FROM_TOOL_TARGET iree_tf_compiler_iree-import-tflite
+  FROM_TOOL_TARGET integrations::tensorflow::iree_tf_compiler::iree-import-tflite
   TO_EXE_NAME iree-import-tflite
 )
 
@@ -29,13 +29,13 @@ iree_py_install_package(
   MODULE_PATH iree/tools/tflite
   FILES_MATCHING ${_srcs}
   DEPS
-    iree_tf_compiler_iree-import-tflite
+    integrations::tensorflow::iree_tf_compiler::iree-import-tflite
 )
 
 # Since imported, need to resolve the TARGET_FILE ourselves instead of
 # install TARGETS form.
 install(
-  PROGRAMS "$<TARGET_FILE:iree_tf_compiler_iree-import-tflite>"
+  PROGRAMS "$<TARGET_FILE:integrations::tensorflow::iree_tf_compiler::iree-import-tflite>"
   DESTINATION "${PY_INSTALL_MODULE_DIR}"
   COMPONENT "${PY_INSTALL_COMPONENT}"
 )
diff --git a/integrations/tensorflow/bindings/python/iree/tools/xla/CMakeLists.txt b/integrations/tensorflow/bindings/python/iree/tools/xla/CMakeLists.txt
index 8e7133d600b3..637bb8179abb 100644
--- a/integrations/tensorflow/bindings/python/iree/tools/xla/CMakeLists.txt
+++ b/integrations/tensorflow/bindings/python/iree/tools/xla/CMakeLists.txt
@@ -14,12 +14,12 @@ iree_py_library(
     xla
   SRCS ${_srcs}
   DEPS
-    iree_tf_compiler_iree-import-xla
+    integrations::tensorflow::iree_tf_compiler::iree-import-xla
 )
 
 iree_symlink_tool(
   TARGET xla
-  FROM_TOOL_TARGET iree_tf_compiler_iree-import-xla
+  FROM_TOOL_TARGET integrations::tensorflow::iree_tf_compiler::iree-import-xla
   TO_EXE_NAME iree-import-xla
 )
 
@@ -29,13 +29,13 @@ iree_py_install_package(
   MODULE_PATH iree/tools/xla
   FILES_MATCHING ${_srcs}
   DEPS
-    iree_tf_compiler_iree-import-xla
+    integrations::tensorflow::iree_tf_compiler::iree-import-xla
 )
 
 # Since imported, need to resolve the TARGET_FILE ourselves instead of
 # install TARGETS form.
 install(
-  PROGRAMS "$<TARGET_FILE:iree_tf_compiler_iree-import-xla>"
+  PROGRAMS "$<TARGET_FILE:integrations::tensorflow::iree_tf_compiler::iree-import-xla>"
   DESTINATION "${PY_INSTALL_MODULE_DIR}"
   COMPONENT "${PY_INSTALL_COMPONENT}"
 )
diff --git a/integrations/tensorflow/iree_tf_compiler/CMakeLists.txt b/integrations/tensorflow/iree_tf_compiler/CMakeLists.txt
new file mode 100644
index 000000000000..0e3e42b4c018
--- /dev/null
+++ b/integrations/tensorflow/iree_tf_compiler/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Copyright 2020 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+set(IREE_TF_TOOLS_ROOT
+    "${CMAKE_SOURCE_DIR}/integrations/tensorflow/bazel-bin/iree_tf_compiler"
+    CACHE STRING "Root directory for IREE TensorFlow integration binaries")
+
+iree_package_name(_PACKAGE_NAME)
+iree_package_ns(_PACKAGE_NS)
+
+function(configure_tf_binary BINARY_NAME)
+  set(_NAME "${_PACKAGE_NAME}_${BINARY_NAME}")
+  add_executable("${_NAME}" IMPORTED GLOBAL)
+  set_property(TARGET ${_NAME}
+    PROPERTY IMPORTED_LOCATION
+        "${IREE_TF_TOOLS_ROOT}/${BINARY_NAME}"
+  )
+  add_executable(${_PACKAGE_NS}::${BINARY_NAME} ALIAS ${_NAME})
+  add_executable(${BINARY_NAME} ALIAS ${_NAME})
+endfunction()
+
+if(${IREE_BUILD_TENSORFLOW_COMPILER})
+  configure_tf_binary("iree-import-tf")
+endif()
+
+if(${IREE_BUILD_TFLITE_COMPILER})
+  configure_tf_binary("iree-import-tflite")
+endif()
+
+if(${IREE_BUILD_XLA_COMPILER})
+  configure_tf_binary("iree-import-xla")
+endif()
+
+if(${IREE_BUILD_TESTS})
+  configure_tf_binary("iree-tf-opt")
+  configure_tf_binary("iree-opt-tflite")
+endif()
diff --git a/iree/base/status.h b/iree/base/status.h
index 790c29797e9c..81baad3a8004 100644
--- a/iree/base/status.h
+++ b/iree/base/status.h
@@ -230,6 +230,8 @@ typedef struct iree_status_handle_t* iree_status_t;
 #if IREE_STATUS_FEATURES == 0
 #define IREE_STATUS_IMPL_MAKE_(code, ...) \
   (iree_status_t)(uintptr_t)((code)&IREE_STATUS_CODE_MASK)
+#define IREE_STATUS_IMPL_MAKE_LOC_(file, line, code, ...) \
+  IREE_STATUS_IMPL_MAKE_(code)
 #undef IREE_STATUS_IMPL_RETURN_IF_API_ERROR_
 #define IREE_STATUS_IMPL_RETURN_IF_API_ERROR_(var, ...)                      \
   iree_status_t var = (IREE_STATUS_IMPL_IDENTITY_(                           \
@@ -254,6 +256,8 @@ typedef struct iree_status_handle_t* iree_status_t;
 #else
 #define IREE_STATUS_IMPL_MAKE_(...) \
   IREE_STATUS_IMPL_MAKE_SWITCH_(__FILE__, __LINE__, __VA_ARGS__)
+#define IREE_STATUS_IMPL_MAKE_LOC_(file, line, ...) \
+  IREE_STATUS_IMPL_MAKE_SWITCH_(file, line, __VA_ARGS__)
 #endif  // !IREE_STATUS_FEATURES
 
 // Returns an IREE_STATUS_OK.
@@ -271,6 +275,15 @@ typedef struct iree_status_handle_t* iree_status_t;
 //  return iree_make_status(IREE_STATUS_CANCELLED, "because %d > %d", a, b);
 #define iree_make_status IREE_STATUS_IMPL_MAKE_
 
+// Makes an iree_status_t with the given iree_status_code_t code using the given
+// source location. Besides taking the file and line of the source location this
+// is the same as iree_make_status.
+//
+// Examples:
+//  return iree_make_status_with_location(
+//      "file.c", 40, IREE_STATUS_CANCELLED, "because %d > %d", a, b);
+#define iree_make_status_with_location IREE_STATUS_IMPL_MAKE_LOC_
+
 // Propagates the error returned by (expr) by returning from the current
 // function on non-OK status. Optionally annotates the status with additional
 // information (see iree_status_annotate for more information).
diff --git a/iree/compiler/Codegen/Common/BUILD b/iree/compiler/Codegen/Common/BUILD
index 6f71b73b74a9..0593b9b1b5bb 100644
--- a/iree/compiler/Codegen/Common/BUILD
+++ b/iree/compiler/Codegen/Common/BUILD
@@ -46,6 +46,7 @@ cc_library(
     deps = [
         "//iree/compiler/Codegen:PassHeaders",
         "//iree/compiler/Codegen/Common:FoldTensorExtractOpIncGen",
+        "//iree/compiler/Codegen/Dialect:IREECodegenDialect",
         "//iree/compiler/Codegen/Transforms",
         "//iree/compiler/Codegen/Utils",
         "//iree/compiler/Dialect/Flow/IR",
diff --git a/iree/compiler/Codegen/Common/CMakeLists.txt b/iree/compiler/Codegen/Common/CMakeLists.txt
index 5e77f084ba4b..ce29221d09ec 100644
--- a/iree/compiler/Codegen/Common/CMakeLists.txt
+++ b/iree/compiler/Codegen/Common/CMakeLists.txt
@@ -56,6 +56,7 @@ iree_cc_library(
     MLIRTransforms
     MLIRVector
     iree::compiler::Codegen::Common::FoldTensorExtractOpIncGen
+    iree::compiler::Codegen::Dialect::IREECodegenDialect
     iree::compiler::Codegen::PassHeaders
     iree::compiler::Codegen::Transforms
     iree::compiler::Codegen::Utils
diff --git a/iree/compiler/Codegen/Common/SetNumWorkgroupsPass.cpp b/iree/compiler/Codegen/Common/SetNumWorkgroupsPass.cpp
index 0f70605ec154..06f3f51ff1ca 100644
--- a/iree/compiler/Codegen/Common/SetNumWorkgroupsPass.cpp
+++ b/iree/compiler/Codegen/Common/SetNumWorkgroupsPass.cpp
@@ -4,14 +4,13 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Codegen/Transforms/Transforms.h"
-#include "iree/compiler/Codegen/Utils/Utils.h"
 #include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
 #include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 
@@ -90,14 +89,9 @@ void SetNumWorkgroupsPass::runOnOperation() {
     if (!workloadPerWorkgroup.empty()) {
       currWorkloadPerWorkgroup.assign(workloadPerWorkgroup.begin(),
                                       workloadPerWorkgroup.end());
-    } else if (IREE::HAL::TranslationInfo translationInfo =
+    } else if (IREE::Codegen::TranslationInfoAttr translationInfo =
                    getTranslationInfo(entryPointOp)) {
-      if (ArrayAttr workloadPerWorkgroupAttr =
-              translationInfo.workloadPerWorkgroup()) {
-        currWorkloadPerWorkgroup = llvm::to_vector<4>(llvm::map_range(
-            workloadPerWorkgroupAttr,
-            [](Attribute attr) { return attr.cast<IntegerAttr>().getInt(); }));
-      }
+      currWorkloadPerWorkgroup = translationInfo.getWorkloadPerWorkgroupVals();
     }
 
     if (!currWorkloadPerWorkgroup.empty()) {
diff --git a/iree/compiler/Codegen/Common/VectorizeMMT4d.cpp b/iree/compiler/Codegen/Common/VectorizeMMT4d.cpp
index 34340efeeb6b..c7db560c70bf 100644
--- a/iree/compiler/Codegen/Common/VectorizeMMT4d.cpp
+++ b/iree/compiler/Codegen/Common/VectorizeMMT4d.cpp
@@ -14,6 +14,23 @@ namespace iree_compiler {
 
 namespace {
 
+Value promoteVector(Location loc, Value inputVector, Type promotedElementType,
+                    PatternRewriter &rewriter) {
+  VectorType inputVectorType = inputVector.getType().cast<VectorType>();
+  if (inputVectorType.getElementType() == promotedElementType) {
+    return inputVector;
+  } else {
+    auto promotedVectorType = inputVectorType.clone(promotedElementType);
+    if (promotedElementType.isIntOrIndex()) {
+      return rewriter.create<arith::ExtSIOp>(loc, inputVector,
+                                             promotedVectorType);
+    } else {
+      return rewriter.create<arith::ExtFOp>(loc, inputVector,
+                                            promotedVectorType);
+    }
+  }
+}
+
 /// Converts linalg.mmt4d into vector.contract.
 /// This converts linalg.mmt4d with operands <1x1xM0xK0>, <1x1xK0xN0>
 /// to vector.contract where K0 is the contraction dimension.
@@ -22,12 +39,13 @@ struct VectorizeMMT4DOp : public OpRewritePattern<linalg::Mmt4DOp> {
 
   LogicalResult matchAndRewrite(linalg::Mmt4DOp mmt4DOp,
                                 PatternRewriter &rewriter) const override {
-    auto lhs = mmt4DOp.inputs()[0];
-    auto rhs = mmt4DOp.inputs()[1];
-    auto dst = mmt4DOp.outputs()[0];
+    Value lhs = mmt4DOp.inputs()[0];
+    Value rhs = mmt4DOp.inputs()[1];
+    Value dst = mmt4DOp.outputs()[0];
 
-    auto lhsType = lhs.getType().dyn_cast<ShapedType>();
-    auto rhsType = rhs.getType().dyn_cast<ShapedType>();
+    ShapedType lhsType = lhs.getType().dyn_cast<ShapedType>();
+    ShapedType rhsType = rhs.getType().dyn_cast<ShapedType>();
+    ShapedType dstType = dst.getType().dyn_cast<ShapedType>();
 
     // This pattern expects tensors of static shapes.
     // In practice, dynamic shapes are meant to be handled by other passes,
@@ -55,16 +73,20 @@ struct VectorizeMMT4DOp : public OpRewritePattern<linalg::Mmt4DOp> {
     int N0 = rhsType.getShape()[2];
     int K0 = lhsType.getShape()[3];
 
-    auto loc = mmt4DOp.getLoc();
-    auto c0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Location loc = mmt4DOp.getLoc();
+    Value c0 = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+
+    Type lhsElementType = lhsType.getElementType();
+    Type rhsElementType = rhsType.getElementType();
+    Type dstElementType = dstType.getElementType();
 
-    auto lhsVecType = VectorType::get({1, 1, M0, K0}, rewriter.getF32Type());
-    auto rhsVecType = VectorType::get({1, 1, N0, K0}, rewriter.getF32Type());
-    auto dstVecType = VectorType::get({1, 1, M0, N0}, rewriter.getF32Type());
+    auto lhsVecType = VectorType::get({1, 1, M0, K0}, lhsElementType);
+    auto rhsVecType = VectorType::get({1, 1, N0, K0}, rhsElementType);
+    auto dstVecType = VectorType::get({1, 1, M0, N0}, dstElementType);
 
-    auto lhsVecType2D = VectorType::get({M0, K0}, rewriter.getF32Type());
-    auto rhsVecType2D = VectorType::get({N0, K0}, rewriter.getF32Type());
-    auto dstVecType2D = VectorType::get({M0, N0}, rewriter.getF32Type());
+    auto lhsVecType2D = VectorType::get({M0, K0}, lhsElementType);
+    auto rhsVecType2D = VectorType::get({N0, K0}, rhsElementType);
+    auto dstVecType2D = VectorType::get({M0, N0}, dstElementType);
 
     auto identityMap = rewriter.getMultiDimIdentityMap(4);
 
@@ -84,6 +106,14 @@ struct VectorizeMMT4DOp : public OpRewritePattern<linalg::Mmt4DOp> {
     Value dstVec2D =
         rewriter.create<vector::ShapeCastOp>(loc, dstVecType2D, dstVec);
 
+    // Promote, if needed, the element type in the lhs and rhs vectors to
+    // match the dst vector, so that the vector.contract below will involve
+    // only one element type. This is in line with planned design, see
+    // the closing comment on https://reviews.llvm.org/D112508 where the
+    // alternative of using mixed types was considered.
+    Value promLhsVec2d = promoteVector(loc, lhsVec2D, dstElementType, rewriter);
+    Value promRhsVec2d = promoteVector(loc, rhsVec2D, dstElementType, rewriter);
+
     // Generate the vector.contract on 2D vectors replacing the mmt4d op.
     auto m = rewriter.getAffineDimExpr(0);
     auto n = rewriter.getAffineDimExpr(1);
@@ -96,7 +126,7 @@ struct VectorizeMMT4DOp : public OpRewritePattern<linalg::Mmt4DOp> {
         {getParallelIteratorTypeName(), getParallelIteratorTypeName(),
          getReductionIteratorTypeName()});
     Value contractResult = rewriter.create<vector::ContractionOp>(
-        loc, lhsVec2D, rhsVec2D, dstVec2D, indexingMaps, iterators);
+        loc, promLhsVec2d, promRhsVec2d, dstVec2D, indexingMaps, iterators);
 
     // Convert the output vector from 2D shape (M0xN0) to 4D shape (1x1xM0xN0)
     Value contractResult4D =
diff --git a/iree/compiler/Codegen/Dialect/BUILD b/iree/compiler/Codegen/Dialect/BUILD
new file mode 100644
index 000000000000..9c6008b7ec0c
--- /dev/null
+++ b/iree/compiler/Codegen/Dialect/BUILD
@@ -0,0 +1,107 @@
+# Copyright 2019 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+load("@llvm-project//mlir:tblgen.bzl", "gentbl_cc_library", "td_library")
+load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob")
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+exports_files([
+    "IREECodegenAttributes.td",
+    "IREECodegenDialect.td",
+    "LoweringConfig.td",
+])
+
+td_library(
+    name = "td_files",
+    srcs = enforce_glob(
+        [
+            "IREECodegenAttributes.td",
+            "IREECodegenDialect.td",
+            "LoweringConfig.td",
+        ],
+        include = ["*.td"],
+    ),
+    deps = [
+        "@llvm-project//mlir:OpBaseTdFiles",
+    ],
+)
+
+cc_library(
+    name = "IREECodegenDialect",
+    srcs = [
+        "IREECodegenDialect.cpp",
+        "LoweringConfig.cpp",
+    ],
+    hdrs = [
+        "IREECodegenDialect.h",
+        "LoweringConfig.h",
+    ],
+    textual_hdrs = [
+        "IREECodegenDialect.cpp.inc",
+        "IREECodegenDialect.h.inc",
+        "LoweringConfig.cpp.inc",
+        "LoweringConfig.h.inc",
+        "LoweringConfigEnums.cpp.inc",
+        "LoweringConfigEnums.h.inc",
+    ],
+    deps = [
+        ":IREECodegenDialectGen",
+        ":LoweringConfigGen",
+        "//iree/compiler/Codegen/Utils",
+        "@llvm-project//llvm:Support",
+        "@llvm-project//mlir:DialectUtils",
+        "@llvm-project//mlir:IR",
+        "@llvm-project//mlir:Parser",
+        "@llvm-project//mlir:StandardOps",
+    ],
+)
+
+gentbl_cc_library(
+    name = "IREECodegenDialectGen",
+    tbl_outs = [
+        (
+            ["-gen-dialect-decls"],
+            "IREECodegenDialect.h.inc",
+        ),
+        (
+            ["-gen-dialect-defs"],
+            "IREECodegenDialect.cpp.inc",
+        ),
+    ],
+    tblgen = "@llvm-project//mlir:mlir-tblgen",
+    td_file = "IREECodegenAttributes.td",
+    deps = [":td_files"],
+)
+
+gentbl_cc_library(
+    name = "LoweringConfigGen",
+    tbl_outs = [
+        (
+            ["-gen-attrdef-decls"],
+            "LoweringConfig.h.inc",
+        ),
+        (
+            ["-gen-attrdef-defs"],
+            "LoweringConfig.cpp.inc",
+        ),
+        (
+            ["-gen-enum-decls"],
+            "LoweringConfigEnums.h.inc",
+        ),
+        (
+            ["-gen-enum-defs"],
+            "LoweringConfigEnums.cpp.inc",
+        ),
+    ],
+    tblgen = "@llvm-project//mlir:mlir-tblgen",
+    td_file = "LoweringConfig.td",
+    deps = [":td_files"],
+)
diff --git a/iree/compiler/Codegen/Dialect/CMakeLists.txt b/iree/compiler/Codegen/Dialect/CMakeLists.txt
new file mode 100644
index 000000000000..16f6826cb558
--- /dev/null
+++ b/iree/compiler/Codegen/Dialect/CMakeLists.txt
@@ -0,0 +1,62 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# iree/compiler/Codegen/Dialect/BUILD                                          #
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_cc_library(
+  NAME
+    IREECodegenDialect
+  HDRS
+    "IREECodegenDialect.h"
+    "LoweringConfig.h"
+  TEXTUAL_HDRS
+    "IREECodegenDialect.cpp.inc"
+    "IREECodegenDialect.h.inc"
+    "LoweringConfig.cpp.inc"
+    "LoweringConfig.h.inc"
+    "LoweringConfigEnums.cpp.inc"
+    "LoweringConfigEnums.h.inc"
+  SRCS
+    "IREECodegenDialect.cpp"
+    "LoweringConfig.cpp"
+  DEPS
+    ::IREECodegenDialectGen
+    ::LoweringConfigGen
+    LLVMSupport
+    MLIRIR
+    MLIRParser
+    MLIRStandard
+    iree::compiler::Codegen::Utils
+  PUBLIC
+)
+
+iree_tablegen_library(
+  NAME
+    IREECodegenDialectGen
+  TD_FILE
+    "IREECodegenAttributes.td"
+  OUTS
+    -gen-dialect-decls IREECodegenDialect.h.inc
+    -gen-dialect-defs IREECodegenDialect.cpp.inc
+)
+
+iree_tablegen_library(
+  NAME
+    LoweringConfigGen
+  TD_FILE
+    "LoweringConfig.td"
+  OUTS
+    -gen-attrdef-decls LoweringConfig.h.inc
+    -gen-attrdef-defs LoweringConfig.cpp.inc
+    -gen-enum-decls LoweringConfigEnums.h.inc
+    -gen-enum-defs LoweringConfigEnums.cpp.inc
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/compiler/Codegen/Dialect/IREECodegenAttributes.td b/iree/compiler/Codegen/Dialect/IREECodegenAttributes.td
new file mode 100644
index 000000000000..e5f8b2607de0
--- /dev/null
+++ b/iree/compiler/Codegen/Dialect/IREECodegenAttributes.td
@@ -0,0 +1,14 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_CODEGEN_DIALECT_IREECODEGEN_ATTRIBUTES
+#define IREE_CODEGEN_DIALECT_IREECODEGEN_ATTRIBUTES
+
+include "iree/compiler/Codegen/Dialect/IREECodegenDialect.td"
+include "iree/compiler/Codegen/Dialect/LoweringConfig.td"
+
+
+#endif // IREE_CODEGEN_DIALECT_IREECODEGEN_ATTRIBUTES
diff --git a/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp b/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp
new file mode 100644
index 000000000000..8f2cf4769979
--- /dev/null
+++ b/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp
@@ -0,0 +1,62 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
+
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp.inc"
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
+#include "mlir/IR/DialectImplementation.h"
+
+namespace mlir {
+namespace iree_compiler {
+namespace IREE {
+namespace Codegen {
+
+struct IREECodegenDialectOpAsmInterface : public OpAsmDialectInterface {
+  using OpAsmDialectInterface::OpAsmDialectInterface;
+  AliasResult getAlias(Attribute attr, raw_ostream &os) const override {
+    if (attr.isa<TranslationInfoAttr>()) {
+      os << "translation";
+      return AliasResult::OverridableAlias;
+    } else if (attr.isa<CompilationInfoAttr>()) {
+      os << "compilation";
+      return AliasResult::OverridableAlias;
+    } else if (attr.isa<LoweringConfigAttr>()) {
+      os << "config";
+      return AliasResult::OverridableAlias;
+    }
+    return AliasResult::NoAlias;
+  }
+};
+
+void IREECodegenDialect::initialize() {
+  initializeCodegenAttrs();
+  addInterfaces<IREECodegenDialectOpAsmInterface>();
+}
+
+Attribute IREECodegenDialect::parseAttribute(DialectAsmParser &parser,
+                                             Type type) const {
+  StringRef mnemonic;
+  if (failed(parser.parseKeyword(&mnemonic))) return {};
+  Attribute genAttr;
+  OptionalParseResult parseResult =
+      parseCodegenAttrs(parser, mnemonic, type, genAttr);
+  if (parseResult.hasValue()) return genAttr;
+  parser.emitError(parser.getNameLoc(), "unknown iree_codegen attribute");
+  return Attribute();
+}
+
+void IREECodegenDialect::printAttribute(Attribute attr,
+                                        DialectAsmPrinter &p) const {
+  if (failed(printCodegenAttrs(attr, p))) {
+    llvm_unreachable("unhandled iree_codegen attribute");
+  }
+}
+
+}  // namespace Codegen
+}  // namespace IREE
+}  // namespace iree_compiler
+}  // namespace mlir
diff --git a/iree/compiler/Codegen/Dialect/IREECodegenDialect.h b/iree/compiler/Codegen/Dialect/IREECodegenDialect.h
new file mode 100644
index 000000000000..bdb94731af1e
--- /dev/null
+++ b/iree/compiler/Codegen/Dialect/IREECodegenDialect.h
@@ -0,0 +1,17 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_COMPILER_CODEGEN_DIALECT_IREECODEGEN_DIALECT_H_
+#define IREE_COMPILER_CODEGEN_DIALECT_IREECODEGEN_DIALECT_H_
+
+#include "mlir/IR/Dialect.h"
+#include "mlir/IR/OpDefinition.h"
+
+// clang-format off: must be included after all LLVM/MLIR headers
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h.inc"  // IWYU pragma: keep
+// clang-format on
+
+#endif  // IREE_COMPILER_CODEGEN_DIALECT_IREECODEGEN_DIALECT_H_
diff --git a/iree/compiler/Codegen/Dialect/IREECodegenDialect.td b/iree/compiler/Codegen/Dialect/IREECodegenDialect.td
new file mode 100644
index 000000000000..b1233e490e0e
--- /dev/null
+++ b/iree/compiler/Codegen/Dialect/IREECodegenDialect.td
@@ -0,0 +1,44 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_CODEGEN_DIALECT_IREECODEGEN_DIALECT
+#define IREE_CODEGEN_DIALECT_IREECODEGEN_DIALECT
+
+include "mlir/IR/OpBase.td"
+
+//===----------------------------------------------------------------------===//
+// IREE Codegen dialect
+//===----------------------------------------------------------------------===//
+
+def IREECodegen_Dialect : Dialect {
+  let name = "iree_codegen";
+  let cppNamespace = "::mlir::iree_compiler::IREE::Codegen";
+
+  let summary = [{
+    A dialect representing attributes used by the IREE Code generation.
+  }];
+  let description = [{
+    This dialect is primarily meant to hold attributes that carry the
+    state of the compilation when lowered to scalar code for an
+    architecture. Typically, a backend starts by analysing the entry
+    point functions within the `hal.executable.variant` and deciding
+    which compilation pipeline to choose. During this, even the values
+    for parameters such as tile sizes, etc. are also decided. The rest
+    of the compilation flow does not make any heuristic decisions,
+    rather just looks at the values of the decision specified in
+    attributes that belong to this dialect. This allows an external
+    search to easily override the heuristics that are hard-coded
+    within a backend.
+  }];
+  let extraClassDeclaration = [{
+    void initializeCodegenAttrs();
+    OptionalParseResult parseCodegenAttrs(DialectAsmParser &parser,
+        StringRef mnemonic, Type type, Attribute &value) const;
+    LogicalResult printCodegenAttrs(Attribute attr, DialectAsmPrinter &p) const;
+  }];
+}
+
+#endif // IREE_CODEGEN_DIALECT_IREECODEGEN_DIALECT
\ No newline at end of file
diff --git a/iree/compiler/Codegen/Dialect/LoweringConfig.cpp b/iree/compiler/Codegen/Dialect/LoweringConfig.cpp
new file mode 100644
index 000000000000..79990d6eaf4b
--- /dev/null
+++ b/iree/compiler/Codegen/Dialect/LoweringConfig.cpp
@@ -0,0 +1,632 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
+
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
+#include "llvm/ADT/TypeSwitch.h"
+#include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/IR/DialectImplementation.h"
+
+#define GET_ATTRDEF_CLASSES
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.cpp.inc"
+#include "iree/compiler/Codegen/Dialect/LoweringConfigEnums.cpp.inc"
+
+static const char kConfigAttrName[] = "lowering.config";
+static const char kTranslationInfoAttrName[] = "translation.info";
+static const char kCompilationInfoAttrName[] = "compilation.info";
+
+namespace mlir {
+namespace iree_compiler {
+
+//===----------------------------------------------------------------------===//
+// Utility function for common code patterns.
+//===----------------------------------------------------------------------===//
+
+/// Returns true if every element of `arrayAttr` is an `IntegerAttr` (which is
+/// trivially the case for an empty list).
+static bool checkIntegerArrayAttr(ArrayAttr arrayAttr) {
+  return llvm::all_of(
+      arrayAttr, [](Attribute element) { return element.isa<IntegerAttr>(); });
+}
+
+/// Builds an `ArrayAttr` with one `IntegerAttr` of `IndexType` per entry of
+/// `values`, in the same order.
+static ArrayAttr getIndexIntegerArrayAttr(MLIRContext *context,
+                                          ArrayRef<int64_t> values) {
+  auto indexType = IndexType::get(context);
+  SmallVector<Attribute, 4> elements;
+  elements.reserve(values.size());
+  for (int64_t value : values) {
+    elements.push_back(IntegerAttr::get(indexType, APInt(64, value)));
+  }
+  return ArrayAttr::get(context, elements);
+}
+
+/// Builds an `ArrayAttr` with one `IntegerAttr` of 64-bit integer type per
+/// entry of `values`, in the same order.
+static ArrayAttr getI64IntegerArrayAttr(MLIRContext *context,
+                                        ArrayRef<int64_t> values) {
+  auto i64Type = IntegerType::get(context, 64);
+  SmallVector<Attribute, 4> elements;
+  elements.reserve(values.size());
+  for (int64_t value : values) {
+    elements.push_back(IntegerAttr::get(i64Type, APInt(64, value)));
+  }
+  return ArrayAttr::get(context, elements);
+}
+
+/// Extracts the integer value of every element of `arrayAttr`. Each element is
+/// cast to `IntegerAttr`, so callers must only pass lists of `IntegerAttr`s. A
+/// null `arrayAttr` yields an empty vector.
+static SmallVector<int64_t> getIntegerVals(ArrayAttr arrayAttr) {
+  SmallVector<int64_t> values;
+  if (!arrayAttr) return values;
+  values.reserve(arrayAttr.size());
+  for (Attribute element : arrayAttr) {
+    values.push_back(element.cast<IntegerAttr>().getInt());
+  }
+  return values;
+}
+
+namespace IREE {
+namespace Codegen {
+
+namespace {
+
+// TODO(ravishankarm): The IREEFieldParser is part of the patch D111594 (where
+// it is called ::mlir::FieldParser). Remove this when the upstream change lands
+// in IREE.
+
+//===----------------------------------------------------------------------===//
+// Parse Fields
+//===----------------------------------------------------------------------===//
+
+/// Provide a template class that can be specialized by users to dispatch to
+/// parsers. Auto-generated parsers generate calls to
+/// `IREEFieldParser<T>::parse`, where `T` is the parameter storage type, to
+/// parse custom types.
+///
+/// The second template parameter defaults to `T`; the partial specializations
+/// below use `std::enable_if_t<cond, T>` in that position so that they are
+/// only selected when `cond` holds.
+template <typename T, typename = T>
+struct IREEFieldParser;
+
+/// Parse an attribute. Selected for any type derived from `Attribute`. Returns
+/// failure if `parseAttribute` fails, otherwise the parsed attribute.
+template <typename AttributeT>
+struct IREEFieldParser<
+    AttributeT, std::enable_if_t<std::is_base_of<Attribute, AttributeT>::value,
+                                 AttributeT>> {
+  static FailureOr<AttributeT> parse(DialectAsmParser &parser) {
+    AttributeT value;
+    if (parser.parseAttribute(value)) return failure();
+    return value;
+  }
+};
+
+/// Parse any integer. Selected for integral types. Returns failure if
+/// `parseInteger` fails, otherwise the parsed value.
+template <typename IntT>
+struct IREEFieldParser<IntT,
+                       std::enable_if_t<std::is_integral<IntT>::value, IntT>> {
+  static FailureOr<IntT> parse(DialectAsmParser &parser) {
+    IntT value;
+    if (parser.parseInteger(value)) return failure();
+    return value;
+  }
+};
+
+/// Parse a string. Returns failure if `parseString` fails, otherwise the
+/// parsed string.
+template <>
+struct IREEFieldParser<std::string> {
+  static FailureOr<std::string> parse(DialectAsmParser &parser) {
+    std::string value;
+    if (parser.parseString(&value)) return failure();
+    return value;
+  }
+};
+
+/// Parse any container that supports back insertion as a list. Each element
+/// is parsed with `IREEFieldParser<ElementT>` and appended with `push_back`.
+template <typename ContainerT>
+struct IREEFieldParser<
+    ContainerT, std::enable_if_t<std::is_member_function_pointer<decltype(
+                                     &ContainerT::push_back)>::value,
+                                 ContainerT>> {
+  using ElementT = typename ContainerT::value_type;
+  static FailureOr<ContainerT> parse(DialectAsmParser &parser) {
+    ContainerT elements;
+    // Parses a single element; a failure here aborts the whole list parse.
+    auto elementParser = [&]() {
+      auto element = IREEFieldParser<ElementT>::parse(parser);
+      if (failed(element)) return failure();
+      elements.push_back(element.getValue());
+      return success();
+    };
+    if (parser.parseCommaSeparatedList(elementParser)) return failure();
+    return elements;
+  }
+};
+}  // namespace
+
+//===----------------------------------------------------------------------===//
+// iree_codegen.translation.info
+//===----------------------------------------------------------------------===//
+
+/// Builds a `TranslationInfoAttr` from the enum value of the pass pipeline and
+/// plain integer workload sizes. The pipeline is stored as a `StringAttr`
+/// holding its stringified name; the workload is stored as a list of 64-bit
+/// integer attributes.
+TranslationInfoAttr TranslationInfoAttr::get(
+    MLIRContext *context, DispatchLoweringPassPipeline passPipeline,
+    ArrayRef<int64_t> workloadPerWorkgroup) {
+  StringAttr pipelineNameAttr =
+      StringAttr::get(context, stringifyEnum(passPipeline));
+  ArrayAttr workloadAttr =
+      getI64IntegerArrayAttr(context, workloadPerWorkgroup);
+  return get(context, pipelineNameAttr, workloadAttr);
+}
+
+/// Converts the stored pass pipeline name back to its enum value. The result
+/// of `symbolizeEnum` is unwrapped without a check, so the stored name must be
+/// a valid enum case (which `verify` enforces).
+DispatchLoweringPassPipeline
+TranslationInfoAttr::getDispatchLoweringPassPipeline() {
+  auto pipelineName = getPassPipeline().getValue();
+  auto pipeline = symbolizeEnum<DispatchLoweringPassPipeline>(pipelineName);
+  return pipeline.getValue();
+}
+
+/// Returns the workload per workgroup as plain integers.
+SmallVector<int64_t> TranslationInfoAttr::getWorkloadPerWorkgroupVals() {
+  ArrayAttr workloadAttr = getWorkloadPerWorkgroup();
+  return getIntegerVals(workloadAttr);
+}
+
+/// Verifies the parameters of a `translation.info` attribute: the pass
+/// pipeline must be present and name a known pipeline, and the workload per
+/// workgroup must be present and contain only `IntegerAttr`s.
+LogicalResult TranslationInfoAttr::verify(
+    function_ref<InFlightDiagnostic()> emitError, StringAttr passPipeline,
+    ArrayAttr workloadPerWorkgroup) {
+  if (!passPipeline) {
+    return emitError() << "missing pass pipeline specification";
+  }
+  auto pipelineName = passPipeline.getValue();
+  if (!symbolizeEnum<IREE::Codegen::DispatchLoweringPassPipeline>(
+          pipelineName)) {
+    return emitError() << "invalid pass pipeline value : " << pipelineName;
+  }
+  if (!workloadPerWorkgroup) {
+    return emitError() << "expected workload_per_wg to be specified (even if "
+                          "specified as empty)";
+  }
+  bool allIntegers = checkIntegerArrayAttr(workloadPerWorkgroup);
+  if (!allIntegers) {
+    return emitError() << "expected workload_per_wg to be an IntegerAttr list";
+  }
+  return success();
+}
+
+/// Parses `<` pass-pipeline `,` `workload_per_wg` `=` array `>` and builds the
+/// attribute. Returns a null attribute on any parse failure.
+::mlir::Attribute TranslationInfoAttr::parse(::mlir::DialectAsmParser &parser,
+                                             ::mlir::Type attrType) {
+  // Opening '<'.
+  if (parser.parseLess()) return {};
+
+  // Pass pipeline name.
+  ::mlir::FailureOr<StringAttr> pipelineResult =
+      IREEFieldParser<StringAttr>::parse(parser);
+  if (failed(pipelineResult)) {
+    parser.emitError(parser.getCurrentLocation(),
+                     "failed to parse IREECodegen_TranslationInfoAttr "
+                     "parameter 'passPipeline' which is to be a `StringAttr`");
+    return {};
+  }
+
+  // ", workload_per_wg ="
+  if (parser.parseComma()) return {};
+  if (parser.parseKeyword("workload_per_wg")) return {};
+  if (parser.parseEqual()) return {};
+
+  // Workload per workgroup list.
+  ::mlir::FailureOr<ArrayAttr> workloadResult =
+      IREEFieldParser<ArrayAttr>::parse(parser);
+  if (failed(workloadResult)) {
+    parser.emitError(
+        parser.getCurrentLocation(),
+        "failed to parse IREECodegen_TranslationInfoAttr parameter "
+        "'workloadPerWorkgroup' which is to be a `ArrayAttr`");
+    return {};
+  }
+
+  // Closing '>'.
+  if (parser.parseGreater()) return {};
+  return TranslationInfoAttr::get(parser.getContext(),
+                                  pipelineResult.getValue(),
+                                  workloadResult.getValue());
+}
+
+/// Prints the attribute in the form accepted by `parse`:
+/// `translation.info<pipeline, workload_per_wg = [...]>`.
+void TranslationInfoAttr::print(::mlir::DialectAsmPrinter &printer) const {
+  printer << "translation.info" << "<" << getPassPipeline() << ",";
+  printer << ' ' << "workload_per_wg" << ' ' << "=" << ' '
+          << getWorkloadPerWorkgroup() << ">";
+}
+
+//===----------------------------------------------------------------------===//
+// iree_codegen.lowering.config
+//===----------------------------------------------------------------------===//
+
+/// Builds a `LoweringConfigAttr` from plain integers: each entry of
+/// `tileSizes` becomes a nested list of 64-bit integer attributes, and
+/// `nativeVectorSize` becomes a flat list of 64-bit integer attributes.
+LoweringConfigAttr LoweringConfigAttr::get(MLIRContext *context,
+                                           TileSizesListTypeRef tileSizes,
+                                           ArrayRef<int64_t> nativeVectorSize) {
+  SmallVector<Attribute, 4> perLevelAttrs;
+  for (ArrayRef<int64_t> levelSizes : tileSizes) {
+    perLevelAttrs.push_back(getI64IntegerArrayAttr(context, levelSizes));
+  }
+  ArrayAttr tileSizesAttr = ArrayAttr::get(context, perLevelAttrs);
+  ArrayAttr vectorSizeAttr = getI64IntegerArrayAttr(context, nativeVectorSize);
+  return get(context, tileSizesAttr, vectorSizeAttr);
+}
+
+/// Returns the tile sizes of every level as plain integers. Returns an empty
+/// list if no tile sizes attribute is present.
+TileSizesListType LoweringConfigAttr::getTileSizeVals() {
+  TileSizesListType tileSizes;
+  if (auto levelsAttr = getTileSizes()) {
+    for (Attribute levelAttr : levelsAttr) {
+      tileSizes.emplace_back(getIntegerVals(levelAttr.cast<ArrayAttr>()));
+    }
+  }
+  return tileSizes;
+}
+
+/// Returns the tile sizes at `level` as plain integers, or an empty vector if
+/// there is no tile sizes attribute or it has no entry for `level`.
+SmallVector<int64_t> LoweringConfigAttr::getTileSizeVals(unsigned level) {
+  ArrayAttr levelsAttr = getTileSizes();
+  if (levelsAttr && level < levelsAttr.size()) {
+    return getIntegerVals(levelsAttr[level].cast<ArrayAttr>());
+  }
+  return {};
+}
+
+/// Returns the native vector size as plain integers (empty if the attribute
+/// is not present; `getIntegerVals` maps a null list to an empty vector).
+SmallVector<int64_t> LoweringConfigAttr::getNativeVectorSizeVals() {
+  return getIntegerVals(getNativeVectorSize());
+}
+
+/// Verifies the parameters of a `lowering.config` attribute.
+///
+/// `tileSizes` must be present and every element must itself be a list of
+/// `IntegerAttr`s. `nativeVectorSize` must be present and contain only
+/// `IntegerAttr`s. Both may be empty lists. Emits a diagnostic through
+/// `emitError` and returns failure on the first violation found.
+LogicalResult LoweringConfigAttr::verify(
+    function_ref<InFlightDiagnostic()> emitError, ArrayAttr tileSizes,
+    ArrayAttr nativeVectorSize) {
+  if (!tileSizes) {
+    return emitError() << "expected tile_sizes to be specified (even if "
+                          "specified as empty)";
+  }
+  // Each level of tiling must be a (possibly empty) list of integers.
+  if (llvm::any_of(tileSizes, [](Attribute attr) {
+        auto arrayAttr = attr.dyn_cast<ArrayAttr>();
+        return !arrayAttr || !checkIntegerArrayAttr(arrayAttr);
+      })) {
+    return emitError()
+           << "expected all elements of tile_sizes to be a list of integers";
+  }
+  if (!nativeVectorSize) {
+    return emitError() << "expected native_vector_size to be specified (even "
+                          "if specified as empty)";
+  }
+  if (!checkIntegerArrayAttr(nativeVectorSize)) {
+    return emitError()
+           << "expected native_vector_size to be a list of integer values";
+  }
+  return success();
+}
+
+/// Parses `<` `tile_sizes` `=` array `,` `native_vector_size` `=` array `>`
+/// and builds the attribute. Returns a null attribute on any parse failure.
+::mlir::Attribute LoweringConfigAttr::parse(::mlir::DialectAsmParser &parser,
+                                            ::mlir::Type attrType) {
+  // "< tile_sizes ="
+  if (parser.parseLess()) return {};
+  if (parser.parseKeyword("tile_sizes")) return {};
+  if (parser.parseEqual()) return {};
+
+  // Tile sizes list.
+  ::mlir::FailureOr<ArrayAttr> tileSizesResult =
+      IREEFieldParser<ArrayAttr>::parse(parser);
+  if (failed(tileSizesResult)) {
+    parser.emitError(parser.getCurrentLocation(),
+                     "failed to parse IREECodegen_LoweringConfigAttr parameter "
+                     "'tileSizes' which is to be a `ArrayAttr`");
+    return {};
+  }
+
+  // ", native_vector_size ="
+  if (parser.parseComma()) return {};
+  if (parser.parseKeyword("native_vector_size")) return {};
+  if (parser.parseEqual()) return {};
+
+  // Native vector size list.
+  ::mlir::FailureOr<ArrayAttr> vectorSizeResult =
+      IREEFieldParser<ArrayAttr>::parse(parser);
+  if (failed(vectorSizeResult)) {
+    parser.emitError(parser.getCurrentLocation(),
+                     "failed to parse IREECodegen_LoweringConfigAttr parameter "
+                     "'nativeVectorSize' which is to be a `ArrayAttr`");
+    return {};
+  }
+
+  // Closing '>'.
+  if (parser.parseGreater()) return {};
+  return LoweringConfigAttr::get(parser.getContext(),
+                                 tileSizesResult.getValue(),
+                                 vectorSizeResult.getValue());
+}
+
+/// Prints the attribute in the form accepted by `parse`:
+/// `lowering.config<tile_sizes = [...], native_vector_size = [...]>`.
+void LoweringConfigAttr::print(::mlir::DialectAsmPrinter &printer) const {
+  printer << "lowering.config" << "<";
+  printer << "tile_sizes" << ' ' << "=" << ' ' << getTileSizes() << ",";
+  printer << ' ' << "native_vector_size" << ' ' << "=" << ' '
+          << getNativeVectorSize() << ">";
+}
+
+//===----------------------------------------------------------------------===//
+// iree_codegen.compilation.info
+//===----------------------------------------------------------------------===//
+
+/// Builds a `CompilationInfoAttr` whose translation info uses the `None` pass
+/// pipeline, from plain integer tile sizes, native vector size and workgroup
+/// size.
+CompilationInfoAttr CompilationInfoAttr::get(MLIRContext *context,
+                                             TileSizesListTypeRef tileSizes,
+                                             ArrayRef<int64_t> nativeVectorSize,
+                                             ArrayRef<int64_t> workgroupSize) {
+  auto loweringConfig =
+      LoweringConfigAttr::get(context, tileSizes, nativeVectorSize);
+  auto noPipelineInfo =
+      TranslationInfoAttr::get(context, DispatchLoweringPassPipeline::None);
+  ArrayAttr workgroupSizeAttr = getI64IntegerArrayAttr(context, workgroupSize);
+  return get(context, loweringConfig, noPipelineInfo, workgroupSizeAttr);
+}
+
+/// Builds a `CompilationInfoAttr` from plain values: the lowering config is
+/// built from `tileSizes` and `nativeVectorSize`, the translation info from
+/// `passPipeline` and `workloadPerWorkgroup`, and `workgroupSize` is stored as
+/// a list of 64-bit integer attributes.
+CompilationInfoAttr CompilationInfoAttr::get(
+    MLIRContext *context, TileSizesListTypeRef tileSizes,
+    ArrayRef<int64_t> nativeVectorSize,
+    DispatchLoweringPassPipeline passPipeline,
+    ArrayRef<int64_t> workloadPerWorkgroup, ArrayRef<int64_t> workgroupSize) {
+  auto loweringConfig =
+      LoweringConfigAttr::get(context, tileSizes, nativeVectorSize);
+  auto translationInfo =
+      TranslationInfoAttr::get(context, passPipeline, workloadPerWorkgroup);
+  ArrayAttr workgroupSizeAttr = getI64IntegerArrayAttr(context, workgroupSize);
+  return get(context, loweringConfig, translationInfo, workgroupSizeAttr);
+}
+
+/// Verifies the parameters of a `compilation.info` attribute: the lowering
+/// config and translation info must be present and pass their own verifiers,
+/// and the workgroup size must be present and contain only `IntegerAttr`s.
+LogicalResult CompilationInfoAttr::verify(
+    function_ref<InFlightDiagnostic()> emitError,
+    LoweringConfigAttr loweringConfig, TranslationInfoAttr translationInfo,
+    ArrayAttr workgroupSize) {
+  // Lowering configuration.
+  if (!loweringConfig) {
+    return emitError() << "missing lowering config";
+  }
+  LogicalResult configStatus =
+      LoweringConfigAttr::verify(emitError, loweringConfig.getTileSizes(),
+                                 loweringConfig.getNativeVectorSize());
+  if (failed(configStatus)) return failure();
+
+  // Translation info.
+  if (!translationInfo) {
+    return emitError() << "missing translation info";
+  }
+  LogicalResult translationStatus = TranslationInfoAttr::verify(
+      emitError, translationInfo.getPassPipeline(),
+      translationInfo.getWorkloadPerWorkgroup());
+  if (failed(translationStatus)) return failure();
+
+  // Workgroup size.
+  if (!workgroupSize) {
+    return emitError() << "expected workgroup_size to be specified (even if "
+                          "specified empty)";
+  }
+  bool allIntegers = checkIntegerArrayAttr(workgroupSize);
+  if (!allIntegers) {
+    return emitError() << "expected workgroup_size to be a list of integers";
+  }
+  return success();
+}
+
+/// Parser method copied from the code auto-generated using `assemblyFormat`,
+/// available with patch D111594. Replace after that change is in IREE.
+///
+/// Parses `<` lowering-config `,` translation-info `,` `workgroup_size` `=`
+/// array `>`. Returns a null attribute on any parse failure.
+::mlir::Attribute CompilationInfoAttr::parse(::mlir::DialectAsmParser &parser,
+                                             ::mlir::Type attrType) {
+  // Opening '<'.
+  if (parser.parseLess()) return {};
+
+  // Lowering configuration.
+  ::mlir::FailureOr<LoweringConfigAttr> configResult =
+      IREEFieldParser<LoweringConfigAttr>::parse(parser);
+  if (failed(configResult)) {
+    parser.emitError(
+        parser.getCurrentLocation(),
+        "failed to parse IREECodegen_CompilationInfoAttr parameter "
+        "'loweringConfig' which is to be a `LoweringConfigAttr`");
+    return {};
+  }
+  if (parser.parseComma()) return {};
+
+  // Translation info.
+  ::mlir::FailureOr<TranslationInfoAttr> translationResult =
+      IREEFieldParser<TranslationInfoAttr>::parse(parser);
+  if (failed(translationResult)) {
+    parser.emitError(
+        parser.getCurrentLocation(),
+        "failed to parse IREECodegen_CompilationInfoAttr parameter "
+        "'translationInfo' which is to be a `TranslationInfoAttr`");
+    return {};
+  }
+
+  // ", workgroup_size ="
+  if (parser.parseComma()) return {};
+  if (parser.parseKeyword("workgroup_size")) return {};
+  if (parser.parseEqual()) return {};
+
+  // Workgroup size list.
+  ::mlir::FailureOr<ArrayAttr> workgroupSizeResult =
+      IREEFieldParser<ArrayAttr>::parse(parser);
+  if (failed(workgroupSizeResult)) {
+    parser.emitError(parser.getCurrentLocation(),
+                     "failed to parse IREECodegen_CompilationInfoAttr "
+                     "parameter 'workgroupSize' which is to be a `ArrayAttr`");
+    return {};
+  }
+
+  // Closing '>'.
+  if (parser.parseGreater()) return {};
+  return CompilationInfoAttr::get(
+      parser.getContext(), configResult.getValue(),
+      translationResult.getValue(), workgroupSizeResult.getValue());
+}
+
+/// Printer method copied from the code auto-generated using `assemblyFormat`,
+/// available with patch D111594. Replace after that change is in IREE.
+///
+/// Prints `compilation.info<config, translation-info, workgroup_size = [...]>`.
+void CompilationInfoAttr::print(::mlir::DialectAsmPrinter &printer) const {
+  printer << "compilation.info" << "<" << getLoweringConfig() << ",";
+  printer << ' ' << getTranslationInfo() << ",";
+  printer << ' ' << "workgroup_size" << ' ' << "=" << ' '
+          << getWorkgroupSize() << ">";
+}
+
+/// Returns the workgroup size as plain integers (empty if the attribute is not
+/// present; `getIntegerVals` maps a null list to an empty vector).
+SmallVector<int64_t> CompilationInfoAttr::getWorkgroupSizeVals() {
+  return getIntegerVals(getWorkgroupSize());
+}
+
+//===----------------------------------------------------------------------===//
+// Initialize attributes
+//===----------------------------------------------------------------------===//
+
+/// Registers the attributes listed in the generated `LoweringConfig.cpp.inc`
+/// (expanded here under `GET_ATTRDEF_LIST`) with the dialect.
+void IREECodegenDialect::initializeCodegenAttrs() {
+  addAttributes<
+#define GET_ATTRDEF_LIST
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.cpp.inc"  // IWYU pragma: keep
+      >();
+}
+
+/// Forwards to the generated attribute parser and returns its result
+/// unchanged.
+OptionalParseResult IREECodegenDialect::parseCodegenAttrs(
+    DialectAsmParser &parser, StringRef mnemonic, Type type,
+    Attribute &value) const {
+  return generatedAttributeParser(parser, mnemonic, type, value);
+}
+
+/// Forwards to the generated attribute printer and returns its result
+/// unchanged.
+LogicalResult IREECodegenDialect::printCodegenAttrs(
+    Attribute attr, DialectAsmPrinter &p) const {
+  return generatedAttributePrinter(attr, p);
+}
+
+}  // namespace Codegen
+}  // namespace IREE
+
+//===----------------------------------------------------------------------===//
+// Helpers for getting/setting iree_codegen.translation.info attribute on the
+// `hal.executable.entry_point`
+//===----------------------------------------------------------------------===//
+
+/// Looks up the attribute named `translation.info` on `entryPointOp` and
+/// returns it as a `TranslationInfoAttr`.
+IREE::Codegen::TranslationInfoAttr getTranslationInfo(
+    IREE::HAL::ExecutableEntryPointOp entryPointOp) {
+  return entryPointOp->getAttrOfType<IREE::Codegen::TranslationInfoAttr>(
+      kTranslationInfoAttrName);
+}
+
+/// Returns the workgroup size stored on `entryPointOp` as plain integers, or
+/// an empty vector if the op has no workgroup size.
+SmallVector<int64_t> getWorkgroupSize(
+    IREE::HAL::ExecutableEntryPointOp entryPointOp) {
+  Optional<ArrayAttr> sizesAttr = entryPointOp.workgroup_size();
+  if (!sizesAttr) return {};
+  return getIntegerVals(*sizesAttr);
+}
+
+/// Stores `translationInfo` on `entryPointOp` under `translation.info`. If
+/// `workgroupSize` is non-empty it is also written to the op's workgroup size
+/// attribute as a list of index-typed integers; an empty `workgroupSize`
+/// leaves that attribute untouched.
+void setTranslationInfo(IREE::HAL::ExecutableEntryPointOp entryPointOp,
+                        IREE::Codegen::TranslationInfoAttr translationInfo,
+                        ArrayRef<int64_t> workgroupSize) {
+  entryPointOp->setAttr(kTranslationInfoAttrName, translationInfo);
+  if (workgroupSize.empty()) return;
+  // The workgroup size is set on the entry point op directly.
+  ArrayAttr workgroupSizeAttr =
+      getIndexIntegerArrayAttr(entryPointOp->getContext(), workgroupSize);
+  entryPointOp.workgroup_sizeAttr(workgroupSizeAttr);
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers for getting/setting `iree_codegen.lowering.config` attribute on root
+// operations.
+//===----------------------------------------------------------------------===//
+
+/// Looks up the attribute named `lowering.config` on `op` and returns it as a
+/// `LoweringConfigAttr`.
+IREE::Codegen::LoweringConfigAttr getLoweringConfig(Operation *op) {
+  return op->getAttrOfType<IREE::Codegen::LoweringConfigAttr>(kConfigAttrName);
+}
+
+/// Returns the tile sizes at `level` from the lowering config on `op`, or an
+/// empty vector if `op` has no lowering config.
+SmallVector<int64_t> getTileSizes(Operation *op, unsigned level) {
+  if (auto config = getLoweringConfig(op)) {
+    return config.getTileSizeVals(level);
+  }
+  return {};
+}
+/// Same as the overload above, but materializes each tile size as an
+/// `arith::ConstantIndexOp` created with `b` at the location of `op`.
+SmallVector<Value, 4> getTileSizes(OpBuilder &b, Operation *op,
+                                   unsigned level) {
+  SmallVector<Value, 4> sizeValues;
+  for (int64_t size : getTileSizes(op, level)) {
+    sizeValues.push_back(
+        b.create<arith::ConstantIndexOp>(op->getLoc(), size));
+  }
+  return sizeValues;
+}
+
+/// Stores `config` on `op` under the attribute name `lowering.config`.
+void setLoweringConfig(Operation *op,
+                       IREE::Codegen::LoweringConfigAttr config) {
+  op->setAttr(kConfigAttrName, config);
+}
+
+/// Derives the translation info for `entryPointFn` from `config` and sets it
+/// on the corresponding entry point op.
+///
+/// The workload per workgroup is taken from the first-level tile sizes of
+/// `config` at the positions of the partitioned loops of `op`, and is stored
+/// in reverse order. Fails if a partitioned loop has no corresponding tile
+/// size, if such a tile size is zero, or if no entry point op is found for
+/// `entryPointFn`. Note that this overload does not itself attach `config` to
+/// `op`.
+LogicalResult setOpConfigAndEntryPointFnTranslation(
+    FuncOp entryPointFn, Operation *op,
+    IREE::Codegen::LoweringConfigAttr config,
+    IREE::Codegen::DispatchLoweringPassPipeline passPipeline,
+    ArrayRef<int64_t> workgroupSize) {
+  auto partitionedLoops = getPartitionedLoops(op);
+  SmallVector<int64_t, 3> workloadPerWorkgroup;
+  auto tileSizes = config.getTileSizeVals(0);
+  if (!tileSizes.empty() && !partitionedLoops.empty()) {
+    for (unsigned depth : partitionedLoops) {
+      if (depth >= tileSizes.size()) {
+        // Indexing loop `back()` requires `back() + 1` tile sizes.
+        return op->emitOpError(
+                   "illegal configuration for lowering op, expect first level "
+                   "tile size to contain at least ")
+               << (partitionedLoops.back() + 1) << " elements";
+      }
+      if (tileSizes[depth] == 0) {
+        return op->emitOpError("illegal to set tilesize of loop ")
+               << depth
+               << " to zero since it is set to be partitioned at the flow "
+                  "level";
+      }
+      workloadPerWorkgroup.push_back(tileSizes[depth]);
+    }
+    // The workload is recorded in the reverse of the partitioned loop order.
+    if (!workloadPerWorkgroup.empty()) {
+      workloadPerWorkgroup =
+          llvm::to_vector<3>(llvm::reverse(workloadPerWorkgroup));
+    }
+  }
+  auto entryPointOp = getEntryPoint(entryPointFn);
+  if (!entryPointOp) {
+    return entryPointFn.emitOpError(
+        "unable to find entry point op for entry point function");
+  }
+  auto translationInfo = IREE::Codegen::TranslationInfoAttr::get(
+      entryPointOp->getContext(), passPipeline, workloadPerWorkgroup);
+  setTranslationInfo(entryPointOp, translationInfo, workgroupSize);
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers for getting/setting `iree_codegen.compilation.info` attribute on root
+// operations to override IREEs default compilation.
+//===----------------------------------------------------------------------===//
+
+/// Looks up the attribute named `compilation.info` on `op` and returns it as a
+/// `CompilationInfoAttr`.
+IREE::Codegen::CompilationInfoAttr getCompilationInfo(Operation *op) {
+  return op->getAttrOfType<IREE::Codegen::CompilationInfoAttr>(
+      kCompilationInfoAttrName);
+}
+
+/// Stores `config` on `op` under the attribute name `compilation.info`.
+void setCompilationInfo(Operation *op,
+                        IREE::Codegen::CompilationInfoAttr config) {
+  op->setAttr(kCompilationInfoAttrName, config);
+}
+
+/// Removes the attribute named `compilation.info` from `op`.
+void eraseCompilationInfo(Operation *op) {
+  op->removeAttr(kCompilationInfoAttrName);
+}
+
+}  // namespace iree_compiler
+}  // namespace mlir
diff --git a/iree/compiler/Codegen/Dialect/LoweringConfig.h b/iree/compiler/Codegen/Dialect/LoweringConfig.h
new file mode 100644
index 000000000000..6d99215f7e8f
--- /dev/null
+++ b/iree/compiler/Codegen/Dialect/LoweringConfig.h
@@ -0,0 +1,152 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+//===- LoweringConfig.h - Declares configuration for lowering Linalg ops --===//
+//
+// This file declares an attribute that drives how a dispatch region containing
+// a set of operations is lowered. The attribute itself is attached to Linalg
+// operations, and helps convert a Linalg operation into "scalar code".
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_
+#define IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_
+
+#include "iree/compiler/Codegen/Utils/Utils.h"
+#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/BuiltinTypes.h"
+
+namespace mlir {
+namespace iree_compiler {
+/// Typedef for tile sizes to use at different levels of tiling: the outer
+/// list is indexed by tiling level, the inner list holds that level's sizes.
+using TileSizesListType = SmallVector<SmallVector<int64_t>>;
+/// Non-owning reference to a list of per-level tile sizes.
+using TileSizesListTypeRef = ArrayRef<SmallVector<int64_t>>;
+}  // namespace iree_compiler
+}  // namespace mlir
+
+// clang-format off
+#include "iree/compiler/Codegen/Dialect/LoweringConfigEnums.h.inc"
+#define GET_ATTRDEF_CLASSES
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h.inc"
+// clang-format on
+
+namespace mlir {
+namespace iree_compiler {
+//===----------------------------------------------------------------------===//
+// Helpers for getting/setting iree_codegen.translation.info attribute on the
+// `hal.executable.entry_point`
+//===----------------------------------------------------------------------===//
+
+/// Gets the translate executable info attribute value associated with
+/// `entryPointOp`. It expects that the attribute is stored using the identifier
+/// `translation.info`.
+IREE::Codegen::TranslationInfoAttr getTranslationInfo(
+    IREE::HAL::ExecutableEntryPointOp entryPointOp);
+/// Returns the translation info for `funcOp`, read from its entry point op.
+/// Returns `nullptr` if no entry point op is found for `funcOp`.
+inline IREE::Codegen::TranslationInfoAttr getTranslationInfo(FuncOp funcOp) {
+  if (auto entryPointOp = getEntryPoint(funcOp)) {
+    return getTranslationInfo(entryPointOp);
+  }
+  return nullptr;
+}
+
+/// Returns the workgroup size specified on the `entryPointOp`.
+SmallVector<int64_t> getWorkgroupSize(
+    IREE::HAL::ExecutableEntryPointOp entryPointOp);
+
+/// Set the translate executable info with the entry point op. Overwrites the
+/// existing attributes.
+// TODO(ravishankarm, benvanik): Eventually all the information needed for the
+// lowering will be consolidated into a single attribute with richer
+// information.
+void setTranslationInfo(IREE::HAL::ExecutableEntryPointOp entryPointOp,
+                        IREE::Codegen::TranslationInfoAttr translationInfo,
+                        ArrayRef<int64_t> workgroupSize = {});
+/// Convenience overload: looks up the entry point op for `entryPointFn` and
+/// forwards to the overload that takes the entry point op.
+// NOTE(review): the result of `getEntryPoint` is not checked before use —
+// confirm callers only pass functions that have an entry point op.
+inline void setTranslationInfo(
+    FuncOp entryPointFn, IREE::Codegen::TranslationInfoAttr translationInfo,
+    ArrayRef<int64_t> workgroupSize = {}) {
+  setTranslationInfo(getEntryPoint(entryPointFn), translationInfo,
+                     workgroupSize);
+}
+
+/// Builds a translation info attribute from `passPipeline` and
+/// `workloadPerWorkgroup` and sets it, together with `workgroupSize`, on the
+/// `hal.executable.entry_point` op corresponding to `entryPointFn`. This
+/// function returns `void`; it does not report failures.
+// NOTE(review): the result of `getEntryPoint` is not checked before use —
+// confirm callers only pass functions that have an entry point op.
+inline void setTranslationInfo(
+    FuncOp entryPointFn,
+    IREE::Codegen::DispatchLoweringPassPipeline passPipeline,
+    ArrayRef<int64_t> workloadPerWorkgroup, ArrayRef<int64_t> workgroupSize) {
+  auto entryPointOp = getEntryPoint(entryPointFn);
+  MLIRContext *context = entryPointFn.getContext();
+  auto translationInfo = IREE::Codegen::TranslationInfoAttr::get(
+      context, passPipeline, workloadPerWorkgroup);
+  setTranslationInfo(entryPointOp, translationInfo, workgroupSize);
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers for getting/setting `iree_codegen.lowering.config` attribute on root
+// operations.
+//===----------------------------------------------------------------------===//
+
+/// Returns the lowering configuration set for an operation. Returns `nullptr`
+/// if no value is set.  It expects that the attribute is stored using the
+/// identifier `lowering.config`.
+IREE::Codegen::LoweringConfigAttr getLoweringConfig(Operation *op);
+
+/// Returns the tile sizes for a particular operation if the
+/// `iree_codegen.lowering.config` attribute is set on it.
+SmallVector<int64_t> getTileSizes(Operation *op, unsigned level);
+SmallVector<Value, 4> getTileSizes(OpBuilder &b, Operation *op, unsigned level);
+
+/// Sets the lowering configuration, overwriting existing attribute values.
+void setLoweringConfig(Operation *op, IREE::Codegen::LoweringConfigAttr config);
+
+/// Sets translation for the entry-point function based on op configuration.
+LogicalResult setOpConfigAndEntryPointFnTranslation(
+    FuncOp entryPointFn, Operation *op,
+    IREE::Codegen::LoweringConfigAttr config,
+    IREE::Codegen::DispatchLoweringPassPipeline passPipeline,
+    ArrayRef<int64_t> workgroupSize = {});
+/// Builds a lowering config from `tileSizes` and `nativeVectorSize`, attaches
+/// it to `op`, and then sets the entry point translation through the overload
+/// that takes a `LoweringConfigAttr`.
+inline LogicalResult setOpConfigAndEntryPointFnTranslation(
+    FuncOp entryPointFn, Operation *op, TileSizesListTypeRef tileSizes,
+    ArrayRef<int64_t> nativeVectorSize,
+    IREE::Codegen::DispatchLoweringPassPipeline passPipeline,
+    ArrayRef<int64_t> workgroupSize = {}) {
+  auto loweringConfig = IREE::Codegen::LoweringConfigAttr::get(
+      entryPointFn.getContext(), tileSizes, nativeVectorSize);
+  setLoweringConfig(op, loweringConfig);
+  return setOpConfigAndEntryPointFnTranslation(
+      entryPointFn, op, loweringConfig, passPipeline, workgroupSize);
+}
+
+//===----------------------------------------------------------------------===//
+// Helpers for getting/setting `iree_codegen.compilation.info` attribute on root
+// operations to override IREEs default compilation.
+//===----------------------------------------------------------------------===//
+
+/// Returns the `#iree_codegen.compilation.info` set on the operation. Assumes
+/// that the identifier used is `compilation.info`.
+IREE::Codegen::CompilationInfoAttr getCompilationInfo(Operation *op);
+
+/// Sets the `config` to use for compiling the operation. If `op` is the root
+/// operation of the dispatch region, overrides the default configuration that
+/// is used for compilation.
+void setCompilationInfo(Operation *op,
+                        IREE::Codegen::CompilationInfoAttr config);
+
+/// Removes the `#iree_codegen.compilation.info` attribute that is set on the
+/// operation.
+void eraseCompilationInfo(Operation *op);
+
+}  // namespace iree_compiler
+}  // namespace mlir
+
+#endif  // IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_
diff --git a/iree/compiler/Codegen/Dialect/LoweringConfig.td b/iree/compiler/Codegen/Dialect/LoweringConfig.td
new file mode 100644
index 000000000000..2e7b5470de7e
--- /dev/null
+++ b/iree/compiler/Codegen/Dialect/LoweringConfig.td
@@ -0,0 +1,190 @@
+// Copyright 2021 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#ifndef IREE_COMPILER_CODEGEN_DIALECT_LOWERINGCONFIG
+#define IREE_COMPILER_CODEGEN_DIALECT_LOWERINGCONFIG
+
+include "iree/compiler/Codegen/Dialect/IREECodegenDialect.td"
+
+// List of pre-existing pipelines for translating executables.
+def CPU_Default
+    : StrEnumAttrCase<"CPUDefault">;
+def CPU_Vectorization
+    : StrEnumAttrCase<"CPUVectorization">;
+def CPU_TensorToVectors
+    : StrEnumAttrCase<"CPUTensorToVectors">;
+
+def LLVMGPU_SimpleDistribute
+    : StrEnumAttrCase<"LLVMGPUDistribute">;
+def LLVMGPU_Vectorize
+    : StrEnumAttrCase<"LLVMGPUVectorize">;
+def LLVMGPU_MatmulSimt
+    : StrEnumAttrCase<"LLVMGPUMatmulSimt">;
+
+def SPIRV_SimpleDistribute
+    : StrEnumAttrCase<"SPIRVDistribute">;
+def SPIRV_Vectorize
+    : StrEnumAttrCase<"SPIRVVectorize">;
+def SPIRV_VectorizeToCooperativeOps
+    : StrEnumAttrCase<"SPIRVVectorizeToCooperativeOps">;
+
+def None
+    : StrEnumAttrCase<"None">;
+
+// EnumAttrCase for all known lowerings for ops within dispatch region
+// to scalar/native-vector code.
+def DispatchLoweringPassPipelineEnum : StrEnumAttr<
+    "DispatchLoweringPassPipeline",
+    "identifier for pass pipeline use to lower dispatch region",
+    [CPU_Default, CPU_TensorToVectors, CPU_Vectorization,
+     LLVMGPU_SimpleDistribute, LLVMGPU_Vectorize, LLVMGPU_MatmulSimt,
+     SPIRV_SimpleDistribute, SPIRV_Vectorize,
+     SPIRV_VectorizeToCooperativeOps, None]> {
+  let cppNamespace = "::mlir::iree_compiler::IREE::Codegen";
+}
+
+def IREECodegen_TranslationInfoAttr :
+    AttrDef<IREECodegen_Dialect, "TranslationInfo", []> {
+  let mnemonic = "translation.info";
+  let summary = [{drive dispatch entry point lowering}];
+  let description = [{
+    Specifies the information that is used to drive the translation of
+    an entry point function using Linalg based structured-op
+    lowering. During executable translation this is attached to the
+    `hal.executable.entry_point` operation.
+
+    If this attribute is already set on the root operation (as part of
+    `iree_codegen.compilation.info`) that drives the compilation of a
+    dispatch region (like `linalg.matmul`/`linalg.*conv*`), this
+    attribute gets propagated to the entry point function.
+
+    The fields are
+    - `passPipeline` : The pass pipeline to use.
+    - `workloadPerWorkgroup` : Specifies how much of the original
+      `workload` is handled by a workgroup along `x`, `y` and `z`.  If
+      left empty it implies that there is a single workgroup that
+      does the entire `workload`.
+
+  }];
+
+  // TODO(ravishankarm): Commented out till patch D111594 lands.
+  // let assemblyFormat = [{
+  //   `<` $passPipeline `,` `workload_per_wg` `=` $workloadPerWorkgroup `>`
+  // }];
+
+  let parameters = (ins
+    AttrParameter<"StringAttr", "">:$passPipeline,
+    AttrParameter<"ArrayAttr", "">:$workloadPerWorkgroup
+  );
+  let builders = [
+    AttrBuilder<(ins "DispatchLoweringPassPipeline":$passPipeline,
+        CArg<"ArrayRef<int64_t>", "{}">:$workloadPerWorkgroup)>
+  ];
+  let extraClassDeclaration = [{
+    // Returns the lowering pass pipeline set.
+    DispatchLoweringPassPipeline getDispatchLoweringPassPipeline();
+
+    // Returns values of the workloadPerWorkgroup field if set.
+    SmallVector<int64_t> getWorkloadPerWorkgroupVals();
+  }];
+  let genVerifyDecl = 1;
+}
+
+def IREECodegen_LoweringConfigAttr :
+    AttrDef<IREECodegen_Dialect, "LoweringConfig", []> {
+  let mnemonic = "lowering.config";
+  let summary = [{drive lowering of an operation within dispatch region}];
+  let description = [{
+    Specifies the information that is used by backend compiler to
+    translate an operation to scalar code. The way the information is
+    used is specific to each backend (indeed specific to the pass
+    pipeline used) to compile that operation.
+
+    TODO: Currently there is no verification that the configuration
+    specifies everything needed for a pass-pipeline. The values to set
+    for these parameters are dependent on the pass-pipeline
+    implementation. In future, each pass pipeline could verify that
+    the lowering configuration has all the necessary attributes for
+    the pipeline.
+
+  }];
+
+  // TODO(ravishankarm): Commented out till patch D111594 lands.
+  // let assemblyFormat = [{
+  //   `<` `tile_sizes` `=` $tileSizes `,` `native_vector_size` `=` $nativeVectorSize `>`
+  // }];
+
+  let parameters = (ins
+    AttrParameter<"ArrayAttr", "">:$tileSizes,
+    AttrParameter<"ArrayAttr", "">:$nativeVectorSize
+  );
+  let builders = [
+    AttrBuilder<(ins "TileSizesListTypeRef":$tileSizes,
+        CArg<"ArrayRef<int64_t>", "{}">:$nativeVectorSize)>
+  ];
+  let extraClassDeclaration = [{
+    // Returns the tile sizes for all levels set for the op.
+    TileSizesListType getTileSizeVals();
+
+    // Returns the tile sizes for a level set for the op.
+    SmallVector<int64_t> getTileSizeVals(unsigned level = 0);
+
+    // Returns the native vector size to use.
+    SmallVector<int64_t> getNativeVectorSizeVals();
+  }];
+  let genVerifyDecl = 1;
+}
+
+def IREECodegen_CompilationInfoAttr :
+    AttrDef<IREECodegen_Dialect, "CompilationInfo", []> {
+  let mnemonic = "compilation.info";
+  let summary = [{drive lowering of an operation from input dialect}];
+  let description = [{
+    Specifies the information that allows controlling the compilation
+    of operations like `linalg.matmul`/`linalg.*conv*` within
+    IREE. This information is used to override the defaults used by
+    the IREE compiler. Currently it is only valid to set this on
+    `linalg.matmul`/`linalg.*conv*` operations.
+
+    TODO: It is expected that the `TranslationInfoAttr` and the
+    `LoweringConfigAttr` are specified. Currently there is no
+    verification that the values of the `LoweringConfigAttr` fully
+    specify the behaviour of the compilation path chosen with
+    `TranslationInfoAttr`. This could be added in the future.  Note:
+    Typically the values used for the first-level tiling in
+    `LoweringConfigAttr` and `workload_per_wg` value in the
+    `TranslationInfoAttr` are the same since the first-level of tile +
+    distribute is already done at the `Flow` level. This verification
+    is also a TODO.
+  }];
+  let parameters = (ins
+    AttrParameter<"LoweringConfigAttr", "">:$loweringConfig,
+    AttrParameter<"TranslationInfoAttr", "">:$translationInfo,
+    AttrParameter<"ArrayAttr", "">:$workgroupSize
+  );
+
+  // TODO(ravishankarm): Commented out till patch D111594 lands.
+  // let assemblyFormat = [{
+  //   `<` $loweringConfig `,` $translationInfo `,` `workgroup_size` `=` $workgroupSize `>`
+  // }];
+
+  let builders = [
+    AttrBuilder<(ins "TileSizesListTypeRef":$tileSizes,
+      "ArrayRef<int64_t>":$nativeVectorSize,
+      CArg<"ArrayRef<int64_t>", "{}">:$workgroupSize)>,
+    AttrBuilder<(ins "TileSizesListTypeRef":$tileSizes,
+      "ArrayRef<int64_t>":$nativeVectorSize,
+      "DispatchLoweringPassPipeline":$passPipeline,
+      "ArrayRef<int64_t>":$workloadPerWorkgroup,
+      CArg<"ArrayRef<int64_t>", "{}">:$workgroupSize)>,
+  ];
+  let extraClassDeclaration = [{
+    SmallVector<int64_t> getWorkgroupSizeVals();
+  }];
+  let genVerifyDecl = 1;
+}
+
+#endif // IREE_COMPILER_CODEGEN_DIALECT_LOWERINGCONFIG
diff --git a/iree/compiler/Codegen/Dialect/test/BUILD b/iree/compiler/Codegen/Dialect/test/BUILD
new file mode 100644
index 000000000000..704286c9afdd
--- /dev/null
+++ b/iree/compiler/Codegen/Dialect/test/BUILD
@@ -0,0 +1,30 @@
+# Copyright 2021 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# Tests for the IREE codegen dialect attributes.
+
+load("//iree:lit_test.bzl", "iree_lit_test_suite")
+load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob")
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)
+
+iree_lit_test_suite(
+    name = "lit",
+    srcs = enforce_glob(
+        [
+            "lowering_config_attr.mlir",
+        ],
+        include = ["*.mlir"],
+    ),
+    data = [
+        "//iree/tools:IreeFileCheck",
+        "//iree/tools:iree-opt",
+    ],
+)
diff --git a/iree/compiler/Codegen/Dialect/test/CMakeLists.txt b/iree/compiler/Codegen/Dialect/test/CMakeLists.txt
new file mode 100644
index 000000000000..4de932a994f6
--- /dev/null
+++ b/iree/compiler/Codegen/Dialect/test/CMakeLists.txt
@@ -0,0 +1,23 @@
+################################################################################
+# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from           #
+# iree/compiler/Codegen/Dialect/test/BUILD                                     #
+#                                                                              #
+# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary   #
+# CMake-only content.                                                          #
+#                                                                              #
+# To disable autogeneration for this file entirely, delete this header.        #
+################################################################################
+
+iree_add_all_subdirs()
+
+iree_lit_test_suite(
+  NAME
+    lit
+  SRCS
+    "lowering_config_attr.mlir"
+  DATA
+    iree::tools::IreeFileCheck
+    iree::tools::iree-opt
+)
+
+### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/compiler/Codegen/Dialect/test/lowering_config_attr.mlir b/iree/compiler/Codegen/Dialect/test/lowering_config_attr.mlir
new file mode 100644
index 000000000000..363999672367
--- /dev/null
+++ b/iree/compiler/Codegen/Dialect/test/lowering_config_attr.mlir
@@ -0,0 +1,37 @@
+// RUN: iree-opt -split-input-file %s | IreeFileCheck %s
+
+module attributes {
+  translation.info = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [32, 42]>
+} { }
+// CHECK: #translation = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [32, 42]>
+
+// -----
+
+module attributes {
+  translation.info = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []>
+} { }
+// CHECK: #translation = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []>
+
+// -----
+
+module attributes {
+  lowering.config = #iree_codegen.lowering.config<tile_sizes = [], native_vector_size = []>
+} { }
+// CHECK: #config = #iree_codegen.lowering.config<tile_sizes = [], native_vector_size = []>
+
+// -----
+
+module attributes {
+  lowering.config = #iree_codegen.lowering.config<tile_sizes = [[], [10]], native_vector_size = [32, 32]>
+} { }
+// CHECK: #config = #iree_codegen.lowering.config<tile_sizes = {{\[}}[], [10]{{\]}}, native_vector_size = [32, 32]>
+
+// -----
+
+module attributes {
+  compilation.info = #iree_codegen.compilation.info<
+      #iree_codegen.lowering.config<tile_sizes = [], native_vector_size = []>,
+      #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []>,
+      workgroup_size = []>
+} { }
+// CHECK: #compilation = #iree_codegen.compilation.info<#iree_codegen.lowering.config<tile_sizes = [], native_vector_size = []>, #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []>, workgroup_size = []>
\ No newline at end of file
diff --git a/iree/compiler/Codegen/LLVMCPU/BUILD b/iree/compiler/Codegen/LLVMCPU/BUILD
index 0d410558c28e..21e280217659 100644
--- a/iree/compiler/Codegen/LLVMCPU/BUILD
+++ b/iree/compiler/Codegen/LLVMCPU/BUILD
@@ -29,6 +29,7 @@ cc_library(
     deps = [
         "//iree/compiler/Codegen:PassHeaders",
         "//iree/compiler/Codegen/Common",
+        "//iree/compiler/Codegen/Dialect:IREECodegenDialect",
         "//iree/compiler/Codegen/Transforms",
         "//iree/compiler/Codegen/Utils",
         "//iree/compiler/Dialect/Flow/IR",
diff --git a/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt b/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
index 50207fdd49f7..44d43b1becd7 100644
--- a/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
+++ b/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt
@@ -57,6 +57,7 @@ iree_cc_library(
     MLIRVectorToLLVM
     MLIRVectorToSCF
     iree::compiler::Codegen::Common
+    iree::compiler::Codegen::Dialect::IREECodegenDialect
     iree::compiler::Codegen::PassHeaders
     iree::compiler::Codegen::Transforms
     iree::compiler::Codegen::Utils
diff --git a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
index 3504b5bd94ae..dbbdccad2f74 100644
--- a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -8,9 +8,7 @@
 
 #include "iree/compiler/Codegen/Transforms/Transforms.h"
 #include "iree/compiler/Codegen/Utils/MarkerUtils.h"
-#include "iree/compiler/Codegen/Utils/Utils.h"
 #include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
 #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/CommandLine.h"
@@ -227,8 +225,8 @@ static LogicalResult setDefaultLaunchConfig(
       getDefaultWorkloadPerWorkgroup(tiledLoops, nativeVectorSizeInElements);
 
   setTranslationInfo(
-      entryPointFn, IREE::HAL::DispatchLoweringPassPipeline::CPUDefault,
-      /*workgroupSize =*/ArrayRef<int64_t>{}, workloadPerWorkgroup);
+      entryPointFn, IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault,
+      workloadPerWorkgroup, /*workgroupSize =*/ArrayRef<int64_t>{});
   return success();
 }
 
@@ -299,8 +297,9 @@ static LogicalResult setRootConfig(FuncOp entryPointFn,
                     vectorSizeVals[i]);
   }
   setTranslationInfo(
-      entryPointFn, IREE::HAL::DispatchLoweringPassPipeline::CPUTensorToVectors,
-      /*workgroupSize =*/ArrayRef<int64_t>{}, workloadPerWorkgroup);
+      entryPointFn,
+      IREE::Codegen::DispatchLoweringPassPipeline::CPUTensorToVectors,
+      workloadPerWorkgroup, /*workgroupSize =*/ArrayRef<int64_t>{});
 
   SmallVector<int64_t, 4> l1TileSizes, vectorTileSizes;
   if (isBatchMatmul) {
@@ -321,8 +320,8 @@ static LogicalResult setRootConfig(FuncOp entryPointFn,
                             // level tiling.
   tileSizes.emplace_back(std::move(l1TileSizes));
   tileSizes.emplace_back(std::move(vectorTileSizes));
-  IREE::HAL::LoweringConfig config =
-      buildConfigAttr(tileSizes, vectorSizeVals, entryPointFn.getContext());
+  auto config = IREE::Codegen::LoweringConfigAttr::get(
+      entryPointFn.getContext(), tileSizes, vectorSizeVals);
   setLoweringConfig(contractionOp, config);
   return success();
 }
@@ -368,14 +367,14 @@ static LogicalResult setRootConfig(FuncOp entryPointFn, linalg::Mmt4DOp mmt4dOp,
     return {1, 1, 1, M0, N0, K0};
   };
 
-  SmallVector<int64_t, 4> nativeVectorSize = getVectorSizes();
+  SmallVector<int64_t> nativeVectorSize = getVectorSizes();
 
   TileSizesListType tileSizes = {getWorkgroupTileSizes(), getL1TileSizes(),
                                  nativeVectorSize};
 
   return setOpConfigAndEntryPointFnTranslation(
       entryPointFn, mmt4dOp, tileSizes, nativeVectorSize,
-      IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization);
+      IREE::Codegen::DispatchLoweringPassPipeline::CPUVectorization);
 }
 
 /// Sets the lowering configuration for dispatch region for linalg_ext.fft
@@ -384,8 +383,7 @@ static LogicalResult setRootConfig(FuncOp entryPointFn, linalg_ext::FftOp fftOp,
                                    ArrayRef<TiledLoopInfo> tiledLoops) {
   auto partitionedLoops = getPartitionedLoops(fftOp);
   unsigned maxDepth = partitionedLoops.back() + 1;
-  SmallVector<int64_t, 4> workgroupTileSizes(maxDepth,
-                                             defaultWorkgroupTileSize);
+  SmallVector<int64_t> workgroupTileSizes(maxDepth, defaultWorkgroupTileSize);
   llvm::DenseSet<unsigned> partitionedLoopsSet(partitionedLoops.begin(),
                                                partitionedLoops.end());
   for (auto dim : llvm::seq<int64_t>(0, workgroupTileSizes.size())) {
@@ -412,7 +410,7 @@ static LogicalResult setRootConfig(FuncOp entryPointFn, linalg_ext::FftOp fftOp,
   return setOpConfigAndEntryPointFnTranslation(
       entryPointFn, fftOp, tileSizes,
       /*nativeVectorSizes=*/ArrayRef<int64_t>{},
-      IREE::HAL::DispatchLoweringPassPipeline::CPUDefault);
+      IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault);
 }
 
 /// Finds the root operation in the given list of linalg operations and sets
@@ -454,11 +452,8 @@ static LogicalResult setTranslationInfoAndRootConfig(
   for (auto computeOp : computeOps) {
     if (!hasMarker(computeOp, getWorkgroupMarker())) continue;
 
-    if (auto config = getLoweringConfig(computeOp)) {
-      // Check if the op has a preset pipeline.
-      auto passPipeline = getLoweringPassPipeline(config);
-      if (!passPipeline) continue;
-
+    if (IREE::Codegen::CompilationInfoAttr compilationInfo =
+            getCompilationInfo(computeOp)) {
       // If the function already has a translation, error out.
       if (auto translationInfo = getTranslationInfo(entryPointFn)) {
         return computeOp->emitOpError(
@@ -466,17 +461,12 @@ static LogicalResult setTranslationInfoAndRootConfig(
             "info");
       }
 
-      SmallVector<int64_t, 4> workgroupSize;
-      if (auto workgroupSizeAttr = config.workgroupSize()) {
-        workgroupSize = llvm::to_vector<4>(
-            llvm::map_range(workgroupSizeAttr, [](Attribute intAttr) {
-              return intAttr.cast<IntegerAttr>().getInt();
-            }));
-      }
-      if (failed(setOpConfigAndEntryPointFnTranslation(
-              entryPointFn, computeOp, config, *passPipeline, workgroupSize))) {
-        return failure();
-      }
+      SmallVector<int64_t> workgroupSize =
+          compilationInfo.getWorkgroupSizeVals();
+      setTranslationInfo(entryPointFn, compilationInfo.getTranslationInfo(),
+                         workgroupSize);
+      setLoweringConfig(computeOp, compilationInfo.getLoweringConfig());
+      eraseCompilationInfo(computeOp);
     }
   }
 
diff --git a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h
index dbacd1a5c3b4..afb616e8cb08 100644
--- a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h
+++ b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h
@@ -7,7 +7,7 @@
 #ifndef IREE_COMPILER_CODEGEN_LLVMCPU_KERNELDISPATCH_H_
 #define IREE_COMPILER_CODEGEN_LLVMCPU_KERNELDISPATCH_H_
 
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "mlir/IR/BuiltinOps.h"
 
 namespace mlir {
diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
index 87bd94ba4313..0732fbbe480a 100644
--- a/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp
@@ -4,10 +4,10 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
 #include "iree/compiler/Codegen/LLVMCPU/KernelDispatch.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
-#include "iree/compiler/Codegen/Utils/Utils.h"
 #include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -30,8 +30,9 @@ class LLVMCPULowerExecutableTargetPass
           LLVMCPULowerExecutableTargetPass> {
  public:
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<IREE::HAL::HALDialect, linalg::LinalgDialect,
-                    LLVM::LLVMDialect, vector::VectorDialect>();
+    registry.insert<IREE::Codegen::IREECodegenDialect, IREE::HAL::HALDialect,
+                    linalg::LinalgDialect, LLVM::LLVMDialect,
+                    vector::VectorDialect>();
   }
 
   LLVMCPULowerExecutableTargetPass(bool vectorize = true)
@@ -123,16 +124,15 @@ void LLVMCPULowerExecutableTargetPass::runOnOperation() {
     // is fine.
     llvm::StringMap<IREE::HAL::ExecutableEntryPointOp> entryPoints =
         getAllEntryPoints(moduleOp);
-    Optional<IREE::HAL::DispatchLoweringPassPipeline> passPipeline;
+    Optional<IREE::Codegen::DispatchLoweringPassPipeline> passPipeline;
     for (auto &it : entryPoints) {
       auto entryPointOp = it.second;
-      if (IREE::HAL::TranslationInfo translationInfo =
+      if (IREE::Codegen::TranslationInfoAttr translationInfo =
               getTranslationInfo(entryPointOp)) {
-        Optional<IREE::HAL::DispatchLoweringPassPipeline> currPipeline =
-            getLoweringPassPipeline(translationInfo);
-        if (!currPipeline) continue;
+        IREE::Codegen::DispatchLoweringPassPipeline currPipeline =
+            translationInfo.getDispatchLoweringPassPipeline();
         if (passPipeline) {
-          if (currPipeline.getValue() != passPipeline.getValue()) {
+          if (currPipeline != passPipeline.getValue()) {
             moduleOp.emitError(
                 "unhandled compilation of entry point function with different "
                 "pass pipelines within a module");
@@ -150,14 +150,14 @@ void LLVMCPULowerExecutableTargetPass::runOnOperation() {
       OpPassManager &nestedModulePM =
           executableLoweringPipeline.nest<ModuleOp>();
       switch (passPipeline.getValue()) {
-        case IREE::HAL::DispatchLoweringPassPipeline::CPUDefault:
-        case IREE::HAL::DispatchLoweringPassPipeline::None:
+        case IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault:
+        case IREE::Codegen::DispatchLoweringPassPipeline::None:
           addCPUDefaultPassPipeline(nestedModulePM);
           break;
-        case IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization:
+        case IREE::Codegen::DispatchLoweringPassPipeline::CPUVectorization:
           addCPUVectorizationPassPipeline(nestedModulePM, lowerToVectors);
           break;
-        case IREE::HAL::DispatchLoweringPassPipeline::CPUTensorToVectors:
+        case IREE::Codegen::DispatchLoweringPassPipeline::CPUTensorToVectors:
           addTensorToVectorsPassPipeline(nestedModulePM, lowerToVectors);
           break;
         default:
diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp
index aa1a3c69d03b..0c77693bb043 100644
--- a/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp
@@ -4,6 +4,7 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/LLVMCPU/KernelDispatch.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
@@ -20,7 +21,7 @@
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 
-#define DEBUG_TYPE "iree-linalg-to-llvm-tile-and-pad-workgroups"
+#define DEBUG_TYPE "iree-llvmcpu-tile-and-vectorize"
 
 namespace mlir {
 namespace iree_compiler {
@@ -72,15 +73,20 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() {
   MLIRContext *context = &getContext();
   auto funcOp = getOperation();
 
-  // First level of tiling patterns {
+  DEBUG_WITH_TYPE(DEBUG_TYPE, {
+    llvm::dbgs() << "\n--- Before LLVMCPUTileAndVectorizePass ---\n";
+    funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope());
+    llvm::dbgs() << "\n\n";
+  });
+
+  // First level of tiling patterns
   {
     OwningRewritePatternList l1patterns(&getContext());
     l1patterns.insert<TileWorkgroups>(
         context,
         linalg::LinalgTilingOptions().setTileSizeComputationFunction(
-            [](OpBuilder &builder,
-               Operation *operation) -> SmallVector<Value, 4> {
-              return getTileSizes(builder, operation,
+            [](OpBuilder &builder, Operation *op) -> SmallVector<Value, 4> {
+              return getTileSizes(builder, op,
                                   static_cast<unsigned>(TilingLevel::L1Tiles));
             }),
         linalg::LinalgTransformationFilter(
@@ -90,6 +96,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() {
     if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(l1patterns)))) {
       return signalPassFailure();
     }
+
+    DEBUG_WITH_TYPE(DEBUG_TYPE, {
+      llvm::dbgs() << "\n--- After first level of tiling patterns ---\n";
+      funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope());
+      llvm::dbgs() << "\n\n";
+    });
   }
 
   // Apply canoncalization
@@ -104,6 +116,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() {
             funcOp, std::move(canonicalizationPatterns)))) {
       return signalPassFailure();
     }
+
+    DEBUG_WITH_TYPE(DEBUG_TYPE, {
+      llvm::dbgs() << "\n--- After canonicalization ---\n";
+      funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope());
+      llvm::dbgs() << "\n\n";
+    });
   }
 
   // Second level of tiling patterns{
@@ -112,11 +130,9 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() {
     l2patterns.insert<TileWorkgroups>(
         context,
         linalg::LinalgTilingOptions().setTileSizeComputationFunction(
-            [](OpBuilder &builder,
-               Operation *operation) -> SmallVector<Value, 4> {
+            [](OpBuilder &builder, Operation *op) -> SmallVector<Value, 4> {
               return getTileSizes(
-                  builder, operation,
-                  static_cast<unsigned>(TilingLevel::VectorTiles));
+                  builder, op, static_cast<unsigned>(TilingLevel::VectorTiles));
             }),
         linalg::LinalgTransformationFilter(
             Identifier::get(getWorkgroupL1TileMarker(), context),
@@ -125,7 +141,14 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() {
     if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(l2patterns)))) {
       return signalPassFailure();
     }
+
+    DEBUG_WITH_TYPE(DEBUG_TYPE, {
+      llvm::dbgs() << "\n--- After second level of tiling patterns ---\n";
+      funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope());
+      llvm::dbgs() << "\n\n";
+    });
   }
+
   // Apply canoncalization
   {
     OwningRewritePatternList canonicalizationPatterns(&getContext());
@@ -138,6 +161,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() {
             funcOp, std::move(canonicalizationPatterns)))) {
       return signalPassFailure();
     }
+
+    DEBUG_WITH_TYPE(DEBUG_TYPE, {
+      llvm::dbgs() << "\n--- After canonicalization ---\n";
+      funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope());
+      llvm::dbgs() << "\n\n";
+    });
   }
 
   if (!lowerToVectors) {
@@ -156,6 +185,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() {
             funcOp, std::move(vectorizationPatterns)))) {
       return signalPassFailure();
     }
+
+    DEBUG_WITH_TYPE(DEBUG_TYPE, {
+      llvm::dbgs() << "\n--- After vectorization ---\n";
+      funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope());
+      llvm::dbgs() << "\n\n";
+    });
   }
 
   {
@@ -165,6 +200,14 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() {
                                                        context);
     (void)applyPatternsAndFoldGreedily(funcOp,
                                        std::move(canonicalizationPatterns));
+
+    DEBUG_WITH_TYPE(DEBUG_TYPE, {
+      llvm::dbgs()
+          << "\n--- After folding consumer add ops into contraction op "
+             "itself ---\n";
+      funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope());
+      llvm::dbgs() << "\n\n";
+    });
   }
 
   // Apply vector specific operation lowering.
@@ -183,6 +226,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() {
             funcOp, std::move(vectorContractLoweringPatterns)))) {
       return signalPassFailure();
     }
+
+    DEBUG_WITH_TYPE(DEBUG_TYPE, {
+      llvm::dbgs() << "\n--- After vector specific operation lowering ---\n";
+      funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope());
+      llvm::dbgs() << "\n\n";
+    });
   }
 }
 
diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp
index 2fc018541ea0..53092aec2db4 100644
--- a/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp
+++ b/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp
@@ -167,9 +167,8 @@ void LLVMCPUVectorizationPass::runOnOperation() {
     l1patterns.insert<TileWorkgroups>(
         context,
         linalg::LinalgTilingOptions().setTileSizeComputationFunction(
-            [](OpBuilder &builder,
-               Operation *operation) -> SmallVector<Value, 4> {
-              return getTileSizes(builder, operation,
+            [](OpBuilder &builder, Operation *op) -> SmallVector<Value, 4> {
+              return getTileSizes(builder, op,
                                   static_cast<unsigned>(TilingLevel::L1Tiles));
             }),
         linalg::LinalgTransformationFilter(
@@ -188,11 +187,9 @@ void LLVMCPUVectorizationPass::runOnOperation() {
     l2patterns.insert<TileWorkgroups>(
         context,
         linalg::LinalgTilingOptions().setTileSizeComputationFunction(
-            [](OpBuilder &builder,
-               Operation *operation) -> SmallVector<Value, 4> {
+            [](OpBuilder &builder, Operation *op) -> SmallVector<Value, 4> {
               return getTileSizes(
-                  builder, operation,
-                  static_cast<unsigned>(TilingLevel::VectorTiles));
+                  builder, op, static_cast<unsigned>(TilingLevel::VectorTiles));
             }),
         linalg::LinalgTransformationFilter(
             Identifier::get(getWorkgroupL1TileMarker(), context),
diff --git a/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir b/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir
index 91728ceed539..c307c9bc5261 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir
+++ b/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir
@@ -59,10 +59,11 @@ hal.executable private @matmul_tensors  {
   }
 }
 
-//  CHECK-DAG: #[[CONFIG:.+]] = {nativeVectorSize = [4, 4, 4], tileSizes = {{\[}}{{\[}}{{\]}}, [32, 32, 32], [4, 4, 4]{{\]}}}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[], [32, 32, 32], [4, 4, 4]{{\]}}, native_vector_size = [4, 4, 4]>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUTensorToVectors", workload_per_wg = [64, 64]>
 //  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
 //      CHECK: hal.executable.entry_point public @matmul_tensors
-// CHECK-SAME:   translation.info = {passPipeline = "CPUTensorToVectors", workloadPerWorkgroup = [64, 64]}
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   (%[[ARG0:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:    %[[ARG1:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:    %[[ARG2:[a-zA-Z0-9_]+]]: index)
@@ -118,11 +119,10 @@ hal.executable private @add_no_config  {
     }
   }
 }
-
-//       CHECK:  #[[CONFIG:[a-zA-Z]+]] = {passPipeline = "CPUDefault"}
-//       CHECK:  hal.executable private @add_no_config
-//       CHECK:  hal.executable.entry_point public @add_no_config
-//  CHECK-SAME:      translation.info = #[[CONFIG]]
+//      CHECK:  #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []>
+//      CHECK:  hal.executable private @add_no_config
+//      CHECK:  hal.executable.entry_point public @add_no_config
+// CHECK-SAME:      translation.info = #[[TRANSLATION]]
 
 // -----
 
@@ -192,9 +192,10 @@ hal.executable private @add  {
     }
   }
 }
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64]>
 //  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
 //      CHECK: hal.executable.entry_point public @add
-// CHECK-SAME:   translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64]}
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   (%[[ARG0:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:    %[[ARG1:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:    %[[ARG2:[a-zA-Z0-9_]+]]: index)
@@ -296,8 +297,9 @@ hal.executable private @add4D  {
     }
   }
 //  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64, 64]>
 //      CHECK: hal.executable.entry_point public @add4D
-// CHECK-SAME:   translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64, 64]}
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   (%[[ARG0:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:    %[[ARG1:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:    %[[ARG2:[a-zA-Z0-9_]+]]: index)
@@ -378,8 +380,9 @@ hal.executable private @batch_matmul_tensors  {
     }
   }
 }
-//  CHECK-DAG: #[[CONFIG:.+]] = {nativeVectorSize = [1, 4, 4, 4], tileSizes = {{\[}}[], [1, 32, 32, 32], [1, 4, 4, 4]{{\]}}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[], [1, 32, 32, 32], [1, 4, 4, 4]{{\]}}, native_vector_size = [1, 4, 4, 4]>
 //  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUTensorToVectors", workload_per_wg = [64, 64]>
 //      CHECK: hal.executable.entry_point public @batch_matmul_tensors
 // CHECK-NEXT: (%[[ARG0:[a-zA-Z0-9]+]]: index
 // CHECK-SAME:  %[[ARG1:[a-zA-Z0-9]+]]: index
@@ -393,6 +396,10 @@ hal.executable private @batch_matmul_tensors  {
 
 // -----
 
+#compilation = #iree_codegen.compilation.info<
+    #iree_codegen.lowering.config<tile_sizes = [[32, 32, 32]], native_vector_size = []>,
+    #iree_codegen.translation.info<"CPUVectorization", workload_per_wg = [32, 32]>,
+    workgroup_size = []>
 hal.executable private @preset_config_matmul_tensors  {
   hal.executable.variant @system_elf_x86_64, target = #hal.executable.target<"llvm", "system-elf-x86_64"> {
     hal.executable.entry_point @preset_config attributes {interface = @io, ordinal = 0 : index}
@@ -427,7 +434,11 @@ hal.executable private @preset_config_matmul_tensors  {
             %14 = affine.min affine_map<(d0)[s0] -> (-d0 + 512, s0)>(%arg1)[%workgroup_size_x]
             %15 = linalg.init_tensor [%13, %14] : tensor<?x?xf32>
             %16 = linalg.fill(%cst, %15) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
-            %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = {passPipeline = "CPUVectorization", tileSizes = [[32, 32, 32]]}} ins(%8, %10 : tensor<?x256xf32>, tensor<256x?xf32>) outs(%16 : tensor<?x?xf32>) -> tensor<?x?xf32>
+            %17 = linalg.matmul {
+                 __internal_linalg_transform__ = "workgroup",
+                 compilation.info = #compilation}
+                 ins(%8, %10 : tensor<?x256xf32>, tensor<256x?xf32>)
+                 outs(%16 : tensor<?x?xf32>) -> tensor<?x?xf32>
             flow.dispatch.tensor.store %17, %2, offsets = [%arg0, %arg1], sizes = [%11, %12], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:128x512xf32>
           }
         }
@@ -441,11 +452,12 @@ hal.executable private @preset_config_matmul_tensors  {
     }
   }
 }
-//  CHECK-DAG: #[[CONFIG:.+]] = {passPipeline = "CPUVectorization", tileSizes = {{\[}}[32, 32, 32]{{\]}}}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[32, 32, 32]{{\]}}, native_vector_size = []>
 //  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
 //  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 32)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUVectorization", workload_per_wg = [32, 32]>
 //      CHECK: hal.executable.entry_point
-// CHECK-SAME:     translation.info = {passPipeline = "CPUVectorization", workloadPerWorkgroup = [32, 32]}
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index
 //  CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:     %[[NWG_X:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]]
@@ -511,9 +523,10 @@ hal.executable @tensor_insert {
     }
   }
 }
-//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64]>
 //      CHECK: hal.executable.entry_point public @tensor_insert_slice
-// CHECK-SAME:   translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64]}
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   %[[ARG0:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: index
 //  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
@@ -548,12 +561,11 @@ hal.executable private @static_1d_fft_stage2  {
     }
   }
 }
-//   CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[64]]}
+//   CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[64]{{\]}}, native_vector_size = []>
 //   CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//   CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]>
 //       CHECK: hal.executable.entry_point public @static_1d_fft_stage2
-//  CHECK-SAME:   translation.info = {
-//  CHECK-SAME:     passPipeline = "CPUDefault"
-//  CHECK-SAME:     workloadPerWorkgroup = [64]}
+//  CHECK-SAME:   translation.info = #[[TRANSLATION]]
 //  CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index):
 //  CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-NEXT:   %[[T0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]]
@@ -620,12 +632,11 @@ hal.executable private @static_3d_fft_stage3  {
   }
 }
 
-//   CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[64, 64, 64]]}
+//   CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[64, 64, 64]{{\]}}, native_vector_size = []>
 //   CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//   CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64, 64]>
 //       CHECK: hal.executable.entry_point public @static_3d_fft_stage3
-//  CHECK-SAME:   translation.info = {
-//  CHECK-SAME:     passPipeline = "CPUDefault"
-//  CHECK-SAME:   workloadPerWorkgroup = [64, 64, 64]}
+//  CHECK-SAME:   translation.info = #[[TRANSLATION]]
 //  CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index):
 //  CHECK-NEXT:   %[[T0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]]
 //  CHECK-NEXT:   %[[T1:.+]] = affine.apply #[[MAP0]]()[%[[ARG1]]]
@@ -700,8 +711,9 @@ hal.executable private @outs_fusion {
     }
   }
 }
+//      CHECK: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64]>
 //      CHECK: hal.executable.entry_point public @outs_fusion_fn
-// CHECK-SAME:   translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64]}
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
 
 // -----
 
@@ -768,9 +780,10 @@ hal.executable private @conv {
     }
   }
 }
-//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64, 64]>
 //      CHECK: hal.executable.entry_point public @conv attributes
-// CHECK-SAME:     translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64, 64]}
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index)
 //  CHECK-DAG:     %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]
 //  CHECK-DAG:     %[[D1:.+]] = affine.apply #[[MAP0]]()[%[[ARG1]]
@@ -844,8 +857,9 @@ hal.executable private @conv_static {
 }
 //  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
 //  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64, 32]>
 //      CHECK: hal.executable.entry_point public @conv_static attributes
-// CHECK-SAME:     translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64, 32]}
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index)
 //  CHECK-DAG:     %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]
 //  CHECK-DAG:     %[[D1:.+]] = affine.apply #[[MAP0]]()[%[[ARG1]]
@@ -902,8 +916,9 @@ hal.executable private @generic_static {
 }
 //  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
 //  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [32, 8]>
 //      CHECK: hal.executable.entry_point public @generic_static attributes
-// CHECK-SAME:     translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [32, 8]}
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index)
 //  CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:     %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]
@@ -960,11 +975,12 @@ hal.executable private @matmul_static {
     }
   }
 }
-//   CHECK-DAG: #[[CONFIG:.+]] = {nativeVectorSize = [4, 4, 4], tileSizes = {{\[}}[], [28, 8, 24], [4, 4, 4]{{\]}}
+//   CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[], [28, 8, 24], [4, 4, 4]{{\]}}, native_vector_size = [4, 4, 4]>
 //   CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
 //   CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 28)>
+//   CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUTensorToVectors", workload_per_wg = [8, 28]>
 //       CHECK: hal.executable.entry_point public @matmul_static attributes
-//  CHECK-SAME:     translation.info = {passPipeline = "CPUTensorToVectors", workloadPerWorkgroup = [8, 28]}
+//  CHECK-SAME:     translation.info = #[[TRANSLATION]]
 //  CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index)
 //   CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 //   CHECK-DAG:     %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]]
@@ -1035,8 +1051,9 @@ hal.executable private @restrict_num_workgroups {
 //   CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
 //   CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
 //   CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)>
+//   CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 8, 4]>
 //       CHECK: hal.executable.entry_point public @restrict_num_workgroups attributes
-//  CHECK-SAME:     translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 8, 4]}
+//  CHECK-SAME:     translation.info = #[[TRANSLATION]]
 //  CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index)
 //   CHECK-DAG:     %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]]
 //   CHECK-DAG:     %[[D1:.+]] = affine.apply #[[MAP1]]()[%[[ARG1]]]
@@ -1074,9 +1091,10 @@ hal.executable private @test_exp_0 {
     }
   }
 }
-//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]>
 //      CHECK: hal.executable.entry_point public @test_exp_0 attributes
-// CHECK-SAME:     translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]}
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index
 //  CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:     %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
@@ -1113,9 +1131,10 @@ hal.executable private @test_exp_1 {
     }
   }
 }
-//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]>
 //      CHECK: hal.executable.entry_point public @test_exp_1 attributes
-// CHECK-SAME:     translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]}
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index
 //  CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:     %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
@@ -1152,9 +1171,10 @@ hal.executable private @test_exp_2 {
     }
   }
 }
-//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]>
 //      CHECK: hal.executable.entry_point public @test_exp_2 attributes
-// CHECK-SAME:     translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]}
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index
 //  CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:     %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
@@ -1191,9 +1211,10 @@ hal.executable private @test_exp_3 {
     }
   }
 }
-//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]>
 //      CHECK: hal.executable.entry_point public @test_exp_3 attributes
-// CHECK-SAME:     translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]}
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index
 //  CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:     %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
@@ -1230,9 +1251,10 @@ hal.executable private @test_exp_4 {
     }
   }
 }
-//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]>
 //      CHECK: hal.executable.entry_point public @test_exp_4 attributes
-// CHECK-SAME:     translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]}
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index
 //  CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:     %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
@@ -1269,9 +1291,10 @@ hal.executable private @test_exp_5 {
     }
   }
 }
-//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]>
 //      CHECK: hal.executable.entry_point public @test_exp_5 attributes
-// CHECK-SAME:     translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]}
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index
 //  CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 //  CHECK-DAG:     %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
diff --git a/iree/compiler/Codegen/LLVMCPU/test/matmul_vectorization.mlir b/iree/compiler/Codegen/LLVMCPU/test/matmul_vectorization.mlir
index e49121783ecb..21e0e9a74a0e 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/matmul_vectorization.mlir
+++ b/iree/compiler/Codegen/LLVMCPU/test/matmul_vectorization.mlir
@@ -1,7 +1,7 @@
 // RUN: iree-opt -pass-pipeline="hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target{use-lowering-pipeline='builtin.func(iree-llvmcpu-vectorization)'}))" -split-input-file %s | IreeFileCheck %s
 // RUN: iree-opt -pass-pipeline="hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target{use-lowering-pipeline='builtin.func(iree-llvmcpu-vectorization{promote-workgroup-to-full-tiles}),cse'}))" -split-input-file %s | IreeFileCheck %s -check-prefix=CHECK-PROMOTED
 
-#config = {nativeVectorSize = [4, 4, 4], tileSizes = [[64, 64], [32, 32, 32], [4, 4, 4]]}
+#config = #iree_codegen.lowering.config<tile_sizes = [[64, 64], [32, 32, 32], [4, 4, 4]], native_vector_size = [4, 4, 4]>
 hal.executable private @dynamic_matmul  {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -114,7 +114,7 @@ hal.executable private @dynamic_matmul  {
 
 // -----
 
-#config = {nativeVectorSize = [4, 4, 4], tileSizes = [[64, 64], [32, 32, 32], [4, 4, 4]]}
+#config = #iree_codegen.lowering.config<tile_sizes = [[64, 64], [32, 32, 32], [4, 4, 4]], native_vector_size = [4, 4, 4]>
 hal.executable private @matmul_i8_i8_i32  {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
diff --git a/iree/compiler/Codegen/LLVMCPU/test/tile_and_vectorize.mlir b/iree/compiler/Codegen/LLVMCPU/test/tile_and_vectorize.mlir
index ddc4b8fb6d15..4a51e1ad099f 100644
--- a/iree/compiler/Codegen/LLVMCPU/test/tile_and_vectorize.mlir
+++ b/iree/compiler/Codegen/LLVMCPU/test/tile_and_vectorize.mlir
@@ -1,7 +1,7 @@
 // RUN: iree-opt %s -cse -iree-llvmcpu-tile-and-vectorize -cse -canonicalize -split-input-file | IreeFileCheck %s
 
-#config0 = {tileSizes = [[64, 64]]}
-#config1 = {nativeVectorSize = [4, 4, 4], tileSizes = [[64, 64], [32, 32, 32], [4, 4, 4]]}
+#config0 = #iree_codegen.lowering.config<tile_sizes = [[64, 64]], native_vector_size = []>
+#config1 = #iree_codegen.lowering.config<tile_sizes = [[64, 64], [32, 32, 32], [4, 4, 4]], native_vector_size = [4, 4, 4]>
 #map0 = affine_map<()[s0] -> (s0 * 64)>
 #map1 = affine_map<(d0) -> (64, -d0 + 383)>
 #map2 = affine_map<(d0) -> (64, -d0 + 513)>
diff --git a/iree/compiler/Codegen/LLVMGPU/BUILD b/iree/compiler/Codegen/LLVMGPU/BUILD
index e9cc75d421fc..51adc6e585a5 100644
--- a/iree/compiler/Codegen/LLVMGPU/BUILD
+++ b/iree/compiler/Codegen/LLVMGPU/BUILD
@@ -34,6 +34,7 @@ cc_library(
     deps = [
         "//iree/compiler/Codegen:PassHeaders",
         "//iree/compiler/Codegen/Common",
+        "//iree/compiler/Codegen/Dialect:IREECodegenDialect",
         "//iree/compiler/Codegen/Transforms",
         "//iree/compiler/Codegen/Utils",
         "//iree/compiler/Dialect/Flow/IR",
diff --git a/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt b/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt
index cf63b73c3033..f3c058ed86ae 100644
--- a/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt
+++ b/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt
@@ -61,6 +61,7 @@ iree_cc_library(
     MLIRVectorToLLVM
     MLIRVectorToSCF
     iree::compiler::Codegen::Common
+    iree::compiler::Codegen::Dialect::IREECodegenDialect
     iree::compiler::Codegen::PassHeaders
     iree::compiler::Codegen::Transforms
     iree::compiler::Codegen::Utils
diff --git a/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp b/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
index fa009cd74b6f..0d1ae4e7cdbe 100644
--- a/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
+++ b/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp
@@ -8,7 +8,7 @@
 
 #include <numeric>
 
-#include "iree/compiler/Codegen/Utils/Utils.h"
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
 #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h"
 #include "llvm/Support/Debug.h"
@@ -100,7 +100,7 @@ static LogicalResult setContractConfig(FuncOp entryPoint, linalg::LinalgOp op) {
   }
   // Currently just a basic tile size to enable tiling and vectorization.
   // TODO: pick a more efficient tile size and tile at subgroup level.
-  SmallVector<int64_t, 4> ts;
+  SmallVector<int64_t> ts;
   // Tile all the higher parallel dimension with a size of 1 and the 2 most
   // inner dimension with the tileX/tileY size.
   ts.append(op.getNumParallelLoops() - 2, 1);
@@ -110,14 +110,14 @@ static LogicalResult setContractConfig(FuncOp entryPoint, linalg::LinalgOp op) {
   tileSizes.push_back(ts);  // Workgroup level.
   return setOpConfigAndEntryPointFnTranslation(
       entryPoint, op, tileSizes, /*nativeVectorSizes=*/ArrayRef<int64_t>{},
-      IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt,
+      IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt,
       workgroupSize);
 }
 
 static LogicalResult setFftConfig(FuncOp entryPoint, linalg_ext::FftOp op) {
   auto partitionedLoops = getPartitionedLoops(op);
   unsigned loopDepth = partitionedLoops.back() + 1;
-  SmallVector<int64_t, 4> workgroupTileSize(loopDepth, 0);
+  SmallVector<int64_t> workgroupTileSize(loopDepth, 0);
   SmallVector<int64_t, 3> workgroupSize = {cudaWarpSize, 1, 1};
 
   // Tiling along partitioned loops with size 1.
@@ -137,14 +137,14 @@ static LogicalResult setFftConfig(FuncOp entryPoint, linalg_ext::FftOp op) {
   TileSizesListType tileSizes = {workgroupTileSize};
   return setOpConfigAndEntryPointFnTranslation(
       entryPoint, op, tileSizes, /*nativeVectorSizes=*/ArrayRef<int64_t>{},
-      IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute,
+      IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute,
       workgroupSize);
 }
 
 // Basic default properties for linalg ops that haven't been tuned.
 static LogicalResult setRootDefaultConfig(FuncOp entryPoint, Operation *op) {
-  IREE::HAL::DispatchLoweringPassPipeline passPipeline =
-      IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute;
+  IREE::Codegen::DispatchLoweringPassPipeline passPipeline =
+      IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute;
   TileSizesListType tileSizes;
   SmallVector<unsigned> partitionedLoops = getPartitionedLoops(op);
   if (partitionedLoops.empty()) {
@@ -210,43 +210,34 @@ static LogicalResult setRootDefaultConfig(FuncOp entryPoint, Operation *op) {
   tileSizes.emplace_back(std::move(workgroupTileSizes));  // Workgroup level
   return setOpConfigAndEntryPointFnTranslation(
       entryPoint, op, tileSizes, /*nativeVectorSizes=*/ArrayRef<int64_t>{},
-      IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUVectorize, workgroupSize);
+      IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUVectorize,
+      workgroupSize);
 }
 
 /// Propagate the configuration annotated in the incoming IR.
-static LogicalResult setUserConfig(FuncOp entryPointFn, Operation *computeOp,
-                                   IREE::HAL::LoweringConfig config) {
-  IREE::HAL::DispatchLoweringPassPipeline passPipeline =
-      IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUVectorize;
-  if (auto setPassPipeline = getLoweringPassPipeline(config)) {
-    passPipeline = setPassPipeline.getValue();
-  }
-  SmallVector<int64_t, 4> workgroupSize;
-  if (auto workgroupSizeAttr = config.workgroupSize()) {
-    workgroupSize = llvm::to_vector<4>(
-        llvm::map_range(workgroupSizeAttr, [](Attribute intAttr) {
-          return intAttr.cast<IntegerAttr>().getInt();
-        }));
+static LogicalResult setUserConfig(
+    FuncOp entryPointFn, Operation *computeOp,
+    IREE::Codegen::CompilationInfoAttr compilationInfo) {
+  if (auto translationInfo = getTranslationInfo(entryPointFn)) {
+    return computeOp->emitOpError(
+        "multiple ops within dispatch trying to set the translation "
+        "info");
   }
-  if (failed(setOpConfigAndEntryPointFnTranslation(
-          entryPointFn, computeOp, config, passPipeline, workgroupSize))) {
-    return failure();
-  }
-  // Reset the op configuration to drop the pass-pipeline and workgroup size
-  // info. The op does not carry that information anymore.
-  auto resetConfig = IREE::HAL::LoweringConfig::get(
-      config.tileSizes(), config.nativeVectorSize(),
-      /*passPipeline =*/nullptr,
-      /*workgroupSize =*/nullptr, computeOp->getContext());
-  setLoweringConfig(computeOp, resetConfig);
+
+  SmallVector<int64_t> workgroupSize = compilationInfo.getWorkgroupSizeVals();
+  setTranslationInfo(entryPointFn, compilationInfo.getTranslationInfo(),
+                     workgroupSize);
+  setLoweringConfig(computeOp, compilationInfo.getLoweringConfig());
+  eraseCompilationInfo(computeOp);
   return success();
 }
 
 static LogicalResult setRootConfig(FuncOp entryPointFn, Operation *computeOp) {
-  if (IREE::HAL::LoweringConfig config = getLoweringConfig(computeOp)) {
+  if (IREE::Codegen::CompilationInfoAttr compilationInfo =
+          getCompilationInfo(computeOp)) {
     // If the op already has a lowering config coming from the IR use this and
     // bypass the heuristic.
-    return setUserConfig(entryPointFn, computeOp, config);
+    return setUserConfig(entryPointFn, computeOp, compilationInfo);
   }
   if (auto linalgOp = dyn_cast<linalg::LinalgOp>(computeOp)) {
     if (linalg::isaContractionOpInterface(linalgOp) &&
@@ -294,8 +285,9 @@ LogicalResult initGPULaunchConfig(ModuleOp moduleOp) {
       // anything. Without any compute ops, this shouldnt be using tile and
       // distribute.
       setTranslationInfo(
-          funcOp, IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute,
-          workgroupSize, workloadPerWorkgroup);
+          funcOp,
+          IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute,
+          workloadPerWorkgroup, workgroupSize);
       continue;
     }
 
@@ -330,8 +322,9 @@ LogicalResult initGPULaunchConfig(ModuleOp moduleOp) {
       // anything. Without any compute ops, this shouldnt be using tile and
       // distribute.
       setTranslationInfo(
-          funcOp, IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute,
-          {1, 1, 1}, /*workloadPerWorkgroup=*/{});
+          funcOp,
+          IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute,
+          /*workloadPerWorkgroup=*/{}, {1, 1, 1});
       continue;
     }
     if (failed(setRootConfig(funcOp, rootOperation))) continue;
@@ -342,7 +335,7 @@ LogicalResult initGPULaunchConfig(ModuleOp moduleOp) {
     // and distributed. The rest of the compilation must be structured to either
     // use `TileAndFuse` or they are independent configurations that are
     // determined based on the op.
-    IREE::HAL::LoweringConfig config = getLoweringConfig(rootOperation);
+    IREE::Codegen::LoweringConfigAttr config = getLoweringConfig(rootOperation);
     for (auto op : computeOps) {
       if (op == rootOperation) continue;
       setLoweringConfig(op, config);
diff --git a/iree/compiler/Codegen/LLVMGPU/KernelConfig.h b/iree/compiler/Codegen/LLVMGPU/KernelConfig.h
index 2717b90b7eb0..d085245cf93b 100644
--- a/iree/compiler/Codegen/LLVMGPU/KernelConfig.h
+++ b/iree/compiler/Codegen/LLVMGPU/KernelConfig.h
@@ -7,7 +7,6 @@
 #ifndef IREE_COMPILER_CODEGEN_LLVMGPU_KERNELCONFIG_H_
 #define IREE_COMPILER_CODEGEN_LLVMGPU_KERNELCONFIG_H_
 
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
 #include "mlir/IR/BuiltinOps.h"
 
 namespace mlir {
diff --git a/iree/compiler/Codegen/LLVMGPU/LLVMGPUDistributeSharedMemoryCopy.cpp b/iree/compiler/Codegen/LLVMGPU/LLVMGPUDistributeSharedMemoryCopy.cpp
index a07d472f342c..5dfd252dcb19 100644
--- a/iree/compiler/Codegen/LLVMGPU/LLVMGPUDistributeSharedMemoryCopy.cpp
+++ b/iree/compiler/Codegen/LLVMGPU/LLVMGPUDistributeSharedMemoryCopy.cpp
@@ -7,6 +7,7 @@
 #include <algorithm>
 #include <numeric>
 
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/LLVMGPU/LLVMGPUUtils.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
diff --git a/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp b/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp
index 371d33c3ee4d..5ef123937d43 100644
--- a/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp
+++ b/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp
@@ -4,10 +4,11 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/LLVMGPU/KernelConfig.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
-#include "iree/compiler/Codegen/Utils/Utils.h"
 #include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
 #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtDialect.h"
@@ -32,9 +33,9 @@ class LLVMGPULowerExecutableTargetPass
           LLVMGPULowerExecutableTargetPass> {
  public:
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<IREE::HAL::HALDialect, linalg::LinalgDialect,
-                    linalg_ext::LinalgExtDialect, vector::VectorDialect,
-                    gpu::GPUDialect>();
+    registry.insert<IREE::Codegen::IREECodegenDialect, IREE::HAL::HALDialect,
+                    linalg::LinalgDialect, linalg_ext::LinalgExtDialect,
+                    vector::VectorDialect, gpu::GPUDialect>();
   }
 
   LLVMGPULowerExecutableTargetPass() = default;
@@ -72,16 +73,15 @@ void LLVMGPULowerExecutableTargetPass::runOnOperation() {
   // is fine.
   llvm::StringMap<IREE::HAL::ExecutableEntryPointOp> entryPoints =
       getAllEntryPoints(moduleOp);
-  Optional<IREE::HAL::DispatchLoweringPassPipeline> passPipeline;
+  Optional<IREE::Codegen::DispatchLoweringPassPipeline> passPipeline;
   for (auto &it : entryPoints) {
     auto entryPointOp = it.second;
-    if (IREE::HAL::TranslationInfo translationInfo =
+    if (IREE::Codegen::TranslationInfoAttr translationInfo =
             getTranslationInfo(entryPointOp)) {
-      Optional<IREE::HAL::DispatchLoweringPassPipeline> currPipeline =
-          getLoweringPassPipeline(translationInfo);
-      if (!currPipeline) continue;
+      IREE::Codegen::DispatchLoweringPassPipeline currPipeline =
+          translationInfo.getDispatchLoweringPassPipeline();
       if (passPipeline) {
-        if (currPipeline.getValue() != passPipeline.getValue()) {
+        if (currPipeline != passPipeline.getValue()) {
           moduleOp.emitError(
               "unhandled compilation of entry point function with different "
               "pass pipelines within a module");
@@ -98,13 +98,13 @@ void LLVMGPULowerExecutableTargetPass::runOnOperation() {
   if (!testLoweringConfiguration && passPipeline.hasValue()) {
     OpPassManager &nestedModulePM = executableLoweringPipeline.nest<ModuleOp>();
     switch (*passPipeline) {
-      case IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute:
+      case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute:
         addGPUSimpleDistributePassPipeline(nestedModulePM);
         break;
-      case IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUVectorize:
+      case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUVectorize:
         addGPUVectorizationPassPipeline(nestedModulePM);
         break;
-      case IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt:
+      case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt:
         addGPUMatmulSimtPassPipeline(nestedModulePM);
         break;
       default:
diff --git a/iree/compiler/Codegen/LLVMGPU/LLVMGPURemoveTrivialLoops.cpp b/iree/compiler/Codegen/LLVMGPU/LLVMGPURemoveTrivialLoops.cpp
index 383b74dd697e..df0e622bcaa1 100644
--- a/iree/compiler/Codegen/LLVMGPU/LLVMGPURemoveTrivialLoops.cpp
+++ b/iree/compiler/Codegen/LLVMGPU/LLVMGPURemoveTrivialLoops.cpp
@@ -4,6 +4,7 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/LLVMGPU/LLVMGPUUtils.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
@@ -77,11 +78,8 @@ static SmallVector<int64_t> getNumWorkgroup(
   auto translationInfo = getTranslationInfo(entryPointOp);
   if (!translationInfo) return SmallVector<int64_t>();
 
-  ArrayAttr workloadPerWorkgroupAttr = translationInfo.workloadPerWorkgroup();
-  if (!workloadPerWorkgroupAttr) return SmallVector<int64_t>();
-  auto workloadPerWorkgroup = llvm::to_vector<4>(llvm::map_range(
-      workloadPerWorkgroupAttr,
-      [](Attribute attr) { return attr.cast<IntegerAttr>().getInt(); }));
+  SmallVector<int64_t> workloadPerWorkgroup =
+      translationInfo.getWorkloadPerWorkgroupVals();
   if (workloadSize.size() != workloadPerWorkgroup.size())
     return SmallVector<int64_t>();
   SmallVector<int64_t> numWorkgroups;
diff --git a/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp b/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp
index 4409c334ab70..2a701f8d165c 100644
--- a/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp
+++ b/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp
@@ -4,14 +4,13 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/LLVMGPU/KernelConfig.h"
 #include "iree/compiler/Codegen/LLVMGPU/LLVMGPUUtils.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Codegen/Transforms/Transforms.h"
 #include "iree/compiler/Codegen/Utils/MarkerUtils.h"
-#include "iree/compiler/Codegen/Utils/Utils.h"
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
 #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h"
 #include "iree/compiler/Dialect/LinalgExt/Transforms/Transforms.h"
 #include "iree/compiler/Dialect/Util/IR/UtilOps.h"
@@ -38,19 +37,14 @@ static void populateTilingReductionPatterns(
   auto tileSizesFn = [&](OpBuilder &builder,
                          Operation *op) -> SmallVector<Value, 4> {
     SmallVector<unsigned> partitionedLoops = getPartitionedLoops(op);
-    SmallVector<int64_t, 4> tileSizes = getTileSizes(op, 0);
-    Location loc = op->getLoc();
-    auto tileSizesVal =
-        llvm::to_vector<4>(llvm::map_range(tileSizes, [&](int64_t v) -> Value {
-          return builder.create<arith::ConstantIndexOp>(loc, v);
-        }));
-    auto zero = builder.create<arith::ConstantIndexOp>(loc, 0);
+    SmallVector<Value, 4> tileSizes = getTileSizes(builder, op, 0);
+    auto zero = builder.create<arith::ConstantIndexOp>(op->getLoc(), 0);
     for (unsigned depth : partitionedLoops) {
-      if (depth < tileSizesVal.size()) {
-        tileSizesVal[depth] = zero;
+      if (depth < tileSizes.size()) {
+        tileSizes[depth] = zero;
       }
     }
-    return tileSizesVal;
+    return tileSizes;
   };
 
   auto tilingOptions = linalg::LinalgTilingOptions()
@@ -69,8 +63,8 @@ static void populateTilingReductionPatterns(
 /// Patterns for thread level tiling.
 static void populateTilingToInvocationPatterns(
     MLIRContext *context, OwningRewritePatternList &patterns,
-    SmallVector<int64_t, 4> &workgroupSize,
-    SmallVector<int64_t, 4> &workloadPerWorkgroup) {
+    SmallVectorImpl<int64_t> &workgroupSize,
+    SmallVectorImpl<int64_t> &workloadPerWorkgroup) {
   linalg::TileSizeComputationFunction getInnerTileSizeFn =
       [&](OpBuilder &builder, Operation *operation) {
         SmallVector<Value, 4> tileSizesVal;
@@ -95,7 +89,7 @@ static void populateTilingToInvocationPatterns(
         return tileSizesVal;
       };
 
-  auto getThreadProcInfoFn = [workgroupSize](
+  auto getThreadProcInfoFn = [&workgroupSize](
                                  OpBuilder &builder, Location loc,
                                  ArrayRef<Range> parallelLoopRanges) {
     return getGPUThreadIdsAndCounts(builder, loc, parallelLoopRanges.size(),
@@ -240,11 +234,8 @@ struct LLVMGPUTileAndDistributePass
     auto workgroupSize = llvm::to_vector<4>(llvm::map_range(
         getEntryPoint(funcOp).workgroup_size().getValue(),
         [&](Attribute attr) { return attr.cast<IntegerAttr>().getInt(); }));
-    auto workloadPerWorkgroup = llvm::to_vector<4>(llvm::map_range(
-        getTranslationInfo(getEntryPoint(funcOp))
-            .workloadPerWorkgroup()
-            .getValue(),
-        [&](Attribute attr) { return attr.cast<IntegerAttr>().getInt(); }));
+    auto workloadPerWorkgroup =
+        getTranslationInfo(getEntryPoint(funcOp)).getWorkloadPerWorkgroupVals();
 
     int64_t flatWorkgroupSize =
         workgroupSize[0] * workgroupSize[1] * workgroupSize[2];
diff --git a/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir b/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir
index 2b7e6da2a411..e7aaf8d7a374 100644
--- a/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir
+++ b/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir
@@ -1,6 +1,7 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-llvmgpu-tile-and-distribute))))' %s | IreeFileCheck %s
 
-#config = {tileSizes = [[2, 256, 4]]}
+#config = #iree_codegen.lowering.config<tile_sizes = [[2, 256, 4]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"LLVMGPUMatmulSimt", workload_per_wg = [256, 2]>
 #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb">
 #map0 = affine_map<()[s0] -> (s0 * 2)>
 #map1 = affine_map<()[s0] -> (s0 * 256)>
@@ -12,7 +13,7 @@ hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb {
   hal.executable.entry_point @dot_dispatch_0 attributes {
     interface = @legacy_io,
     ordinal = 0 : index,
-    translation.info = {passPipeline = "LLVMGPUMatmulSimt" : i32, workloadPerWorkgroup = [256, 2]},
+    translation.info = #translation,
     workgroup_size = [64 : index, 1 : index, 1 : index]}
   builtin.module  {
     builtin.func @dot_dispatch_0() {
@@ -86,14 +87,15 @@ hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb {
 
 // -----
 
-#config = {tileSizes = [[]]}
+#config = #iree_codegen.lowering.config<tile_sizes = [[]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"LLVMGPUVectorize", workload_per_wg = []>
 // Pure reducion case, skip tiling.
 hal.executable @reduction_dispatch {
 hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
     hal.executable.entry_point @predict_dispatch_153 attributes {
       interface = @io,
       ordinal = 0 : index,
-      translation.info = {passPipeline = "LLVMGPUVectorize" : i32},
+      translation.info = #translation,
       workgroup_size = [1: index, 1: index, 1: index]}
     builtin.module  {
       builtin.func @predict_dispatch_153() {
@@ -120,7 +122,7 @@ hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvpt
     }
   }
 }
-//      CHECK: #[[CONFIG:.+]] = {tileSizes = {{\[}}[]{{\]}}}
+//      CHECK: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[]{{\]}}, native_vector_size = []>
 //      CHECK: hal.executable public @reduction_dispatch
 //      CHECK: linalg.fill
 // CHECK-SAME:     lowering.config = #[[CONFIG]]
diff --git a/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir b/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
index b67847a345eb..8690542645b8 100644
--- a/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
+++ b/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir
@@ -33,11 +33,11 @@ hal.executable @add_dispatch_0 {
   }
 }
 
-//  CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[256]{{\]}}}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[256]{{\]}}, native_vector_size = []>
 //  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 256)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUVectorize", workload_per_wg = [256]>
 //      CHECK: hal.executable.entry_point public @add_dispatch_0
-// CHECK-SAME:     passPipeline = "LLVMGPUVectorize"
-// CHECK-SAME:     workloadPerWorkgroup = [256]
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-SAME:     workgroup_size = [64 : index, 1 : index, 1 : index]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index,
 //  CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
@@ -92,12 +92,12 @@ hal.executable private @dot_dispatch_1  {
     }
   }
 }
-//  CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[4, 2, 4]{{\]}}}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[4, 2, 4]{{\]}}, native_vector_size = []>
 //  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)>
 //  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUMatmulSimt", workload_per_wg = [2, 4]>
 //      CHECK: hal.executable.entry_point public @dot_dispatch_1
-// CHECK-SAME:     passPipeline = "LLVMGPUMatmulSimt"
-// CHECK-SAME:     workloadPerWorkgroup = [2, 4]
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-SAME:     workgroup_size = [2 : index, 4 : index, 1 : index]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index,
 //  CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
@@ -143,18 +143,18 @@ hal.executable @reduction_dispatch {
   }
 }
 
-//  CHECK-DAG: #[[CONFIG0:.+]] = {passPipeline = "LLVMGPUDistribute"}
-//  CHECK-DAG: #[[CONFIG1:.+]] = {tileSizes = {{\[}}[]{{\]}}}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = []>
 //      CHECK: hal.executable.entry_point public @predict_dispatch_153
-// CHECK-SAME:     translation.info = #[[CONFIG0]]
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-SAME:     workgroup_size = [1 : index, 1 : index, 1 : index]
 // CHECK-NEXT:   ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index,
 //  CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
 //      CHECK:     hal.return %[[C1]], %[[C1]], %[[C1]]
 //      CHECK: linalg.fill
-// CHECK-SAME:   lowering.config = #[[CONFIG1]]
+// CHECK-SAME:   lowering.config = #[[CONFIG]]
 //      CHECK: linalg.generic
-// CHECK-SAME:   lowering.config = #[[CONFIG1]]
+// CHECK-SAME:   lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -199,9 +199,10 @@ hal.executable @tensor_insert {
     }
   }
 }
-//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [128, 1]>
 //      CHECK: hal.executable.entry_point public @tensor_insert_slice
-// CHECK-SAME:   translation.info = {passPipeline = "LLVMGPUDistribute", workloadPerWorkgroup = [128, 1]}
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   %[[ARG0:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: index
 //  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
@@ -246,10 +247,11 @@ hal.executable @tensor_insert {
     }
   }
 }
-//  CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[1, 256]{{\]}}}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[1, 256]{{\]}}, native_vector_size = []>
 //  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 256)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUVectorize", workload_per_wg = [256, 1]>
 //      CHECK: hal.executable.entry_point public @tensor_insert_slice
-// CHECK-SAME:   translation.info = {passPipeline = "LLVMGPUVectorize", workloadPerWorkgroup = [256, 1]}
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   %[[ARG0:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: index
 //  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
@@ -286,11 +288,11 @@ hal.executable private @static_1d_fft_stage2  {
   }
 }
 
-//   CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[4]]}
+//   CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[4]{{\]}}, native_vector_size = []>
 //   CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)>
+//   CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [4]>
 //       CHECK: hal.executable.entry_point public @static_1d_fft_stage2
-//  CHECK-SAME:   translation.info = {passPipeline = "LLVMGPUDistribute"
-//  CHECK-SAME:   workloadPerWorkgroup = [4]}
+//  CHECK-SAME:   translation.info = #[[TRANSLATION]]
 //  CHECK-SAME:   workgroup_size = [32 : index, 1 : index, 1 : index]
 //  CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %{{.+}}: index, %{{.+}}: index):
 //  CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
@@ -351,11 +353,11 @@ hal.executable private @static_3d_fft_stage3  {
   }
 }
 
-//   CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[1, 1, 8]]}
+//   CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[1, 1, 8]{{\]}}, native_vector_size = []>
 //   CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
+//   CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [8, 1, 1]>
 //       CHECK: hal.executable.entry_point public @static_3d_fft_stage3
-//  CHECK-SAME:   translation.info = {passPipeline = "LLVMGPUDistribute"
-//  CHECK-SAME:   workloadPerWorkgroup = [8, 1, 1]}
+//  CHECK-SAME:   translation.info = #[[TRANSLATION]]
 //  CHECK-SAME:   workgroup_size = [32 : index, 1 : index, 1 : index]
 //  CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index):
 //  CHECK-NEXT:   %[[T:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]]
@@ -367,6 +369,10 @@ hal.executable private @static_3d_fft_stage3  {
 
 // -----
 
+#compilation = #iree_codegen.compilation.info<
+    #iree_codegen.lowering.config<tile_sizes = [[32, 256, 64]], native_vector_size = []>,
+    #iree_codegen.translation.info<"LLVMGPUMatmulSimt", workload_per_wg = [256, 32]>,
+    workgroup_size = [16, 8, 1]>
 hal.executable @user_config {
 hal.executable.variant public @cuda_nvptx_fb, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
   hal.executable.entry_point public @_lowering_config_test_dispatch_1 attributes {interface = @io, ordinal = 0 : index}
@@ -401,7 +407,7 @@ hal.executable.variant public @cuda_nvptx_fb, target = #hal.executable.target<"c
           %14 = affine.min affine_map<(d0)[s0] -> (-d0 + 1024, s0)>(%arg1)[%workgroup_size_x]
           %15 = linalg.init_tensor [%13, %14] : tensor<?x?xf32>
           %16 = linalg.fill(%cst, %15) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
-          %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = {passPipeline = "LLVMGPUMatmulSimt", tileSizes = [[32, 256, 64]], workgroupSize = [16, 8, 1]}} ins(%8, %10 : tensor<?x256xf32>, tensor<256x?xf32>) outs(%16 : tensor<?x?xf32>) -> tensor<?x?xf32>
+          %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup", compilation.info = #compilation} ins(%8, %10 : tensor<?x256xf32>, tensor<256x?xf32>) outs(%16 : tensor<?x?xf32>) -> tensor<?x?xf32>
           flow.dispatch.tensor.store %17, %2, offsets = [%arg0, %arg1], sizes = [%11, %12], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:128x1024xf32>
         }
       }
@@ -416,10 +422,10 @@ hal.executable.variant public @cuda_nvptx_fb, target = #hal.executable.target<"c
 }
 }
 
-//  CHECK-DAG: #[[CONFIG:.+]] = {{{.*}}tileSizes = {{\[}}[32, 256, 64]{{\]}}}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[32, 256, 64]{{\]}}
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUMatmulSimt", workload_per_wg = [256, 32]>
 //      CHECK: hal.executable.entry_point public @_lowering_config_test_dispatch_1
-// CHECK-SAME:     passPipeline = "LLVMGPUMatmulSimt"
-// CHECK-SAME:     workloadPerWorkgroup = [256, 32]
+// CHECK-SAME:     translation.info = #[[TRANSLATION]]
 // CHECK-SAME:     workgroup_size = [16 : index, 8 : index, 1 : index]
 //      CHECK: func @_lowering_config_test_dispatch_1
 //      CHECK:   linalg.fill
diff --git a/iree/compiler/Codegen/LLVMGPU/test/remove_loops.mlir b/iree/compiler/Codegen/LLVMGPU/test/remove_loops.mlir
index 46b6931e93c2..bed9ee396197 100644
--- a/iree/compiler/Codegen/LLVMGPU/test/remove_loops.mlir
+++ b/iree/compiler/Codegen/LLVMGPU/test/remove_loops.mlir
@@ -42,12 +42,13 @@ hal.executable private @dispatch_0  {
 // -----
 
 // CHECK-LABEL: func @workgroup_tile_loop()
+#translation = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [32]>
 hal.executable private @workgroup_tile_loop  {
   hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
     hal.executable.entry_point @workgroup_tile_loop attributes {
       interface = @io,
       ordinal = 0 : index,
-      translation.info = {passPipeline = "LLVMGPUDistribute", workloadPerWorkgroup = [32]}
+      translation.info = #translation
     }
     builtin.module {
       builtin.func @workgroup_tile_loop() {
@@ -71,12 +72,13 @@ hal.executable private @workgroup_tile_loop  {
 // -----
 
 // CHECK-LABEL: func @workgroup_tile_loop_negative()
+#translation = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [16]>
 hal.executable private @workgroup_tile_loop_negative  {
   hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> {
     hal.executable.entry_point @workgroup_tile_loop_negative attributes {
       interface = @io,
       ordinal = 0 : index,
-      translation.info = {passPipeline = "LLVMGPUDistribute", workloadPerWorkgroup = [16]}
+      translation.info = #translation
     }
     builtin.module {
       builtin.func @workgroup_tile_loop_negative() {
diff --git a/iree/compiler/Codegen/Passes.h b/iree/compiler/Codegen/Passes.h
index b513b99e292a..d550517c3a32 100644
--- a/iree/compiler/Codegen/Passes.h
+++ b/iree/compiler/Codegen/Passes.h
@@ -10,7 +10,6 @@
 #include <memory>
 
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassOptions.h"
@@ -235,10 +234,6 @@ std::unique_ptr<OperationPass<FuncOp>> createLLVMGPUPipeliningPass();
 /// distribution to threads without vectorization.
 void addSPIRVTileAndDistributePassPipeline(OpPassManager &pm);
 
-/// Pass pipeline to lower IREE HAL executables that contain Linalg ops that are
-/// not tiled/distributed. Performs distribution to global invocations.
-void addSPIRVDistributeToGlobalIDPassPipeline(OpPassManager &pm);
-
 /// Pass pipeline to lower IREE HAL executables with workgroup tiled and
 /// distributed Linalg ops to SPIR-V scalar and vector code. Additionally
 /// performs distribution to threads with vectorization.
@@ -256,9 +251,6 @@ void addSPIRVTileAndVectorizeToCooperativeOpsPassPipeline(OpPassManager &pm);
 /// corresponding SPIR-V ops.
 std::unique_ptr<OperationPass<ModuleOp>> createConvertToSPIRVPass();
 
-/// Pass to distribute Linalg ops with buffer semantics to global invocations.
-std::unique_ptr<OperationPass<FuncOp>> createSPIRVDistributeToGlobalIDPass();
-
 /// Creates a pass to fold processor ID uses where possible.
 std::unique_ptr<OperationPass<FuncOp>> createSPIRVFoldProcessorIDUsesPass();
 
diff --git a/iree/compiler/Codegen/Passes.td b/iree/compiler/Codegen/Passes.td
index eba35a657f71..83e376fb5898 100644
--- a/iree/compiler/Codegen/Passes.td
+++ b/iree/compiler/Codegen/Passes.td
@@ -206,15 +206,6 @@ def ConvertToSPIRV : Pass<"iree-convert-to-spirv", "ModuleOp"> {
   let constructor = "mlir::iree_compiler::createConvertToSPIRVPass()";
 }
 
-// TODO: Rename argument to be fully qualified.
-def SPIRVDistributeToGlobalID :
-    Pass<"iree-spirv-distribute-to-global-id", "FuncOp"> {
-  let summary = "Distribute Linalg ops with buffer semantics to global "
-                "invocations";
-  let constructor =
-      "mlir::iree_compiler::createSPIRVDistributeToGlobalIDPass()";
-}
-
 // TODO: Rename argument to be fully qualified.
 def SPIRVFoldProcessorIDUses :
     Pass<"iree-spirv-fold-gpu-procid-uses", "FuncOp"> {
diff --git a/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp b/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp
index 38240a87e672..877cac71d713 100644
--- a/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp
+++ b/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp
@@ -13,7 +13,9 @@
 #include <array>
 
 #include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
+#include "llvm/ADT/TypeSwitch.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
+#include "mlir/IR/BuiltinOps.h"
 
 namespace mlir {
 namespace iree_compiler {
diff --git a/iree/compiler/Codegen/SPIRV/BUILD b/iree/compiler/Codegen/SPIRV/BUILD
index a63331f7d242..c4dea5d497e2 100644
--- a/iree/compiler/Codegen/SPIRV/BUILD
+++ b/iree/compiler/Codegen/SPIRV/BUILD
@@ -20,7 +20,6 @@ cc_library(
         "NVIDIAConfig.cpp",
         "Passes.cpp",
         "SPIRVCopyToWorkgroupMemory.cpp",
-        "SPIRVDistributeToGlobalID.cpp",
         "SPIRVFoldGPUProcessorIDUses.cpp",
         "SPIRVLowerExecutableTargetPass.cpp",
         "SPIRVRemoveOneTripTiledLoops.cpp",
@@ -39,6 +38,7 @@ cc_library(
     deps = [
         "//iree/compiler/Codegen:PassHeaders",
         "//iree/compiler/Codegen/Common",
+        "//iree/compiler/Codegen/Dialect:IREECodegenDialect",
         "//iree/compiler/Codegen/Transforms",
         "//iree/compiler/Codegen/Utils",
         "//iree/compiler/Dialect/Flow/IR",
diff --git a/iree/compiler/Codegen/SPIRV/CMakeLists.txt b/iree/compiler/Codegen/SPIRV/CMakeLists.txt
index f6bbb282df2e..0c0bd96a15a3 100644
--- a/iree/compiler/Codegen/SPIRV/CMakeLists.txt
+++ b/iree/compiler/Codegen/SPIRV/CMakeLists.txt
@@ -25,7 +25,6 @@ iree_cc_library(
     "NVIDIAConfig.cpp"
     "Passes.cpp"
     "SPIRVCopyToWorkgroupMemory.cpp"
-    "SPIRVDistributeToGlobalID.cpp"
     "SPIRVFoldGPUProcessorIDUses.cpp"
     "SPIRVLowerExecutableTargetPass.cpp"
     "SPIRVRemoveOneTripTiledLoops.cpp"
@@ -71,6 +70,7 @@ iree_cc_library(
     MLIRVectorInterfaces
     MLIRVectorToSPIRV
     iree::compiler::Codegen::Common
+    iree::compiler::Codegen::Dialect::IREECodegenDialect
     iree::compiler::Codegen::PassHeaders
     iree::compiler::Codegen::Transforms
     iree::compiler::Codegen::Utils
diff --git a/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp b/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
index a956c04c08c4..4f9253a2b317 100644
--- a/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
+++ b/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp
@@ -15,11 +15,11 @@
 
 #include <tuple>
 
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Codegen/SPIRV/Utils.h"
 #include "iree/compiler/Codegen/Utils/MarkerUtils.h"
-#include "iree/compiler/Codegen/Utils/Utils.h"
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
 #include "iree/compiler/Dialect/Util/IR/UtilOps.h"
 #include "llvm/ADT/DenseMapInfo.h"
diff --git a/iree/compiler/Codegen/SPIRV/KernelConfig.cpp b/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
index 612947b40c09..d2bb4e14d20a 100644
--- a/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
+++ b/iree/compiler/Codegen/SPIRV/KernelConfig.cpp
@@ -6,11 +6,10 @@
 
 #include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
 
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/SPIRV/Utils.h"
 #include "iree/compiler/Codegen/Transforms/Transforms.h"
 #include "iree/compiler/Codegen/Utils/MarkerUtils.h"
-#include "iree/compiler/Codegen/Utils/Utils.h"
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
 #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
@@ -18,6 +17,7 @@
 #include "mlir/Dialect/SPIRV/IR/SPIRVAttributes.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h"
 #include "mlir/Dialect/SPIRV/IR/TargetAndABI.h"
+#include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Matchers.h"
 
 #define DEBUG_TYPE "iree-spirv-kernel-config"
@@ -25,37 +25,6 @@
 namespace mlir {
 namespace iree_compiler {
 
-//===----------------------------------------------------------------------===//
-// Utilities
-//===----------------------------------------------------------------------===//
-
-/// Defines the workgroup count region on entry point ops for the
-/// `SPIRVDistributeToGlobalID` pipeline.
-// TODO(ravishankarm): Remove this when that pipeline is deprecated.
-static LogicalResult setTranslationUsingDistributeToGlobalId(
-    FuncOp funcOp, ArrayRef<int64_t> workgroupSize) {
-  auto entryPointOp = getEntryPoint(funcOp);
-  MLIRContext *context = entryPointOp.getContext();
-  auto translationInfo = buildTranslationInfo(
-      IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistributeToGlobalID,
-      /*workloadPerWorkgroup =*/{}, context);
-  setTranslationInfo(entryPointOp, translationInfo, workgroupSize);
-  OpBuilder builder(context);
-  int64_t workgroupSizeX = workgroupSize[0];
-  auto numWorkgroupsFn = [workgroupSizeX](OpBuilder &b, Location loc,
-                                          std::array<Value, 3> workload) {
-    AffineExpr e1, e2, e3;
-    bindSymbols(b.getContext(), e1, e2, e3);
-    AffineExpr expr = e1 * e2 * e3;
-    expr = expr.ceilDiv(workgroupSizeX);
-    Value numWorkgroupsX = linalg::applyMapToValues(
-        b, loc, AffineMap::get(0, 3, expr), workload)[0];
-    Value one = b.create<arith::ConstantIndexOp>(loc, 1);
-    return std::array<Value, 3>{numWorkgroupsX, one, one};
-  };
-  return defineWorkgroupCountRegion(builder, funcOp, numWorkgroupsFn);
-}
-
 //===----------------------------------------------------------------------===//
 // Convolution Default Configuration
 //===----------------------------------------------------------------------===//
@@ -107,9 +76,9 @@ LogicalResult setConvOpConfig(linalg::LinalgOp linalgOp,
   int64_t residualThreads = subgroupSize;
   int64_t residualTilingFactor = bestTilingFactor;
 
-  SmallVector<int64_t, 3> workgroupSize(3, 1);        // (X, Y, Z)
-  SmallVector<int64_t, 4> workgroupTileSizes(4, 0);   // (N, OH, OW, OC)
-  SmallVector<int64_t, 4> invocationTileSizes(4, 0);  // (N, OH, OW, OC)
+  SmallVector<int64_t, 3> workgroupSize(3, 1);     // (X, Y, Z)
+  SmallVector<int64_t> workgroupTileSizes(4, 0);   // (N, OH, OW, OC)
+  SmallVector<int64_t> invocationTileSizes(4, 0);  // (N, OH, OW, OC)
 
   // Deduce the configuration for the OC dimension.
   for (int64_t x = residualThreads; x >= 2; x >>= 1) {
@@ -181,7 +150,7 @@ LogicalResult setConvOpConfig(linalg::LinalgOp linalgOp,
     }
   }
 
-  auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize;
+  auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize;
   TileSizesListType tileSizes;
   tileSizes.push_back(workgroupTileSizes);
   tileSizes.push_back(invocationTileSizes);
@@ -244,10 +213,10 @@ LogicalResult setMatmulOpConfig(linalg::LinalgOp op,
   int64_t residualThreads = bestX * bestY;
   int64_t residualTilingFactor = (bestThreadM + bestThreadK) * bestThreadN;
 
-  SmallVector<int64_t, 3> workgroupSize(3, 1);               // (X, Y, Z)
-  SmallVector<int64_t, 4> workgroupTileSizes(2 + isBM, 0);   // (B, M, N)
-  SmallVector<int64_t, 4> invocationTileSizes(2 + isBM, 0);  // (B, M, N)
-  SmallVector<int64_t, 4> reductionTileSizes(3 + isBM, 0);   // (B, M, N, K)
+  SmallVector<int64_t, 3> workgroupSize(3, 1);            // (X, Y, Z)
+  SmallVector<int64_t> workgroupTileSizes(2 + isBM, 0);   // (B, M, N)
+  SmallVector<int64_t> invocationTileSizes(2 + isBM, 0);  // (B, M, N)
+  SmallVector<int64_t> reductionTileSizes(3 + isBM, 0);   // (B, M, N, K)
 
   if (isBM) workgroupTileSizes[0] = invocationTileSizes[0] = 1;
 
@@ -302,7 +271,7 @@ LogicalResult setMatmulOpConfig(linalg::LinalgOp op,
   }
   if (reductionTileSizes[2 + isBM] == 0) return success();
 
-  auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize;
+  auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize;
   TileSizesListType tileSizes;
   tileSizes.push_back(workgroupTileSizes);
   tileSizes.push_back(invocationTileSizes);
@@ -321,13 +290,13 @@ LogicalResult setMatmulOpConfig(linalg::LinalgOp op,
 static LogicalResult setOpConfig(spirv::ResourceLimitsAttr limits,
                                  linalg_ext::FftOp op) {
   const int64_t subgroupSize = limits.subgroup_size().getValue().getSExtValue();
-  auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistribute;
+  auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute;
 
   std::array<int64_t, 3> workgroupSize = {subgroupSize, 1, 1};
 
   auto partitionedLoops = getPartitionedLoops(op);
   unsigned loopDepth = partitionedLoops.back() + 1;
-  SmallVector<int64_t, 4> workgroupTileSize(loopDepth, 0);
+  SmallVector<int64_t> workgroupTileSize(loopDepth, 0);
 
   // Tiling along partitioned loops with size 1.
   for (int64_t loopIndex : partitionedLoops) {
@@ -357,7 +326,7 @@ static LogicalResult setDefaultOpConfig(spirv::ResourceLimitsAttr limits,
                                         Operation *op) {
   auto partitionedLoops = getPartitionedLoops(op);
   if (partitionedLoops.empty()) {
-    auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize;
+    auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize;
     std::array<int64_t, 3> workgroupSize = {1, 1, 1};
     auto funcOp = op->getParentOfType<FuncOp>();
     return setOpConfigAndEntryPointFnTranslation(funcOp, op, {}, {}, pipeline,
@@ -367,7 +336,7 @@ static LogicalResult setDefaultOpConfig(spirv::ResourceLimitsAttr limits,
   const int64_t subgroupSize = limits.subgroup_size().getValue().getSExtValue();
   int64_t numElementsPerWorkgroup = subgroupSize;
   int64_t numElementsPerThread = 1;
-  auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistribute;
+  auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute;
 
   // Returns true if the given `operand` has 32-bit element type.
   auto has32BitElementType = [](Value operand) {
@@ -415,15 +384,15 @@ static LogicalResult setDefaultOpConfig(spirv::ResourceLimitsAttr limits,
 
     if (vectorize) {
       numElementsPerThread = numElementsPerWorkgroup / subgroupSize;
-      pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize;
+      pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize;
     }
   }
 
   std::array<int64_t, 3> workgroupSize = {subgroupSize, 1, 1};
 
   unsigned loopDepth = partitionedLoops.back() + 1;
-  SmallVector<int64_t, 4> workgroupTileSize(loopDepth, 0);
-  SmallVector<int64_t, 4> threadTileSize(loopDepth, 0);
+  SmallVector<int64_t> workgroupTileSize(loopDepth, 0);
+  SmallVector<int64_t> threadTileSize(loopDepth, 0);
 
   // Tiling along partitioned loops with size 1.
   for (int64_t loopIndex : partitionedLoops) {
@@ -534,24 +503,7 @@ LogicalResult initSPIRVLaunchConfig(ModuleOp module) {
       return funcOp.emitOpError("failed to get compute ops");
     }
 
-    int64_t subgroupSize = limits.subgroup_size().getValue().getSExtValue();
-
-    // If the dispatch region does not contain tiled and distributed Linalg ops,
-    // invoke the pipeline to distribute to global invocations.
-    if (tiledLoops.empty() && llvm::none_of(computeOps, [](Operation *op) {
-          return hasMarker(op, getWorkgroupMarker());
-        })) {
-      std::array<int64_t, 3> workgroupSize = {subgroupSize, 1, 1};
-      if (failed(
-              setTranslationUsingDistributeToGlobalId(funcOp, workgroupSize))) {
-        return computeOps[0]->emitOpError(
-            "failed to set translation info for distributing to global IDs");
-      }
-      continue;
-    }
-
     Operation *rootOperation = nullptr;
-
     // Try to find a configuration according to a matmul/convolution op and use
     // it as the root op.
     for (Operation *computeOp : computeOps) {
@@ -597,8 +549,9 @@ LogicalResult initSPIRVLaunchConfig(ModuleOp module) {
         SmallVector<int64_t> workloadPerWorkgroup(tiledLoops.size(), 1);
         workloadPerWorkgroup.front() = subgroupSize * 4;
         setTranslationInfo(
-            funcOp, IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistribute,
-            workgroupSize, workloadPerWorkgroup);
+            funcOp,
+            IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute,
+            workloadPerWorkgroup, workgroupSize);
         return success();
       }
       return funcOp.emitError("contains no root Linalg operation");
@@ -610,7 +563,7 @@ LogicalResult initSPIRVLaunchConfig(ModuleOp module) {
     // and distributed. The rest of the compilation must be structured to either
     // use `TileAndFuse` or they are independent configurations that are
     // determined based on the op.
-    IREE::HAL::LoweringConfig config = getLoweringConfig(rootOperation);
+    IREE::Codegen::LoweringConfigAttr config = getLoweringConfig(rootOperation);
     for (auto op : computeOps) {
       if (op == rootOperation) continue;
       setLoweringConfig(op, config);
diff --git a/iree/compiler/Codegen/SPIRV/KernelConfig.h b/iree/compiler/Codegen/SPIRV/KernelConfig.h
index c0d4f31c8f24..81858940884d 100644
--- a/iree/compiler/Codegen/SPIRV/KernelConfig.h
+++ b/iree/compiler/Codegen/SPIRV/KernelConfig.h
@@ -17,9 +17,9 @@
 
 #include <array>
 
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
 #include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h"
 #include "mlir/Dialect/SPIRV/IR/TargetAndABI.h"
+#include "mlir/IR/BuiltinOps.h"
 
 namespace mlir {
 namespace iree_compiler {
diff --git a/iree/compiler/Codegen/SPIRV/MaliConfig.cpp b/iree/compiler/Codegen/SPIRV/MaliConfig.cpp
index 1c19955f33ad..9577d43d1838 100644
--- a/iree/compiler/Codegen/SPIRV/MaliConfig.cpp
+++ b/iree/compiler/Codegen/SPIRV/MaliConfig.cpp
@@ -13,7 +13,9 @@
 #include <array>
 
 #include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
+#include "llvm/ADT/TypeSwitch.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
+#include "mlir/IR/BuiltinOps.h"
 
 namespace mlir {
 namespace iree_compiler {
diff --git a/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp b/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp
index 3d07b75e57a9..dc1e4470dce7 100644
--- a/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp
+++ b/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp
@@ -10,10 +10,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
 #include "iree/compiler/Codegen/Utils/Utils.h"
 #include "llvm/Support/Debug.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
+#include "mlir/IR/BuiltinOps.h"
 
 #define DEBUG_TYPE "iree-spirv-nvidia-config"
 
@@ -80,8 +82,8 @@ static LogicalResult setOpConfig(const spirv::TargetEnv &targetEnv,
       getElementType(init), lhsShape[0], rhsShape[1], lhsShape[1]);
   if (!coopMatSize) return success();
 
-  auto pipeline =
-      IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorizeToCooperativeOps;
+  auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::
+      SPIRVVectorizeToCooperativeOps;
 
   // For now only support one subgroup per workgroup because in the above
   // configuration deduction step we only consider whether the input workload is
diff --git a/iree/compiler/Codegen/SPIRV/Passes.cpp b/iree/compiler/Codegen/SPIRV/Passes.cpp
index 02964f7e3d76..bf98d6796eaa 100644
--- a/iree/compiler/Codegen/SPIRV/Passes.cpp
+++ b/iree/compiler/Codegen/SPIRV/Passes.cpp
@@ -150,10 +150,6 @@ void addSPIRVTileAndDistributePassPipeline(OpPassManager &pm) {
   addLoopMaterializationPasses(pm);
 }
 
-void addSPIRVDistributeToGlobalIDPassPipeline(OpPassManager &pm) {
-  pm.addNestedPass<FuncOp>(createSPIRVDistributeToGlobalIDPass());
-}
-
 void buildSPIRVCodegenPassPipeline(OpPassManager &pm) {
   addLinalgBufferizePasses(pm.nest<ModuleOp>(), gpuAllocationFunction);
   pm.addPass(createSPIRVLowerExecutableTargetPass());
diff --git a/iree/compiler/Codegen/SPIRV/SPIRVDistributeToGlobalID.cpp b/iree/compiler/Codegen/SPIRV/SPIRVDistributeToGlobalID.cpp
deleted file mode 100644
index b8df3b46d159..000000000000
--- a/iree/compiler/Codegen/SPIRV/SPIRVDistributeToGlobalID.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-// Copyright 2020 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-//===- SPIRVDistributeToGlobalIDPass.cpp ----------------------------------===//
-//
-// This pass distributes Linalg ops with buffer semantics to global invocations.
-//
-//===----------------------------------------------------------------------===//
-
-#include <array>
-#include <numeric>
-
-#include "iree/compiler/Codegen/PassDetail.h"
-#include "iree/compiler/Codegen/Passes.h"
-#include "iree/compiler/Codegen/SPIRV/Utils.h"
-#include "iree/compiler/Codegen/Transforms/Transforms.h"
-#include "iree/compiler/Codegen/Utils/MarkerUtils.h"
-#include "iree/compiler/Codegen/Utils/Utils.h"
-#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
-#include "iree/compiler/Dialect/Shape/IR/ShapeDialect.h"
-#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/GPU/GPUDialect.h"
-#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
-#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Dialect/SPIRV/IR/TargetAndABI.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/AffineMap.h"
-#include "mlir/IR/FunctionSupport.h"
-#include "mlir/IR/PatternMatch.h"
-#include "mlir/Support/LLVM.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/LoopUtils.h"
-
-namespace mlir {
-namespace iree_compiler {
-
-//===----------------------------------------------------------------------===//
-// Loop utilities
-//===----------------------------------------------------------------------===//
-
-/// Serializes the dimensions of the scf.parallel specified in
-/// `serializedDimensions`, by creating an nested scf.for operation for each
-/// dimension.
-// TODO(ravishankarm): Move this into LoopUtils.h in MLIR.
-static Operation *serializeDimensions(ConversionPatternRewriter &rewriter,
-                                      scf::ParallelOp pLoopOp,
-                                      ArrayRef<unsigned> serializedDimensions) {
-  assert(!serializedDimensions.empty() &&
-         "unhandled corner case of no serializing dims");
-  OpBuilder::InsertionGuard guard(rewriter);
-  DenseSet<unsigned> serializedDimSet;
-  serializedDimSet.insert(serializedDimensions.begin(),
-                          serializedDimensions.end());
-  assert(serializedDimSet.size() == serializedDimensions.size() &&
-         "cannot repeat dimensions during serialization of scf.parallel");
-  SmallVector<LoopBounds, 2> newPLoopBounds, forBounds;
-  SmallVector<unsigned, 2> permutation;
-  auto lbs = pLoopOp.lowerBound();
-  auto ubs = pLoopOp.upperBound();
-  auto steps = pLoopOp.step();
-  for (unsigned i : llvm::seq<unsigned>(0, pLoopOp.getNumLoops())) {
-    if (serializedDimSet.count(i)) {
-      forBounds.push_back({lbs[i], ubs[i], steps[i]});
-    } else {
-      newPLoopBounds.push_back({lbs[i], ubs[i], steps[i]});
-      permutation.push_back(i);
-    }
-  }
-  permutation.append(serializedDimensions.begin(), serializedDimensions.end());
-  return replacePLoopOp(rewriter, pLoopOp, newPLoopBounds, forBounds,
-                        permutation);
-}
-
-/// Serialize all inner dimensions of a `pLoopOp` starting from `serializeFrom`.
-static Operation *serializeDimensionsFrom(ConversionPatternRewriter &rewriter,
-                                          scf::ParallelOp pLoopOp,
-                                          unsigned serializeFrom) {
-  unsigned numLoops = pLoopOp.getNumLoops();
-  assert(serializeFrom < numLoops &&
-         "unhandled corner case of no serialization");
-  SmallVector<unsigned, 2> serializedDimensions;
-  for (unsigned dim : llvm::seq(serializeFrom, numLoops))
-    serializedDimensions.push_back(dim);
-  return serializeDimensions(rewriter, pLoopOp, serializedDimensions);
-}
-
-//===----------------------------------------------------------------------===//
-// GPU processor ID mapping utilities
-//===----------------------------------------------------------------------===//
-
-/// Distributes scf.parallel to processors where `IdOp` is used to get the
-/// processor ID and `DimOp` is used to get the number of processors along a
-/// dimension. Assumes that the number of processors will be less than equal to
-/// the number of iterations of the pLoopOp along all dimensions.
-template <typename GPUIdOp, typename GPUCountOp>
-static LogicalResult distributeSingleIterationPerProcessor(
-    ConversionPatternRewriter &rewriter, scf::ParallelOp pLoopOp,
-    bool generateGuard = true) {
-  unsigned numLoops = pLoopOp.getNumLoops();
-  if (numLoops > 3) {
-    pLoopOp =
-        cast<scf::ParallelOp>(serializeDimensionsFrom(rewriter, pLoopOp, 3));
-    numLoops = 3;
-  }
-  auto procInfo = getGPUProcessorIdsAndCounts<GPUIdOp, GPUCountOp>(
-      rewriter, pLoopOp.getLoc(), numLoops);
-  return distributeSingleIterationPerProcessor(rewriter, pLoopOp, procInfo,
-                                               generateGuard);
-}
-
-//===----------------------------------------------------------------------===//
-// Pass and patterns.
-//===----------------------------------------------------------------------===//
-
-namespace {
-/// Pass to convert from tiled and fused linalg ops into gpu.func.
-struct SPIRVDistributeToGlobalIDPass
-    : public SPIRVDistributeToGlobalIDBase<SPIRVDistributeToGlobalIDPass> {
-  void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<AffineDialect, gpu::GPUDialect, memref::MemRefDialect,
-                    scf::SCFDialect, ShapeDialect>();
-  }
-  void runOnOperation() override;
-};
-
-/// Given the workload return the workgroup count along X obtained by
-/// linearizing the workload and dividing by the workgroup size.
-static Value getWorkgroupCountX(OpBuilder &builder, Location loc,
-                                ArrayRef<Value> values,
-                                int64_t workgroupSizeX) {
-  AffineExpr expr = builder.getAffineConstantExpr(1);
-  for (auto val : enumerate(values)) {
-    expr = expr * builder.getAffineSymbolExpr(val.index());
-  }
-  expr = expr.ceilDiv(workgroupSizeX);
-  return linalg::applyMapToValues(
-      builder, loc, AffineMap::get(0, values.size(), expr), values)[0];
-}
-
-/// Map linalg operation to execute on GPU in parallel by mapping the parallel
-/// loops to "GlobalInvocationId".
-template <typename LinalgOpTy>
-struct MapLinalgOpToGlobalInvocationId
-    : public OpConversionPattern<LinalgOpTy> {
-  MapLinalgOpToGlobalInvocationId(MLIRContext *context,
-                                  PatternBenefit benefit = 1)
-      : OpConversionPattern<LinalgOpTy>(context, benefit) {}
-
-  LogicalResult matchAndRewrite(
-      LinalgOpTy linalgOp, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
-    // If marker exists do nothing.
-    if (hasMarker(linalgOp)) return failure();
-    FuncOp funcOp = linalgOp->template getParentOfType<FuncOp>();
-    if (!funcOp) return failure();
-    Optional<linalg::LinalgLoops> loops =
-        linalg::linalgOpToParallelLoops(rewriter, linalgOp);
-    if (!loops) return failure();
-
-    if (!loops.getValue().empty()) {
-      scf::ParallelOp pLoopOp = dyn_cast<scf::ParallelOp>(loops.getValue()[0]);
-      // If there are parallel loops partition them to threads using global
-      // invocation ID.
-      if (pLoopOp) {
-        pLoopOp = collapseParallelLoops(rewriter, pLoopOp);
-        if (!pLoopOp) return failure();
-        if (failed(distributeSingleIterationPerProcessor<GPUGlobalId,
-                                                         GPUGlobalCount>(
-                rewriter, pLoopOp))) {
-          return rewriter.notifyMatchFailure(
-              linalgOp, "mapping to GlobalInvocationID failed");
-        }
-      }
-    }
-    rewriter.eraseOp(linalgOp);
-    return success();
-  }
-};
-
-}  // namespace
-
-void SPIRVDistributeToGlobalIDPass::runOnOperation() {
-  FuncOp funcOp = getOperation();
-  if (!isEntryPoint(funcOp)) return;
-
-  MLIRContext *context = &getContext();
-  ConversionTarget target(*context);
-  // After this pass Linalg and scf.parallel ops should be gone.
-  target.addIllegalOp<scf::ParallelOp>();
-  target.addIllegalDialect<linalg::LinalgDialect>();
-  // Reshape ops are treated legal since they just change the way the underlying
-  // buffer is viewed. These are legalized downstream. They become no ops when
-  // lowering to SPIR-V since the SPIR-V code uses linearized arrays.
-  target.addLegalOp<memref::CollapseShapeOp, memref::ExpandShapeOp>();
-  // Let the rest fall through.
-  target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
-
-  OwningRewritePatternList patterns(&getContext());
-
-  patterns.insert<MapLinalgOpToGlobalInvocationId<linalg::CopyOp>,
-                  MapLinalgOpToGlobalInvocationId<linalg::FillOp>,
-                  MapLinalgOpToGlobalInvocationId<linalg::GenericOp>>(context);
-  FrozenRewritePatternSet frozenPatterns(std::move(patterns));
-
-  Region &body = funcOp.getBody();
-  if (!llvm::hasSingleElement(body)) {
-    funcOp.emitError("unhandled dispatch function with multiple blocks");
-    return signalPassFailure();
-  }
-  if (failed(applyFullConversion(funcOp, target, frozenPatterns)))
-    return signalPassFailure();
-}
-
-std::unique_ptr<OperationPass<FuncOp>> createSPIRVDistributeToGlobalIDPass() {
-  return std::make_unique<SPIRVDistributeToGlobalIDPass>();
-}
-
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp b/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp
index fbc151eb6054..49de3abea2c9 100644
--- a/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp
+++ b/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp
@@ -4,10 +4,11 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
-#include "iree/compiler/Codegen/Utils/Utils.h"
 #include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
 #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtDialect.h"
@@ -36,7 +37,8 @@ class SPIRVLowerExecutableTargetPass
   SPIRVLowerExecutableTargetPass(const SPIRVLowerExecutableTargetPass &pass) {}
 
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<AffineDialect, gpu::GPUDialect, IREE::HAL::HALDialect,
+    registry.insert<IREE::Codegen::IREECodegenDialect, AffineDialect,
+                    gpu::GPUDialect, IREE::HAL::HALDialect,
                     linalg::LinalgDialect, linalg_ext::LinalgExtDialect,
                     memref::MemRefDialect, scf::SCFDialect, ShapeDialect,
                     spirv::SPIRVDialect, vector::VectorDialect>();
@@ -71,16 +73,15 @@ void SPIRVLowerExecutableTargetPass::runOnOperation() {
   // is fine.
   llvm::StringMap<IREE::HAL::ExecutableEntryPointOp> entryPoints =
       getAllEntryPoints(moduleOp);
-  Optional<IREE::HAL::DispatchLoweringPassPipeline> passPipeline;
+  Optional<IREE::Codegen::DispatchLoweringPassPipeline> passPipeline;
   for (auto &it : entryPoints) {
     auto entryPointOp = it.second;
-    if (IREE::HAL::TranslationInfo translationInfo =
+    if (IREE::Codegen::TranslationInfoAttr translationInfo =
             getTranslationInfo(entryPointOp)) {
-      Optional<IREE::HAL::DispatchLoweringPassPipeline> currPipeline =
-          getLoweringPassPipeline(translationInfo);
-      if (!currPipeline) continue;
+      IREE::Codegen::DispatchLoweringPassPipeline currPipeline =
+          translationInfo.getDispatchLoweringPassPipeline();
       if (passPipeline) {
-        if (currPipeline.getValue() != passPipeline.getValue()) {
+        if (currPipeline != passPipeline.getValue()) {
           moduleOp.emitError(
               "unhandled compilation of entry point function with different "
               "pass pipelines within a module");
@@ -97,16 +98,13 @@ void SPIRVLowerExecutableTargetPass::runOnOperation() {
   if (!testLoweringConfiguration && passPipeline.hasValue()) {
     OpPassManager &nestedModulePM = executableLoweringPipeline.nest<ModuleOp>();
     switch (*passPipeline) {
-      case IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistribute:
+      case IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute:
         addSPIRVTileAndDistributePassPipeline(nestedModulePM);
         break;
-      case IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistributeToGlobalID:
-        addSPIRVDistributeToGlobalIDPassPipeline(nestedModulePM);
-        break;
-      case IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize:
+      case IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize:
         addSPIRVTileAndVectorizePassPipeline(nestedModulePM);
         break;
-      case IREE::HAL::DispatchLoweringPassPipeline::
+      case IREE::Codegen::DispatchLoweringPassPipeline::
           SPIRVVectorizeToCooperativeOps:
         addSPIRVTileAndVectorizeToCooperativeOpsPassPipeline(nestedModulePM);
         break;
diff --git a/iree/compiler/Codegen/SPIRV/SPIRVRemoveOneTripTiledLoops.cpp b/iree/compiler/Codegen/SPIRV/SPIRVRemoveOneTripTiledLoops.cpp
index 079f8953052d..21b7e62da190 100644
--- a/iree/compiler/Codegen/SPIRV/SPIRVRemoveOneTripTiledLoops.cpp
+++ b/iree/compiler/Codegen/SPIRV/SPIRVRemoveOneTripTiledLoops.cpp
@@ -4,6 +4,7 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Codegen/Transforms/Transforms.h"
@@ -112,11 +113,7 @@ class SPIRVRemoveOneTripTiledLoopPass
     auto translationInfo = getTranslationInfo(entryPointOp);
     if (!translationInfo) return;
 
-    ArrayAttr workloadPerWorkgroupAttr = translationInfo.workloadPerWorkgroup();
-    if (!workloadPerWorkgroupAttr) return;
-    auto workloadPerWorkgroup = llvm::to_vector<4>(llvm::map_range(
-        workloadPerWorkgroupAttr,
-        [](Attribute attr) { return attr.cast<IntegerAttr>().getInt(); }));
+    auto workloadPerWorkgroup = translationInfo.getWorkloadPerWorkgroupVals();
 
     MLIRContext *context = &getContext();
     removeOneTripTiledLoops(context, funcOp, cast<linalg::LinalgOp>(rootOp[0]),
diff --git a/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp b/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp
index 07b3a62864c4..15dc4fe54ced 100644
--- a/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp
+++ b/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Codegen/SPIRV/Utils.h"
 #include "iree/compiler/Codegen/Transforms/Transforms.h"
 #include "iree/compiler/Codegen/Utils/MarkerUtils.h"
-#include "iree/compiler/Codegen/Utils/Utils.h"
 #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h"
 #include "iree/compiler/Dialect/LinalgExt/Transforms/Transforms.h"
 #include "llvm/ADT/STLExtras.h"
@@ -79,11 +79,7 @@ static void populateTilingToInvocationPatterns(MLIRContext *context,
                                                RewritePatternSet &patterns) {
   linalg::TileSizeComputationFunction getInnerTileSizeFn =
       [&](OpBuilder &builder, Operation *op) {
-        SmallVector<int64_t> tileSizes = getTileSizes(op, 1);
-        return llvm::to_vector<4>(
-            llvm::map_range(tileSizes, [&](int64_t v) -> Value {
-              return builder.create<arith::ConstantIndexOp>(op->getLoc(), v);
-            }));
+        return getTileSizes(builder, op, 1);
       };
 
   auto getThreadProcInfoFn = [](OpBuilder &builder, Location loc,
@@ -161,11 +157,7 @@ static void populateTilingReductionPatterns(
     MLIRContext *context, RewritePatternSet &patterns,
     linalg::LinalgTransformationFilter marker) {
   auto getTileSizeFn = [&](OpBuilder &builder, Operation *op) {
-    SmallVector<int64_t> tileSizes = getTileSizes(op, 2);
-    return llvm::to_vector<4>(
-        llvm::map_range(tileSizes, [&](int64_t v) -> Value {
-          return builder.create<arith::ConstantIndexOp>(op->getLoc(), v);
-        }));
+    return getTileSizes(builder, op, 2);
   };
 
   auto tilingOptions = linalg::LinalgTilingOptions()
diff --git a/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp b/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp
index dc611dec4cdc..3a3c10b5696c 100644
--- a/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp
+++ b/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp
@@ -13,6 +13,7 @@
 
 #include <algorithm>
 
+#include "iree/compiler/Codegen/Dialect/LoweringConfig.h"
 #include "iree/compiler/Codegen/PassDetail.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Codegen/SPIRV/KernelConfig.h"
@@ -20,7 +21,6 @@
 #include "iree/compiler/Codegen/Transforms/Transforms.h"
 #include "iree/compiler/Codegen/Utils/MarkerUtils.h"
 #include "iree/compiler/Codegen/Utils/Utils.h"
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
 #include "llvm/Support/Debug.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
diff --git a/iree/compiler/Codegen/SPIRV/test/BUILD b/iree/compiler/Codegen/SPIRV/test/BUILD
index f3c95cb5c061..c57ce23e6644 100644
--- a/iree/compiler/Codegen/SPIRV/test/BUILD
+++ b/iree/compiler/Codegen/SPIRV/test/BUILD
@@ -28,7 +28,6 @@ iree_lit_test_suite(
             "config_mali_matmul.mlir",
             "config_nvidia_matmul_cooperative_ops.mlir",
             "convert_to_spirv.mlir",
-            "distribute_to_global_id.mlir",
             "fold_gpu_procid_uses.mlir",
             "pipeline_matmul_cooperative_ops.mlir",
             "pipeline_matmul_vectorization.mlir",
diff --git a/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt b/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt
index c3aefd1eb29b..6fbb76274e56 100644
--- a/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt
+++ b/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt
@@ -23,7 +23,6 @@ iree_lit_test_suite(
     "config_mali_matmul.mlir"
     "config_nvidia_matmul_cooperative_ops.mlir"
     "convert_to_spirv.mlir"
-    "distribute_to_global_id.mlir"
     "fold_gpu_procid_uses.mlir"
     "pipeline_matmul_cooperative_ops.mlir"
     "pipeline_matmul_vectorization.mlir"
diff --git a/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir b/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
index aadabbccff45..c1781560ce43 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
 
 // Conv - large OC - distribute to only one workgroup dimension.
 
@@ -74,18 +74,20 @@ hal.executable @conv_112x112x512 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @conv_112x112x512
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [256, 8, 1]}
-//           CHECK-SAME:   workgroup_size = [64 : index, 1 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[C2:.+]] = arith.constant 2 : index
-//           CHECK-NEXT:   %[[C14:.+]] = arith.constant 14 : index
-//           CHECK-NEXT:   %[[C112:.+]] = arith.constant 112 : index
-//           CHECK-NEXT:   hal.return %[[C2]], %[[C14]], %[[C112]]
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[0, 1, 8, 256], [0, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [256, 8, 1]>
+//      CHECK: hal.executable.entry_point public @conv_112x112x512
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [64 : index, 1 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[C2:.+]] = arith.constant 2 : index
+// CHECK-NEXT:   %[[C14:.+]] = arith.constant 14 : index
+// CHECK-NEXT:   %[[C112:.+]] = arith.constant 112 : index
+// CHECK-NEXT:   hal.return %[[C2]], %[[C14]], %[[C112]]
 
-//                CHECK: func @conv_112x112x512()
-//                CHECK:   linalg.conv_2d_nhwc_hwcf
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[0, 1, 8, 256], [0, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4]]}
+//      CHECK: func @conv_112x112x512()
+//      CHECK:   linalg.conv_2d_nhwc_hwcf
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -163,18 +165,20 @@ hal.executable @conv_112x112x32 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @conv_112x112x32
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 16, 4]}
-//           CHECK-SAME:   workgroup_size = [8 : index, 8 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[C7:.+]] = arith.constant 7 : index
-//           CHECK-NEXT:   %[[C28:.+]] = arith.constant 28 : index
-//           CHECK-NEXT:   hal.return %[[C1]], %[[C7]], %[[C28]]
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[0, 4, 16, 32], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 16, 4]>
+//      CHECK: hal.executable.entry_point public @conv_112x112x32
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [8 : index, 8 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[C7:.+]] = arith.constant 7 : index
+// CHECK-NEXT:   %[[C28:.+]] = arith.constant 28 : index
+// CHECK-NEXT:   hal.return %[[C1]], %[[C7]], %[[C28]]
 
-//                CHECK: func @conv_112x112x32()
-//                CHECK:   linalg.conv_2d_nhwc_hwcf
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[0, 4, 16, 32], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4]]}
+//      CHECK: func @conv_112x112x32()
+//      CHECK:   linalg.conv_2d_nhwc_hwcf
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -251,17 +255,19 @@ hal.executable @conv_16x16x16 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @conv_16x16x16
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 8, 8]}
-//           CHECK-SAME:   workgroup_size = [4 : index, 4 : index, 4 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[C2:.+]] = arith.constant 2 : index
-//           CHECK-NEXT:   hal.return %[[C1]], %[[C2]], %[[C2]]
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[0, 8, 8, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 8, 8]>
+//      CHECK: hal.executable.entry_point public @conv_16x16x16
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [4 : index, 4 : index, 4 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[C2:.+]] = arith.constant 2 : index
+// CHECK-NEXT:   hal.return %[[C1]], %[[C2]], %[[C2]]
 
-//                CHECK: func @conv_16x16x16()
-//                CHECK:   linalg.conv_2d_nhwc_hwcf
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[0, 8, 8, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4]]}
+//      CHECK: func @conv_16x16x16()
+//      CHECK:   linalg.conv_2d_nhwc_hwcf
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -340,17 +346,19 @@ hal.executable @dwconv_28x28x144 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @dwconv_28x28x144
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]}
-//           CHECK-SAME:   workgroup_size = [4 : index, 4 : index, 4 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[C9:.+]] = arith.constant 9 : index
-//           CHECK-NEXT:   %[[C7:.+]] = arith.constant 7 : index
-//           CHECK-NEXT:   hal.return %[[C9]], %[[C7]], %[[C7]]
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[0, 4, 4, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]>
+//      CHECK: hal.executable.entry_point public @dwconv_28x28x144
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [4 : index, 4 : index, 4 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[C9:.+]] = arith.constant 9 : index
+// CHECK-NEXT:   %[[C7:.+]] = arith.constant 7 : index
+// CHECK-NEXT:   hal.return %[[C9]], %[[C7]], %[[C7]]
 
-//                CHECK: func @dwconv_28x28x144()
-//                CHECK:   linalg.depthwise_conv2D_nhw
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[0, 4, 4, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]]}
+//      CHECK: func @dwconv_28x28x144()
+//      CHECK:   linalg.depthwise_conv2D_nhw
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -428,14 +436,15 @@ hal.executable @dwconv_4x4x8 {
     }
   }
 }
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[0, 4, 4, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 4, 4]>
+//      CHECK: hal.executable.entry_point public @dwconv_4x4x8
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [2 : index, 4 : index, 4 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   hal.return %[[C1]], %[[C1]], %[[C1]]
 
-//          CHECK-LABEL: hal.executable.entry_point public @dwconv_4x4x8
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 4, 4]}
-//           CHECK-SAME:   workgroup_size = [2 : index, 4 : index, 4 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   hal.return %[[C1]], %[[C1]], %[[C1]]
-
-//                CHECK: func @dwconv_4x4x8()
-//                CHECK:   linalg.depthwise_conv2D_nhw
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[0, 4, 4, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]]}
+//      CHECK: func @dwconv_4x4x8()
+//      CHECK:   linalg.depthwise_conv2D_nhw
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
diff --git a/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
index 8412a2808af6..251f21fcf408 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
 
 // Large matmul that can match the best tiling scheme.
 
@@ -62,18 +62,22 @@ hal.executable @matmul_1024x2048x512 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @matmul_1024x2048x512
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [128, 32]}
-//           CHECK-SAME:   workgroup_size = [32 : index, 2 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 128)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
-
-//                CHECK: func @matmul_1024x2048x512()
-//                CHECK:   linalg.matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[32, 128], [16, 4], [0, 0, 4]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[32, 128], [16, 4], [0, 0, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [128, 32]>
+//      CHECK: hal.executable.entry_point public @matmul_1024x2048x512
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [32 : index, 2 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
+
+//      CHECK: func @matmul_1024x2048x512()
+//      CHECK:   linalg.matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -139,18 +143,22 @@ hal.executable @matmul_3136x24x96 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @matmul_3136x24x96
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 448]}
-//           CHECK-SAME:   workgroup_size = [2 : index, 32 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 448)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
-
-//                CHECK: func @matmul_3136x24x96()
-//                CHECK:   linalg.matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[448, 8], [14, 4], [0, 0, 4]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[448, 8], [14, 4], [0, 0, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 448)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 448]>
+//      CHECK: hal.executable.entry_point public @matmul_3136x24x96
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [2 : index, 32 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
+
+//      CHECK: func @matmul_3136x24x96()
+//      CHECK:   linalg.matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -216,18 +224,22 @@ hal.executable @matmul_196x64x192 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @matmul_196x64x192
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 28]}
-//           CHECK-SAME:   workgroup_size = [16 : index, 4 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 64)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 28)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
-
-//                CHECK: func @matmul_196x64x192()
-//                CHECK:   linalg.matmul
-//  CHECK-SAME{LITERAL}:      lowering.config = {tileSizes = [[28, 64], [7, 4], [0, 0, 8]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[28, 64], [7, 4], [0, 0, 8]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 28)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 28]>
+//      CHECK: hal.executable.entry_point public @matmul_196x64x192
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [16 : index, 4 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
+
+//      CHECK: func @matmul_196x64x192()
+//      CHECK:   linalg.matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -288,18 +300,22 @@ hal.executable @matmul_12544x96x16 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @matmul_12544x96x16
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 128]}
-//           CHECK-SAME:   workgroup_size = [8 : index, 8 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 128)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
-
-//                CHECK: func @matmul_12544x96x16()
-//                CHECK:   linalg.matmul
-//  CHECK-SAME{LITERAL}:     lowering.config =  {tileSizes = [[128, 32], [16, 4], [0, 0, 4]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[128, 32], [16, 4], [0, 0, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 128]>
+//      CHECK: hal.executable.entry_point public @matmul_12544x96x16
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [8 : index, 8 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
+
+//      CHECK: func @matmul_12544x96x16()
+//      CHECK:   linalg.matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -365,18 +381,22 @@ hal.executable @matmul_49x160x576 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @matmul_49x160x576
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 7]}
-//           CHECK-SAME:   workgroup_size = [8 : index, 1 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 7)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
-
-//                CHECK: func @matmul_49x160x576()
-//                CHECK:   linalg.matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[7, 32], [7, 4], [0, 0, 8]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[7, 32], [7, 4], [0, 0, 8]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 7)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 7]>
+//      CHECK: hal.executable.entry_point public @matmul_49x160x576
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [8 : index, 1 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
+
+//      CHECK: func @matmul_49x160x576()
+//      CHECK:   linalg.matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -453,17 +473,21 @@ hal.executable @batch_matmul_4x384x384 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @batch_matmul_4x384x384
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [128, 32, 1]}
-//           CHECK-SAME:   workgroup_size = [32 : index, 2 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index):
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 128)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]]
-
-//                CHECK: func @batch_matmul_4x384x384()
-//                CHECK:   linalg.batch_matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[1, 32, 128], [1, 16, 4], [0, 0, 0, 4]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[1, 32, 128], [1, 16, 4], [0, 0, 0, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [128, 32, 1]>
+//      CHECK: hal.executable.entry_point public @batch_matmul_4x384x384
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [32 : index, 2 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index):
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]]
+
+//      CHECK: func @batch_matmul_4x384x384()
+//      CHECK:   linalg.batch_matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -540,14 +564,17 @@ hal.executable @batch_matmul_4x8x8 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @batch_matmul_4x8x8
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 8, 1]}
-//           CHECK-SAME:   workgroup_size = [2 : index, 8 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index):
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]]
-
-//                CHECK: func @batch_matmul_4x8x8()
-//                CHECK:   linalg.batch_matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[1, 8, 8], [1, 1, 4], [0, 0, 0, 16]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[1, 8, 8], [1, 1, 4], [0, 0, 0, 16]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 8, 1]>
+//      CHECK: hal.executable.entry_point public @batch_matmul_4x8x8
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [2 : index, 8 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index):
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]]
+
+//      CHECK: func @batch_matmul_4x8x8()
+//      CHECK:   linalg.batch_matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
\ No newline at end of file
diff --git a/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
index 0b584bfc1da5..10dc64f3c81d 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
 
 // Odd K that forbids vectorization.
 
@@ -74,16 +74,19 @@ hal.executable @batch_matmul_1x3x32 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @batch_matmul_1x3x32
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [4, 1, 1]}
-//           CHECK-SAME:   workgroup_size = [4 : index, 1 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index):
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%[[X]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y]], %[[Z]]
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[1, 1, 4], [1, 1, 1]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [4, 1, 1]>
+//      CHECK: hal.executable.entry_point public @batch_matmul_1x3x32
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [4 : index, 1 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index):
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y]], %[[Z]]
 
-//                CHECK: func @batch_matmul_1x3x32()
-//                CHECK:   linalg.batch_matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[1, 1, 4], [1, 1, 1]]}
+//      CHECK: func @batch_matmul_1x3x32()
+//      CHECK:   linalg.batch_matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -149,14 +152,17 @@ hal.executable private @matmul_64x16 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @matmul_64x16
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [4, 1]}
-//           CHECK-SAME:   workgroup_size = [4 : index, 1 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%[[X]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y]], %[[ONE]]
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[1, 4], [1, 1]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [4, 1]>
+//      CHECK: hal.executable.entry_point public @matmul_64x16
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [4 : index, 1 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y]], %[[ONE]]
 
-//                CHECK: func @matmul_64x16()
-//                CHECK:   linalg.matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[1, 4], [1, 1]]}
+//      CHECK: func @matmul_64x16()
+//      CHECK:   linalg.matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
diff --git a/iree/compiler/Codegen/SPIRV/test/config_linalg_ext_ops.mlir b/iree/compiler/Codegen/SPIRV/test/config_linalg_ext_ops.mlir
index ae68820d73f0..57ea3d876000 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_linalg_ext_ops.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_linalg_ext_ops.mlir
@@ -1,5 +1,4 @@
-// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
-
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
 hal.executable private @static_1d_sort  {
   hal.interface @io {
     hal.interface.binding @s0b0_rw_external, set=0, binding=0, type="StorageBuffer", access="Read|Write"
@@ -34,8 +33,10 @@ hal.executable private @static_1d_sort  {
 
 // Check that the workgroup count and size are (1, 1, 1) for serializing the computation.
 
-// CHECK-LABEL: hal.executable.entry_point public @static_1d_sort
-//  CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize"}
+//   CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = [], native_vector_size = []>
+//   CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = []>
+//       CHECK: hal.executable.entry_point public @static_1d_sort
+//  CHECK-SAME:   translation.info = #[[TRANSLATION]]
 //  CHECK-SAME:   workgroup_size = [1 : index, 1 : index, 1 : index]
 //  CHECK-NEXT: ^{{.+}}(%{{.+}}: index, %{{.+}}: index, %{{.+}}: index):
 //  CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
@@ -43,7 +44,7 @@ hal.executable private @static_1d_sort  {
 
 //       CHECK: func @static_1d_sort()
 //       CHECK:   linalg_ext.sort
-//  CHECK-SAME:     lowering.config = {}
+//  CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -98,17 +99,20 @@ hal.executable private @static_3d_sort  {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @static_3d_sort
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [16, 1]}
-//           CHECK-SAME:   workgroup_size = [16 : index, 1 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[DIV:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%[[X]]]
-//           CHECK-NEXT:   hal.return %[[DIV]], %[[Y]], %[[ONE]]
-
-//                CHECK: func @static_3d_sort()
-//                CHECK:   linalg_ext.sort
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[1, 0, 16], [1, 0, 1]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[1, 0, 16], [1, 0, 1]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [16, 1]>
+//      CHECK: hal.executable.entry_point public @static_3d_sort
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [16 : index, 1 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[DIV:.+]] = affine.apply #[[MAP]]()[%[[X]]]
+// CHECK-NEXT:   hal.return %[[DIV]], %[[Y]], %[[ONE]]
+
+//      CHECK: func @static_3d_sort()
+//      CHECK:   linalg_ext.sort
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -144,18 +148,20 @@ hal.executable private @static_1d_fft_stage2  {
   }
 }
 
-// CHECK-LABEL: hal.executable.entry_point public @static_1d_fft_stage2
-//  CHECK-SAME:   translation.info = {passPipeline = "SPIRVDistribute"
-//  CHECK-SAME:   workloadPerWorkgroup = [4]}
+//   CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[4]{{\]}}, native_vector_size = []>
+//   CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)>
+//   CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [4]>
+//       CHECK: hal.executable.entry_point public @static_1d_fft_stage2
+//  CHECK-SAME:   translation.info = #[[TRANSLATION]]
 //  CHECK-SAME:   workgroup_size = [16 : index, 1 : index, 1 : index]
 //  CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %{{.+}}: index, %{{.+}}: index):
 //  CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//  CHECK-NEXT:   %[[T:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%[[ARG0]]]
+//  CHECK-NEXT:   %[[T:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
 //  CHECK-NEXT:   hal.return %[[T]], %[[ONE]], %[[ONE]]
 
 //       CHECK: func @static_1d_fft_stage2()
 //       CHECK:   linalg_ext.fft
-//  CHECK-SAME:     lowering.config = {tileSizes = {{\[}}[4]]}
+//  CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -214,14 +220,16 @@ hal.executable private @static_3d_fft_stage3  {
 }
 
 
-// CHECK-LABEL: hal.executable.entry_point public @static_3d_fft_stage3
-//  CHECK-SAME:   translation.info = {passPipeline = "SPIRVDistribute"
-//  CHECK-SAME:   workloadPerWorkgroup = [8, 1, 1]}
+//   CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[1, 1, 8]{{\]}}, native_vector_size = []>
+//   CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
+//   CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [8, 1, 1]>
+//       CHECK: hal.executable.entry_point public @static_3d_fft_stage3
+//  CHECK-SAME:   translation.info = #[[TRANSLATION]]
 //  CHECK-SAME:   workgroup_size = [16 : index, 1 : index, 1 : index]
 //  CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index):
-//  CHECK-NEXT:   %[[T:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[ARG0]]]
+//  CHECK-NEXT:   %[[T:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]]
 //  CHECK-NEXT:   hal.return %[[T]], %[[ARG1]], %[[ARG2]]
 
 //       CHECK: func @static_3d_fft_stage3()
 //       CHECK:   linalg_ext.fft
-//  CHECK-SAME:     lowering.config = {tileSizes = {{\[}}[1, 1, 8]]}
+//  CHECK-SAME:     lowering.config = #[[CONFIG]]
diff --git a/iree/compiler/Codegen/SPIRV/test/config_linalg_ops.mlir b/iree/compiler/Codegen/SPIRV/test/config_linalg_ops.mlir
index 0cb6a62c3827..4ed74419e78d 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_linalg_ops.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_linalg_ops.mlir
@@ -47,9 +47,10 @@ hal.executable @tensor_insert {
     }
   }
 }
-//      CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [64, 1]>
 //      CHECK: hal.executable.entry_point public @tensor_insert_slice
-// CHECK-SAME:   translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [64, 1]}
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   %[[ARG0:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: index
 //  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
@@ -100,10 +101,11 @@ hal.executable @tensor_insert {
     }
   }
 }
-//  CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[1, 16], [1, 1]{{\]}}}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[1, 16], [1, 1]{{\]}}, native_vector_size = []>
 //  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [16, 1]>
 //      CHECK: hal.executable.entry_point public @tensor_insert_slice
-// CHECK-SAME:   translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [16, 1]}
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
 // CHECK-NEXT:   %[[ARG0:[a-zA-Z0-9_]+]]: index
 // CHECK-SAME:   %[[ARG1:[a-zA-Z0-9_]+]]: index
 //  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
diff --git a/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir b/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
index 02000f274de1..644ab49b27f7 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
 
 // Conv - large OC - distribute to only one workgroup dimension.
 
@@ -74,18 +74,20 @@ hal.executable @conv_112x112x512 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @conv_112x112x512
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 4, 1]}
-//           CHECK-SAME:   workgroup_size = [16 : index, 1 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[C8:.+]] = arith.constant 8 : index
-//           CHECK-NEXT:   %[[C28:.+]] = arith.constant 28 : index
-//           CHECK-NEXT:   %[[C112:.+]] = arith.constant 112 : index
-//           CHECK-NEXT:   hal.return %[[C8]], %[[C28]], %[[C112]]
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[0, 1, 4, 64], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 4, 1]>
+//      CHECK: hal.executable.entry_point public @conv_112x112x512
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [16 : index, 1 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[C8:.+]] = arith.constant 8 : index
+// CHECK-NEXT:   %[[C28:.+]] = arith.constant 28 : index
+// CHECK-NEXT:   %[[C112:.+]] = arith.constant 112 : index
+// CHECK-NEXT:   hal.return %[[C8]], %[[C28]], %[[C112]]
 
-//                CHECK: func @conv_112x112x512()
-//                CHECK:   linalg.conv_2d_nhwc_hwcf
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[0, 1, 4, 64], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4]]}
+//      CHECK: func @conv_112x112x512()
+//      CHECK:   linalg.conv_2d_nhwc_hwcf
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -163,18 +165,20 @@ hal.executable @conv_112x112x32 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @conv_112x112x32
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 8, 1]}
-//           CHECK-SAME:   workgroup_size = [8 : index, 2 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[C14:.+]] = arith.constant 14 : index
-//           CHECK-NEXT:   %[[C112:.+]] = arith.constant 112 : index
-//           CHECK-NEXT:   hal.return %[[C1]], %[[C14]], %[[C112]]
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[0, 1, 8, 32], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 8, 1]>
+//      CHECK: hal.executable.entry_point public @conv_112x112x32
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [8 : index, 2 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[C14:.+]] = arith.constant 14 : index
+// CHECK-NEXT:   %[[C112:.+]] = arith.constant 112 : index
+// CHECK-NEXT:   hal.return %[[C1]], %[[C14]], %[[C112]]
 
-//                CHECK: func @conv_112x112x32()
-//                CHECK:   linalg.conv_2d_nhwc_hwcf
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[0, 1, 8, 32], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4]]}
+//      CHECK: func @conv_112x112x32()
+//      CHECK:   linalg.conv_2d_nhwc_hwcf
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -251,17 +255,19 @@ hal.executable @conv_16x16x16 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @conv_16x16x16
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]}
-//           CHECK-SAME:   workgroup_size = [4 : index, 2 : index, 2 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[C4:.+]] = arith.constant 4 : index
-//           CHECK-NEXT:   hal.return %[[C1]], %[[C4]], %[[C4]]
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[0, 4, 4, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]>
+//      CHECK: hal.executable.entry_point public @conv_16x16x16
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [4 : index, 2 : index, 2 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[C4:.+]] = arith.constant 4 : index
+// CHECK-NEXT:   hal.return %[[C1]], %[[C4]], %[[C4]]
 
-//                CHECK: func @conv_16x16x16()
-//                CHECK:   linalg.conv_2d_nhwc_hwcf
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[0, 4, 4, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4]]}
+//      CHECK: func @conv_16x16x16()
+//      CHECK:   linalg.conv_2d_nhwc_hwcf
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -340,17 +346,19 @@ hal.executable @dwconv_28x28x144 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @dwconv_28x28x144
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]}
-//           CHECK-SAME:   workgroup_size = [4 : index, 2 : index, 2 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[C9:.+]] = arith.constant 9 : index
-//           CHECK-NEXT:   %[[C7:.+]] = arith.constant 7 : index
-//           CHECK-NEXT:   hal.return %[[C9]], %[[C7]], %[[C7]]
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[0, 4, 4, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]>
+//      CHECK: hal.executable.entry_point public @dwconv_28x28x144
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [4 : index, 2 : index, 2 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[C9:.+]] = arith.constant 9 : index
+// CHECK-NEXT:   %[[C7:.+]] = arith.constant 7 : index
+// CHECK-NEXT:   hal.return %[[C9]], %[[C7]], %[[C7]]
 
-//                CHECK: func @dwconv_28x28x144()
-//                CHECK:   linalg.depthwise_conv2D_nhw
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[0, 4, 4, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1]]}
+//      CHECK: func @dwconv_28x28x144()
+//      CHECK:   linalg.depthwise_conv2D_nhw
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -430,14 +438,16 @@ hal.executable @dwconv_1x2x8 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @dwconv_1x2x8
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 2, 1]}
-//           CHECK-SAME:   workgroup_size = [2 : index, 2 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   hal.return %[[C1]], %[[C1]], %[[C1]]
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[0, 1, 2, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 2, 1]>
+//      CHECK: hal.executable.entry_point public @dwconv_1x2x8
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [2 : index, 2 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   hal.return %[[C1]], %[[C1]], %[[C1]]
 
-//                CHECK: func @dwconv_1x2x8()
-//                CHECK:   linalg.depthwise_conv2D_nhw
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[0, 1, 2, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]]}
+//      CHECK: func @dwconv_1x2x8()
+//      CHECK:   linalg.depthwise_conv2D_nhw
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
diff --git a/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
index 4d72dc471823..fe03afa14a9f 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
 
 // Large matmul that can match the best tiling scheme.
 
@@ -62,18 +62,22 @@ hal.executable @matmul_1024x2048x512 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @matmul_1024x2048x512
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 8]}
-//           CHECK-SAME:   workgroup_size = [8 : index, 2 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
-
-//                CHECK: func @matmul_1024x2048x512()
-//                CHECK:   linalg.matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[8, 32], [4, 4], [0, 0, 4]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[8, 32], [4, 4], [0, 0, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 8]>
+//      CHECK: hal.executable.entry_point public @matmul_1024x2048x512
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [8 : index, 2 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
+
+//      CHECK: func @matmul_1024x2048x512()
+//      CHECK:   linalg.matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -139,18 +143,22 @@ hal.executable @matmul_3136x24x96 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @matmul_3136x24x96
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 32]}
-//           CHECK-SAME:   workgroup_size = [2 : index, 8 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
-
-//                CHECK: func @matmul_3136x24x96()
-//                CHECK:   linalg.matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[32, 8], [4, 4], [0, 0, 4]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[32, 8], [4, 4], [0, 0, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 32]>
+//      CHECK: hal.executable.entry_point public @matmul_3136x24x96
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [2 : index, 8 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
+
+//      CHECK: func @matmul_3136x24x96()
+//      CHECK:   linalg.matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -216,18 +224,22 @@ hal.executable @matmul_196x64x192 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @matmul_196x64x192
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 4]}
-//           CHECK-SAME:   workgroup_size = [8 : index, 2 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
-
-//                CHECK: func @matmul_196x64x192()
-//                CHECK:   linalg.matmul
-//  CHECK-SAME{LITERAL}:      lowering.config = {tileSizes = [[4, 32], [2, 4], [0, 0, 8]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[4, 32], [2, 4], [0, 0, 8]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4]>
+//      CHECK: hal.executable.entry_point public @matmul_196x64x192
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [8 : index, 2 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
+
+//      CHECK: func @matmul_196x64x192()
+//      CHECK:   linalg.matmul
+// CHECK-SAME:      lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -288,18 +300,22 @@ hal.executable @matmul_12544x96x16 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @matmul_12544x96x16
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 8]}
-//           CHECK-SAME:   workgroup_size = [8 : index, 2 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
-
-//                CHECK: func @matmul_12544x96x16()
-//                CHECK:   linalg.matmul
-//  CHECK-SAME{LITERAL}:     lowering.config =  {tileSizes = [[8, 32], [4, 4], [0, 0, 4]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[8, 32], [4, 4], [0, 0, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 8]>
+//      CHECK: hal.executable.entry_point public @matmul_12544x96x16
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [8 : index, 2 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]]
+
+//      CHECK: func @matmul_12544x96x16()
+//      CHECK:   linalg.matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -365,17 +381,20 @@ hal.executable @matmul_49x160x576 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @matmul_49x160x576
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 1]}
-//           CHECK-SAME:   workgroup_size = [8 : index, 1 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y]], %[[ONE]]
-
-//                CHECK: func @matmul_49x160x576()
-//                CHECK:   linalg.matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[1, 32], [1, 4], [0, 0, 8]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[1, 32], [1, 4], [0, 0, 8]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 1]>
+//      CHECK: hal.executable.entry_point public @matmul_49x160x576
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [8 : index, 1 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[ONE:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y]], %[[ONE]]
+
+//      CHECK: func @matmul_49x160x576()
+//      CHECK:   linalg.matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -452,17 +471,21 @@ hal.executable @batch_matmul_4x384x384 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @batch_matmul_4x384x384
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 12, 1]}
-//           CHECK-SAME:   workgroup_size = [8 : index, 2 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index):
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 12)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]]
-
-//                CHECK: func @batch_matmul_4x384x384()
-//                CHECK:   linalg.batch_matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[1, 12, 32], [1, 6, 4], [0, 0, 0, 4]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[1, 12, 32], [1, 6, 4], [0, 0, 0, 4]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 12)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 12, 1]>
+//      CHECK: hal.executable.entry_point public @batch_matmul_4x384x384
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [8 : index, 2 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index):
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]]
+
+//      CHECK: func @batch_matmul_4x384x384()
+//      CHECK:   linalg.batch_matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -540,14 +563,18 @@ hal.executable @batch_matmul_4x2x8 {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @batch_matmul_4x2x8
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 2, 1]}
-//           CHECK-SAME:   workgroup_size = [2 : index, 2 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index):
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]]
-
-//                CHECK: func @batch_matmul_4x2x8()
-//                CHECK:   linalg.batch_matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[1, 2, 8], [1, 1, 4], [0, 0, 0, 8]]}
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[1, 2, 8], [1, 1, 4], [0, 0, 0, 8]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 2, 1]>
+//      CHECK: hal.executable.entry_point public @batch_matmul_4x2x8
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [2 : index, 2 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index):
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]]
+
+//      CHECK: func @batch_matmul_4x2x8()
+//      CHECK:   linalg.batch_matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
diff --git a/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir b/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
index 3968d729923b..1097c3caaae3 100644
--- a/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir
@@ -1,4 +1,4 @@
-// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s
 
 #map0 = affine_map<()[s0, s1] -> (s0 * s1)>
 #map1 = affine_map<(d0)[s0] -> (s0, -d0 + 256)>
@@ -100,18 +100,21 @@ hal.executable public @matmul_256x1024x128_div_sub {
   }
 }
 
-//          CHECK-LABEL: hal.executable.entry_point public @matmul_256x1024x128_div_sub
-//           CHECK-SAME:   translation.info = {passPipeline = "SPIRVVectorizeToCooperativeOps", workloadPerWorkgroup = [16, 16]}
-//           CHECK-SAME:   workgroup_size = [32 : index, 1 : index, 1 : index]
-//           CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
-//           CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
-//           CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%[[X]]]
-//           CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%[[Y]]]
-//           CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[C1]]
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config<tile_sizes = {{\[}}[16, 16, 16], [16, 16, 16]{{\]}}, native_vector_size = []>
+//  CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)>
+//  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorizeToCooperativeOps", workload_per_wg = [16, 16]>
+//      CHECK: hal.executable.entry_point public @matmul_256x1024x128_div_sub
+// CHECK-SAME:   translation.info = #[[TRANSLATION]]
+// CHECK-SAME:   workgroup_size = [32 : index, 1 : index, 1 : index]
+// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index):
+// CHECK-NEXT:   %[[C1:.+]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]]
+// CHECK-NEXT:   %[[Y_COUNT:.+]] = affine.apply #[[MAP]]()[%[[Y]]]
+// CHECK-NEXT:   hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[C1]]
 
-//                CHECK: func @matmul_256x1024x128_div_sub()
-//                CHECK:   linalg.matmul
-//  CHECK-SAME{LITERAL}:     lowering.config = {tileSizes = [[16, 16, 16], [16, 16, 16]]}
+//      CHECK: func @matmul_256x1024x128_div_sub()
+//      CHECK:   linalg.matmul
+// CHECK-SAME:     lowering.config = #[[CONFIG]]
 
 // -----
 
@@ -194,5 +197,6 @@ hal.executable public @matmul_256x1024x8 {
   }
 }
 
-// CHECK-LABEL: hal.executable.entry_point public @matmul_256x1024x8
-//  CHECK-SAME:   passPipeline = "SPIRVVectorize"
+//   CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize"
+//       CHECK: hal.executable.entry_point public @matmul_256x1024x8
+//  CHECK-SAME:   translation.info = #[[TRANSLATION]]
diff --git a/iree/compiler/Codegen/SPIRV/test/distribute_to_global_id.mlir b/iree/compiler/Codegen/SPIRV/test/distribute_to_global_id.mlir
deleted file mode 100644
index c7d9c882fd62..000000000000
--- a/iree/compiler/Codegen/SPIRV/test/distribute_to_global_id.mlir
+++ /dev/null
@@ -1,236 +0,0 @@
-// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-distribute-to-global-id))))' -canonicalize -cse %s | IreeFileCheck %s
-
-#map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
-hal.executable private @parallel_4D  {
-  hal.interface @io {
-    hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-    hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
-    hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
-  }
-  hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
-    hal.executable.entry_point @parallel_4D attributes {interface = @io, ordinal = 0 : index}
-    builtin.module {
-      func @parallel_4D() {
-        %c0 = arith.constant 0 : index
-        %dim0 = hal.interface.load.constant offset = 0 : index
-        %dim1 = hal.interface.load.constant offset = 1 : index
-        %dim2 = hal.interface.load.constant offset = 2 : index
-        %dim3 = hal.interface.load.constant offset = 3 : index
-        %arg0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<?x?x?x?xf32>{%dim0, %dim1, %dim2, %dim3}
-        %arg1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<?x?x?x?xf32>{%dim0, %dim1, %dim2, %dim3}
-        %arg2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<?x?x?x?xf32>{%dim0, %dim1, %dim2, %dim3}
-        linalg.generic {
-           indexing_maps = [#map0, #map0, #map0],
-           iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
-          ins(%arg0, %arg1 : memref<?x?x?x?xf32>, memref<?x?x?x?xf32>)
-         outs(%arg2 : memref<?x?x?x?xf32>) {
-        ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
-          %0 = arith.addf %arg3, %arg4 : f32
-          linalg.yield %0 : f32
-        }
-        return
-      }
-      func private @parallel_4D__num_workgroups__
-        (!shapex.ranked_shape<[?,?,?,?]>, !shapex.ranked_shape<[?,?,?,?]>,
-         !shapex.ranked_shape<[?,?,?,?]>) -> (index, index, index)
-      hal.interface private @io  {
-        hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-        hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
-        hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
-      }
-    }
-  }
-}
-// CHECK-LABEL: func @parallel_4D
-//   CHECK-DAG:     %[[C0:.+]] = arith.constant 0 : index
-//   CHECK-DAG:     %[[C1:.+]] = arith.constant 1 : index
-//   CHECK-DAG:     %[[C2:.+]] = arith.constant 2 : index
-//   CHECK-DAG:     %[[C3:.+]] = arith.constant 3 : index
-//   CHECK-DAG:     %[[UB0:.+]] = memref.dim %{{.+}}, %[[C0]]
-//   CHECK-DAG:     %[[UB1:.+]] = memref.dim %{{.+}}, %[[C1]]
-//   CHECK-DAG:     %[[UB2:.+]] = memref.dim %{{.+}}, %[[C2]]
-//   CHECK-DAG:     %[[UB3:.+]] = memref.dim %{{.+}}, %[[C3]]
-//       CHECK:     %[[T4:.+]] = arith.muli %[[UB3]], %[[UB2]]
-//       CHECK:     %[[T5:.+]] = arith.muli %[[T4]], %[[UB1]]
-//       CHECK:     %[[UB:.+]] = arith.muli %[[T5]], %[[UB0]]
-//   CHECK-DAG:     %[[BID:.+]] = "gpu.block_id"() {dimension = "x"}
-//   CHECK-DAG:     %[[BDIM:.+]] = "gpu.block_dim"() {dimension = "x"}
-//   CHECK-DAG:     %[[TID:.+]] = "gpu.thread_id"() {dimension = "x"}
-//       CHECK:     %[[BOFFSET:.+]] = arith.muli %[[BID]], %[[BDIM]]
-//       CHECK:     %[[IV:.+]] = arith.addi %[[BOFFSET]], %[[TID]]
-//       CHECK:     %[[COND:.+]] = arith.cmpi slt, %[[IV]], %[[UB]]
-//       CHECK:     scf.if %[[COND]]
-//       CHECK:       %[[IV0:.+]] = arith.divsi %[[IV]], %[[T5]]
-//       CHECK:       %[[T14:.+]] = arith.remsi %[[IV]], %[[T5]]
-//       CHECK:       %[[IV1:.+]] = arith.divsi %[[T14]], %[[T4]]
-//       CHECK:       %[[T16:.+]] = arith.remsi %[[T14]], %[[T4]]
-//       CHECK:       %[[IV2:.+]] = arith.divsi %[[T16]], %[[UB3]]
-//       CHECK:       %[[IV3:.+]] = arith.remsi %[[T16]], %[[UB3]]
-//       CHECK:       load %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
-//       CHECK:       load %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
-//       CHECK:       store %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
-
-// -----
-
-#map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
-hal.executable private @parallel_4D_static  {
-  hal.interface @io {
-    hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-    hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
-    hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
-  }
-  hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
-    hal.executable.entry_point @parallel_4D_static attributes {interface = @io, ordinal = 0 : index}
-    builtin.module {
-      func @parallel_4D_static() {
-        %c0 = arith.constant 0 : index
-        %arg0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<3x4x5x6xf32>
-        %arg1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<3x4x5x6xf32>
-        %arg2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<3x4x5x6xf32>
-        linalg.generic {
-           indexing_maps = [#map0, #map0, #map0],
-           iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
-          ins(%arg0, %arg1 : memref<3x4x5x6xf32>, memref<3x4x5x6xf32>)
-         outs(%arg2 : memref<3x4x5x6xf32>) {
-        ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
-          %0 = arith.addf %arg3, %arg4 : f32
-          linalg.yield %0 : f32
-        }
-        return
-      }
-      hal.interface private @io  {
-        hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-        hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
-        hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
-      }
-    }
-  }
-}
-// CHECK-LABEL: func @parallel_4D_static()
-//   CHECK-DAG:     %[[C360:.+]] = arith.constant 360 : index
-//   CHECK-DAG:     %[[C120:.+]] = arith.constant 120 : index
-//   CHECK-DAG:     %[[C30:.+]] = arith.constant 30 : index
-//   CHECK-DAG:     %[[C6:.+]] = arith.constant 6 : index
-//   CHECK-DAG:     %[[BID:.+]] = "gpu.block_id"() {dimension = "x"}
-//   CHECK-DAG:     %[[BDIM:.+]] = "gpu.block_dim"() {dimension = "x"}
-//   CHECK-DAG:     %[[TID:.+]] = "gpu.thread_id"() {dimension = "x"}
-//       CHECK:     %[[BOFFSET:.+]] = arith.muli %[[BID]], %[[BDIM]]
-//       CHECK:     %[[IV:.+]] = arith.addi %[[BOFFSET]], %[[TID]]
-//       CHECK:     %[[COND:.+]] = arith.cmpi slt, %[[IV]], %[[C360]]
-//       CHECK:     scf.if %[[COND]]
-//       CHECK:       %[[IV0:.+]] = arith.divsi %[[IV]], %[[C120]]
-//       CHECK:       %[[T14:.+]] = arith.remsi %[[IV]], %[[C120]]
-//       CHECK:       %[[IV1:.+]] = arith.divsi %[[T14]], %[[C30]]
-//       CHECK:       %[[T16:.+]] = arith.remsi %[[T14]], %[[C30]]
-//       CHECK:       %[[IV2:.+]] = arith.divsi %[[T16]], %[[C6]]
-//       CHECK:       %[[IV3:.+]] = arith.remsi %[[T16]], %[[C6]]
-//       CHECK:       load %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
-//       CHECK:       load %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
-//       CHECK:       store %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
-
-// -----
-
-#map0 = affine_map<() -> ()>
-#accesses = [#map0, #map0, #map0]
-#trait = {
-  indexing_maps = #accesses,
-  iterator_types = []
-}
-
-hal.executable private @scalar_add  {
-  hal.interface @io {
-    hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-    hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
-    hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
-  }
-  hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
-    hal.executable.entry_point @scalar_add attributes {interface = @io, ordinal = 0 : index}
-    builtin.module {
-      func @scalar_add() attributes {hal.num_workgroups_fn = @scalar_add__num_workgroups__} {
-        %c0 = arith.constant 0 : index
-        %arg0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<f32>
-        %arg1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<f32>
-        %arg2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<f32>
-        linalg.generic #trait
-          ins(%arg0, %arg1 : memref<f32>, memref<f32>)
-         outs(%arg2 : memref<f32>) {
-        ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
-          %0 = arith.addf %arg3, %arg4 : f32
-          linalg.yield %0 : f32
-         }
-         return
-      }
-      hal.interface private @io  {
-        hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-        hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
-        hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
-      }
-    }
-  }
-}
-// CHECK-LABEL: func @scalar_add()
-//       CHECK:     load
-//  CHECK-NEXT:     load
-//  CHECK-NEXT:     addf
-//  CHECK-NEXT:     store
-//  CHECK-NEXT:     return
-
-// -----
-
-// TODO(GH-4901): Convert these tests back to use dynamic shapes when linalg on tensors becomes default.
-hal.executable private @reduce_sum  {
-  hal.interface @io {
-    hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-    hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
-    hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
-  }
-  hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
-    hal.executable.entry_point @reduce_sum attributes {
-      interface = @io,
-      ordinal = 0 : index
-    }
-    builtin.module {
-      func @reduce_sum() {
-        %c0 = arith.constant 0 : index
-        %arg0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<40x50x75xf32>
-        %arg1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<f32>
-        %arg2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<40xf32>
-        linalg.generic {
-          indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
-                           affine_map<(d0, d1, d2) -> ()>,
-                           affine_map<(d0, d1, d2) -> (d0)>],
-          iterator_types = ["parallel", "reduction", "reduction"]}
-          ins(%arg0, %arg1 : memref<40x50x75xf32>, memref<f32>)
-          outs(%arg2 : memref<40xf32>) {
-        ^bb0(%arg6: f32, %arg7: f32, %arg8: f32):   // no predecessors
-          %idx1 = linalg.index 1 : index
-          %idx2 = linalg.index 2 : index
-          %zero = arith.constant 0 : index
-          %0 = arith.cmpi eq, %idx2, %zero : index
-          %1 = arith.cmpi eq, %idx1, %zero : index
-          %2 = arith.andi %0, %1 : i1
-          %3 = select %2, %arg7, %arg8 : f32
-          %4 = arith.addf %arg6, %3 : f32
-          linalg.yield %4 : f32
-        }
-        return
-      }
-      hal.interface private @io  {
-        hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
-        hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read"
-        hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard"
-      }
-    }
-  }
-}
-//CHECK-LABEL: func @reduce_sum
-//   CHECK-DAG:     %[[C0:.+]] = arith.constant 0 : index
-//   CHECK-DAG:     %[[C40:.+]] = arith.constant 40 : index
-//   CHECK-DAG:     %[[C50:.+]] = arith.constant 50 : index
-//   CHECK-DAG:     %[[C75:.+]] = arith.constant 75 : index
-//       CHECK:     %[[COND:.+]] = arith.cmpi slt, %{{.+}}, %[[C40]]
-//       CHECK:     scf.if %[[COND]]
-//       CHECK:       scf.for %[[IV0:.+]] = %{{.+}} to %[[C50]]
-//       CHECK:         scf.for %[[IV1:.+]] = %{{.+}} to %[[C75]]
-//   CHECK-DAG:           %[[ISZERO0:.+]] = arith.cmpi eq, %[[IV0]], %[[C0]]
-//   CHECK-DAG:           %[[ISZERO1:.+]] = arith.cmpi eq, %[[IV1]], %[[C0]]
diff --git a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
index fc1058e1469f..3315774f2237 100644
--- a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir
@@ -1,7 +1,7 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-codegen-linalg-to-spirv-pipeline))' %s | IreeFileCheck %s
 
-#config = {tileSizes = [[8, 64], [8, 4], [0, 0, 4]]}
-
+#config = #iree_codegen.lowering.config<tile_sizes = [[8, 64], [8, 4], [0, 0, 4]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8]>
 hal.executable private @fuse_and_vectorize_fill_matmul  {
   hal.interface @io {
     hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -13,7 +13,7 @@ hal.executable private @fuse_and_vectorize_fill_matmul  {
     hal.executable.entry_point @fuse_and_vectorize_fill_matmul attributes {
       interface = @io, ordinal = 0 : index,
       workgroup_size = [16: index, 1: index, 1: index],
-      translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8]}
+      translation.info = #translation
     }
     builtin.module {
       func @fuse_and_vectorize_fill_matmul() {
@@ -70,8 +70,8 @@ hal.executable private @fuse_and_vectorize_fill_matmul  {
 
 // -----
 
-#config = {tileSizes = [[8, 64], [8, 4], [0, 0, 4]]}
-
+#config = #iree_codegen.lowering.config<tile_sizes = [[8, 64], [8, 4], [0, 0, 4]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8]>
 hal.executable private @fuse_and_vectorize_matmul_add  {
   hal.interface @io {
     hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -83,7 +83,7 @@ hal.executable private @fuse_and_vectorize_matmul_add  {
     hal.executable.entry_point @fuse_and_vectorize_matmul_add attributes {
       interface = @io, ordinal = 0 : index,
       workgroup_size = [16: index, 1: index, 1: index],
-      translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8]}
+      translation.info = #translation
     }
     builtin.module {
       func @fuse_and_vectorize_matmul_add() {
diff --git a/iree/compiler/Codegen/SPIRV/test/remove_one_trip_tiled_loop.mlir b/iree/compiler/Codegen/SPIRV/test/remove_one_trip_tiled_loop.mlir
index 2dd847e3e27a..6b8b7bcc91b7 100644
--- a/iree/compiler/Codegen/SPIRV/test/remove_one_trip_tiled_loop.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/remove_one_trip_tiled_loop.mlir
@@ -1,5 +1,7 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-remove-one-trip-tiled-loop))))' %s | IreeFileCheck %s
 
+#config = #iree_codegen.lowering.config<tile_sizes = [[0, 4, 4, 16], [], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]>
 hal.executable private @static_shaped_conv  {
   hal.interface @io {
     hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -9,7 +11,7 @@ hal.executable private @static_shaped_conv  {
   hal.executable.variant @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb"> {
     hal.executable.entry_point @static_shaped_conv attributes {
       interface = @io, ordinal = 0 : index,
-      translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]},
+      translation.info = #translation,
       workgroup_size = [4 : index, 4 : index, 1 : index]
     }
     builtin.module {
@@ -46,8 +48,8 @@ hal.executable private @static_shaped_conv  {
               %16 = affine.min affine_map<(d0) -> (4, -d0 + 112)>(%arg0)
               %17 = affine.min affine_map<(d0) -> (4, -d0 + 112)>(%arg1)
               %18 = memref.subview %2[0, %arg0, %arg1, %arg2] [1, %16, %17, %14] [1, 1, 1, 1] : memref<1x112x112x32xf32> to memref<1x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 401408 + s0 + d1 * 3584 + d2 * 32 + d3)>>
-              linalg.fill(%cst, %18) {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[0, 4, 4, 16], [], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4]]}} : f32, memref<1x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 401408 + s0 + d1 * 3584 + d2 * 32 + d3)>>
-              linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, lowering.config = {tileSizes = [[0, 4, 4, 16], [], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4]]}, strides = dense<2> : tensor<2xi64>}
+              linalg.fill(%cst, %18) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f32, memref<1x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 401408 + s0 + d1 * 3584 + d2 * 32 + d3)>>
+              linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, lowering.config = #config, strides = dense<2> : tensor<2xi64>}
                 ins(%13, %15 : memref<1x?x?x3xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 151875 + s0 + d1 * 675 + d2 * 3 + d3)>>, memref<3x3x3x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 288 + s0 + d1 * 96 + d2 * 32 + d3)>>)
                 outs(%18 : memref<1x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 401408 + s0 + d1 * 3584 + d2 * 32 + d3)>>)
             }
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir
index bdd9652f5844..b12a0993fe99 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir
@@ -1,5 +1,7 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-tile-and-distribute))))' %s | IreeFileCheck %s
 
+#config = #iree_codegen.lowering.config<tile_sizes = [[1, 16], [1, 1]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [16, 1]>
 hal.executable private @static_scatter_update_slice  {
   hal.interface @io {
     hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -10,7 +12,7 @@ hal.executable private @static_scatter_update_slice  {
   hal.executable.variant @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb"> {
     hal.executable.entry_point @static_scatter_update_slice attributes {
       interface = @io, ordinal = 0 : index,
-      translation.info = {passPipeline = 5 : i32, workloadPerWorkgroup = [16, 1]},
+      translation.info = #translation,
       workgroup_size = [16 : index, 1 : index, 1 : index]
     }
 
@@ -36,7 +38,7 @@ hal.executable private @static_scatter_update_slice  {
             %8 = memref.subview %1[%arg0, 0] [1, 1] [1, 1] : memref<40x1xi32> to memref<1x1xi32, affine_map<(d0, d1)[s0] -> (d0 + s0 + d1)>>
             %9 = memref.cast %8 : memref<1x1xi32, affine_map<(d0, d1)[s0] -> (d0 + s0 + d1)>> to memref<?x1xi32, affine_map<(d0, d1)[s0] -> (d0 + s0 + d1)>>
             %10 = memref.subview %2[0, %arg1] [100, %5] [1, 1] : memref<100x500xi32> to memref<100x?xi32, affine_map<(d0, d1)[s0] -> (d0 * 500 + s0 + d1)>>
-            linalg_ext.scatter {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[1, 16], [1, 1]]}} ins(%7, %9 : memref<?x?xi32, affine_map<(d0, d1)[s0] -> (d0 * 500 + s0 + d1)>>, memref<?x1xi32, affine_map<(d0, d1)[s0] -> (d0 + s0 + d1)>>) outs(%10 : memref<100x?xi32, affine_map<(d0, d1)[s0] -> (d0 * 500 + s0 + d1)>>)  {
+            linalg_ext.scatter {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%7, %9 : memref<?x?xi32, affine_map<(d0, d1)[s0] -> (d0 * 500 + s0 + d1)>>, memref<?x1xi32, affine_map<(d0, d1)[s0] -> (d0 + s0 + d1)>>) outs(%10 : memref<100x?xi32, affine_map<(d0, d1)[s0] -> (d0 * 500 + s0 + d1)>>)  {
             ^bb0(%arg2: i32, %arg3: i32):  // no predecessors
               linalg_ext.yield %arg2 : i32
             }
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir
index 201f344c7665..345c7317a644 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir
@@ -1,5 +1,7 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-tile-and-distribute, cse))))' %s | IreeFileCheck %s
 
+#config = #iree_codegen.lowering.config<tile_sizes = [[1, 0, 16], [1, 0, 1]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [16, 1]>
 hal.executable private @static_3d_sort  {
   hal.interface @io {
     hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -8,7 +10,7 @@ hal.executable private @static_3d_sort  {
   hal.executable.variant @vulkan_spirv_fb, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> {
     hal.executable.entry_point @static_3d_sort attributes {
       interface = @io, ordinal = 0 : index,
-      translation.info = {passPipeline = 5 : i32, workloadPerWorkgroup = [16, 1]},
+      translation.info = #translation,
       workgroup_size = [16 : index, 1 : index, 1 : index]
     }
     builtin.module {
@@ -30,8 +32,8 @@ hal.executable private @static_3d_sort  {
             %5 = memref.cast %4 : memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>> to memref<?x?x?xi32>
             %6 = memref.subview %1[%arg0, 0, %arg1] [1, 32, 16] [1, 1, 1] : memref<64x32x128xi32> to memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>>
             %7 = memref.cast %6 : memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>> to memref<?x32x?xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>>
-            linalg.copy(%5, %6) {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[1, 0, 16], [1, 0, 1]]}} : memref<?x?x?xi32>, memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>>
-            linalg_ext.sort dimension(1) {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[1, 0, 16], [1, 0, 1]]}} outs(%7 : memref<?x32x?xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>>)  {
+            linalg.copy(%5, %6) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : memref<?x?x?xi32>, memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>>
+            linalg_ext.sort dimension(1) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} outs(%7 : memref<?x32x?xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>>)  {
             ^bb0(%arg2: i32, %arg3: i32):  // no predecessors
               %8 = arith.cmpi slt, %arg2, %arg3 : i32
               linalg_ext.yield %8 : i1
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir
index 8bf2d36bced6..958207c71242 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir
@@ -8,7 +8,8 @@
 #map5 = affine_map<(d0, d1, d2) -> (d2, d1)>
 #map6 = affine_map<(d0, d1, d2) -> (d0, d1)>
 
-#config = {tileSizes = [[8, 16], [1, 1], [0, 0, 1]]}
+#config = #iree_codegen.lowering.config<tile_sizes = [[8, 16], [1, 1], [0, 0, 1]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 16]>
 
 hal.executable private @matmul  {
   hal.interface @io {
@@ -20,7 +21,7 @@ hal.executable private @matmul  {
     hal.executable.entry_point @matmul attributes {
       interface = @io, ordinal = 0 : index,
       workgroup_size = [16: index, 8: index, 1: index],
-      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [8, 16]}
+      translation.info = #translation
     }
     builtin.module {
       func @matmul() {
@@ -82,8 +83,8 @@ hal.executable private @matmul  {
 
 // -----
 
-#config = {tileSizes = [[1, 4, 32], [1, 1, 1]]}
-
+#config = #iree_codegen.lowering.config<tile_sizes = [[1, 4, 32], [1, 1, 1]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4, 1]>
 hal.executable private @conv_1d  {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -94,7 +95,7 @@ hal.executable private @conv_1d  {
     hal.executable.entry_point @conv_1d attributes {
       interface = @io, ordinal = 0 : index,
       workgroup_size = [32: index, 4: index, 1: index],
-      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]}
+      translation.info = #translation
     }
     builtin.module {
       func @conv_1d() {
@@ -165,8 +166,8 @@ hal.executable private @conv_1d  {
 #map6 = affine_map<(d0)[s0] -> (4, -d0 + s0)>
 #map7 = affine_map<(d0)[s0] -> (32, -d0 + s0)>
 
-#config = {tileSizes = [[0, 1, 4, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4]]}
-
+#config = #iree_codegen.lowering.config<tile_sizes = [[0, 1, 4, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4, 1]>
 hal.executable private @conv_no_padding  {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -177,7 +178,7 @@ hal.executable private @conv_no_padding  {
     hal.executable.entry_point @conv_no_padding attributes {
       interface = @io, ordinal = 0 : index,
       workgroup_size = [32: index, 4: index, 1: index],
-      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]}
+      translation.info = #translation
     }
     builtin.module {
       func @conv_no_padding() {
@@ -292,8 +293,8 @@ hal.executable private @conv_no_padding  {
 
 // -----
 
-#config = {tileSizes = [[0, 0, 1, 4, 32], [0, 0, 1, 1, 1]]}
-
+#config = #iree_codegen.lowering.config<tile_sizes = [[0, 0, 1, 4, 32], [0, 0, 1, 1, 1]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4, 1]>
 hal.executable private @conv_3d  {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -304,7 +305,7 @@ hal.executable private @conv_3d  {
     hal.executable.entry_point @conv_3d attributes {
       interface = @io, ordinal = 0 : index,
       workgroup_size = [32: index, 4: index, 1: index],
-      translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]}
+      translation.info = #translation
     }
     builtin.module {
       func @conv_3d() {
@@ -365,8 +366,8 @@ hal.executable private @conv_3d  {
 #map6 = affine_map<()[s0] -> (32, s0 * -32 + 13)>
 #map7 = affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 1092 + s0 + d1 * 78 + d2 * 6 + d3)>
 
-#config = {tileSizes = [[1, 4, 32], [1, 1, 1]]}
-
+#config = #iree_codegen.lowering.config<tile_sizes = [[1, 4, 32], [1, 1, 1]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4, 1]>
 module  {
   hal.executable private @pooling_nhwc_max  {
     hal.interface @io {
@@ -378,7 +379,7 @@ module  {
       hal.executable.entry_point @pooling_nhwc_max attributes {
         interface = @io, ordinal = 0 : index,
         workgroup_size = [32: index, 4: index, 1: index],
-        translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]}
+        translation.info = #translation
       }
       builtin.module {
         func @pooling_nhwc_max() {
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
index 2cd1b62aeaf9..6018c6b94794 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir
@@ -1,7 +1,7 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(iree-spirv-tile-and-distribute,iree-spirv-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s
 
-#config = {tileSizes = [[1, 8, 64], [1, 8, 4], [0, 0, 0, 4]]}
-
+#config = #iree_codegen.lowering.config<tile_sizes = [[1, 8, 64], [1, 8, 4], [0, 0, 0, 4]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8, 1]>
 hal.executable private @batch_matmul_static_shape  {
   hal.interface private @io  {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -12,7 +12,7 @@ hal.executable private @batch_matmul_static_shape  {
     hal.executable.entry_point @batch_matmul_static_shape attributes {
       interface = @io, ordinal = 0 : index,
       workgroup_size = [16: index, 1: index, 1: index],
-      translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8, 1]}
+      translation.info = #translation
     }
     builtin.module {
       func @batch_matmul_static_shape() {
@@ -370,8 +370,8 @@ hal.executable private @batch_matmul_static_shape  {
 
 // -----
 
-#config = {tileSizes = [[1, 8, 64], [1, 8, 4], [0, 0, 0, 4]]}
-
+#config = #iree_codegen.lowering.config<tile_sizes = [[1, 8, 64], [1, 8, 4], [0, 0, 0, 4]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8, 1]>
 hal.executable private @fused_fill_batch_matmul  {
   hal.interface private @io  {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -382,7 +382,7 @@ hal.executable private @fused_fill_batch_matmul  {
     hal.executable.entry_point @fused_fill_batch_matmul attributes {
       interface = @io, ordinal = 0 : index,
       workgroup_size = [16: index, 1: index, 1: index],
-      translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8, 1]}
+      translation.info = #translation
     }
     builtin.module {
       func @fused_fill_batch_matmul() {
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir
index ffd3b8f83b13..757dd467d2d2 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir
@@ -1,7 +1,7 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(canonicalize,iree-spirv-remove-one-trip-tiled-loop,iree-spirv-tile-and-distribute,iree-spirv-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s
 
-#config = {tileSizes = [[0, 4, 4, 16], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4]]}
-
+#config = #iree_codegen.lowering.config<tile_sizes = [[0, 4, 4, 16], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]>
 hal.executable private @conv_static_shape_f32  {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -13,7 +13,7 @@ hal.executable private @conv_static_shape_f32  {
       interface = @io,
       ordinal = 0 : index,
       workgroup_size = [4: index, 4: index, 1: index],
-      translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]}
+      translation.info = #translation
     } {
     ^bb0(%arg0 : index, %arg1 : index, %arg2 : index):
       %x = arith.constant 2: index
@@ -99,8 +99,8 @@ hal.executable private @conv_static_shape_f32  {
 
 // -----
 
-#config = {tileSizes = [[0, 4, 4, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]]}
-
+#config = #iree_codegen.lowering.config<tile_sizes = [[0, 4, 4, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]>
 hal.executable private @depthwise_conv_static_shape_f32  {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -112,7 +112,7 @@ hal.executable private @depthwise_conv_static_shape_f32  {
       interface = @io,
       ordinal = 0 : index,
       workgroup_size = [4: index, 4: index, 4: index],
-      translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]}
+      translation.info = #translation
     } {
     ^bb0(%arg0 : index, %arg1 : index, %arg2 : index):
       %x = arith.constant 6: index
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
index 0906c264c185..a3c5db1f5a0b 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir
@@ -1,7 +1,7 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(iree-spirv-tile-and-distribute,iree-spirv-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s
 
-#config = {tileSizes = [[8, 64], [8, 4], [0, 0, 4]]}
-
+#config = #iree_codegen.lowering.config<tile_sizes = [[8, 64], [8, 4], [0, 0, 4]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8]>
 hal.executable private @matmul_static_shape_f16  {
   hal.interface private @io  {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -12,7 +12,7 @@ hal.executable private @matmul_static_shape_f16  {
     hal.executable.entry_point @matmul_static_shape_f16 attributes {
       interface = @io, ordinal = 0 : index,
       workgroup_size = [16: index, 1: index, 1: index],
-      translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8]}
+      translation.info = #translation
     }
     builtin.module {
       func @matmul_static_shape_f16() {
@@ -66,8 +66,8 @@ hal.executable private @matmul_static_shape_f16  {
 
 // -----
 
-#config = {tileSizes = [[8, 64], [8, 4], [0, 0, 4]]}
-
+#config = #iree_codegen.lowering.config<tile_sizes = [[8, 64], [8, 4], [0, 0, 4]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8]>
 hal.executable private @matmul_static_shape_f32  {
   hal.interface private @io  {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -78,7 +78,7 @@ hal.executable private @matmul_static_shape_f32  {
     hal.executable.entry_point @matmul_static_shape_f32 attributes {
       interface = @io, ordinal = 0 : index,
       workgroup_size = [16: index, 1: index, 1: index],
-      translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8]}
+      translation.info = #translation
     }
     builtin.module {
       func @matmul_static_shape_f32() {
diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
index e63bd03118d0..42e13de893df 100644
--- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir
@@ -1,5 +1,7 @@
 // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-tile-and-vectorize-to-cooperative-ops))))' %s | IreeFileCheck %s
 
+#config = #iree_codegen.lowering.config<tile_sizes = [[16, 16, 16], [16, 16, 16]], native_vector_size = []>
+#translation = #iree_codegen.translation.info<"SPIRVVectorizeToCooperativeOps", workload_per_wg = [16, 16]>
 hal.executable public @matmul_256x1024x128_div_sub {
   hal.interface public @io {
     hal.interface.binding public @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -28,7 +30,7 @@ hal.executable public @matmul_256x1024x128_div_sub {
            subgroup_size = 32 : i32}>}> {
     hal.executable.entry_point public @matmul_256x1024x128_div_sub attributes {
       interface = @io, ordinal = 0 : index,
-      translation.info = {passPipeline = "SPIRVVectorizeToCooperativeOps", workloadPerWorkgroup = [16, 16]},
+      translation.info = #translation,
       workgroup_size = [32 : index, 1 : index, 1 : index]
     } {
     ^bb0(%arg0: index, %arg1: index, %arg2: index):  // no predecessors
@@ -63,14 +65,14 @@ hal.executable public @matmul_256x1024x128_div_sub {
             %11 = memref.subview %2[%arg0, 0] [16, 128] [1, 1] : memref<256x128xf16> to memref<16x128xf16, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>
             %12 = memref.subview %3[0, %arg1] [128, 16] [1, 1] : memref<128x1024xf16> to memref<128x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>
             %13 = memref.subview %4[%arg0, %arg1] [16, 16] [1, 1] : memref<256x1024xf16> to memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>
-            linalg.fill(%cst, %13) {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[16, 16, 16], [16, 16, 16]]}} : f16, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>
-            linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[16, 16, 16], [16, 16, 16]]}}
+            linalg.fill(%cst, %13) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f16, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>
+            linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config}
               ins(%11, %12 : memref<16x128xf16, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>, memref<128x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>)
               outs(%13 : memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>)
             linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]}
               ins(%13, %9, %10 : memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>)
               outs(%13 : memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>)
-              attrs =  {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[16, 16, 16], [16, 16, 16]]}} {
+              attrs =  {__internal_linalg_transform__ = "workgroup", lowering.config = #config} {
             ^bb0(%arg2: f16, %arg3: f16, %arg4: f16, %arg5: f16):  // no predecessors
               %14 = arith.divf %arg2, %arg3 : f16
               %15 = arith.subf %14, %arg4 : f16
diff --git a/iree/compiler/Codegen/SPIRV/test/vectorize_elementwise_ops.mlir b/iree/compiler/Codegen/SPIRV/test/vectorize_elementwise_ops.mlir
index 667dab3a02c3..3785956a2c20 100644
--- a/iree/compiler/Codegen/SPIRV/test/vectorize_elementwise_ops.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/vectorize_elementwise_ops.mlir
@@ -5,6 +5,7 @@
 //       CHECK:   vector.transfer_read %{{.+}}[%c0], {{.+}} memref<4xf32, #{{.+}}>, vector<4xf32>
 //       CHECK:   addf %{{.*}}, %{{.*}} : vector<4xf32>
 //       CHECK:   vector.transfer_write {{.*}} : vector<4xf32>, memref<4xf32
+#config = #iree_codegen.lowering.config<tile_sizes = [[128], [4]], native_vector_size = []>
 hal.executable private @elementwise_static_shape  {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -24,7 +25,7 @@ hal.executable private @elementwise_static_shape  {
         %ret0 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<128xf32>
         linalg.generic {
           __internal_linalg_transform__ = "workgroup",
-          lowering.config = {tileSizes = [[128], [4]]},
+          lowering.config = #config,
           indexing_maps = [affine_map<(i) -> (i)>,
                            affine_map<(i) -> (i)>,
                            affine_map<(i) -> (i)>],
@@ -54,6 +55,7 @@ hal.executable private @elementwise_static_shape  {
 //   CHECK-NOT:   vector.transfer_read
 //       CHECK:   scf.for
 //       CHECK:     scf.for
+#config = #iree_codegen.lowering.config<tile_sizes = [[1, 32], [1, 1]], native_vector_size = []>
 hal.executable private @elementwise_transpose  {
   hal.interface @io {
     hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read"
@@ -73,7 +75,7 @@ hal.executable private @elementwise_transpose  {
         %ret0 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<128x8xf32>
         linalg.generic {
           __internal_linalg_transform__ = "workgroup",
-          lowering.config = {tileSizes = [[1, 32], [1, 1]]},
+          lowering.config = #config,
           indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                            affine_map<(d0, d1) -> (d0)>,
                            affine_map<(d0, d1) -> (d0, d1)>],
diff --git a/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir
index bd16b5d9cacb..618ec1f128e0 100644
--- a/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir
+++ b/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir
@@ -1,5 +1,6 @@
 // RUN: iree-opt -split-input-file -iree-spirv-vectorize %s | IreeFileCheck %s
 
+#config = #iree_codegen.lowering.config<tile_sizes = [[2, 128], [], [1, 4], [0, 0, 4]], native_vector_size = []>
 func @matmul_2x128x4() {
   %c0 = arith.constant 0 : index
   %c128 = arith.constant 128 : index
@@ -25,10 +26,10 @@ func @matmul_2x128x4() {
       %11 = "gpu.thread_id"() {dimension = "y"} : () -> index
       %12 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%10]
       %13 = memref.subview %9[%11, %12] [1, 4] [1, 1] : memref<2x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> to memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>
-      linalg.fill(%cst, %13) {__internal_linalg_transform__ = "vectorize", lowering.config = {tileSizes = [[2, 128], [], [1, 4], [0, 0, 4]]}} : f32, memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>
+      linalg.fill(%cst, %13) {__internal_linalg_transform__ = "vectorize", lowering.config = #config} : f32, memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>
       %17 = memref.subview %7[%11, 0] [1, 4] [1, 1] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 4 + s0 + d1)>> to memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 4 + s0 + d1)>>
       %18 = memref.subview %8[0, %12] [4, 4] [1, 1] : memref<4x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> to memref<4x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>
-      linalg.matmul {__internal_linalg_transform__ = "vectorize", lowering.config = {tileSizes = [[2, 128], [], [1, 4], [0, 0, 4]]}}
+      linalg.matmul {__internal_linalg_transform__ = "vectorize", lowering.config = #config}
         ins(%17, %18 : memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 4 + s0 + d1)>>, memref<4x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>)
         outs(%13 : memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>)
     }
diff --git a/iree/compiler/Codegen/Utils/Utils.cpp b/iree/compiler/Codegen/Utils/Utils.cpp
index 4f55f594f7f6..58a1e0494c35 100644
--- a/iree/compiler/Codegen/Utils/Utils.cpp
+++ b/iree/compiler/Codegen/Utils/Utils.cpp
@@ -19,6 +19,10 @@
 namespace mlir {
 namespace iree_compiler {
 
+//===----------------------------------------------------------------------===//
+// Utility functions to get entry point(s)
+//===----------------------------------------------------------------------===//
+
 bool isEntryPoint(FuncOp func) { return func.isPublic(); }
 
 IREE::HAL::ExecutableEntryPointOp getEntryPoint(FuncOp funcOp) {
@@ -41,21 +45,9 @@ llvm::StringMap<IREE::HAL::ExecutableEntryPointOp> getAllEntryPoints(
   return entryPointOps;
 }
 
-IREE::HAL::TranslationInfo getTranslationInfo(FuncOp funcOp) {
-  auto entryPointOp = getEntryPoint(funcOp);
-  if (!entryPointOp) return nullptr;
-  return getTranslationInfo(entryPointOp);
-}
-
-void setTranslationInfo(FuncOp entryPointFn,
-                        IREE::HAL::DispatchLoweringPassPipeline passPipeline,
-                        ArrayRef<int64_t> workgroupSize,
-                        ArrayRef<int64_t> workloadPerWorkgroup) {
-  auto entryPointOp = getEntryPoint(entryPointFn);
-  auto translationInfo = buildTranslationInfo(
-      passPipeline, workloadPerWorkgroup, entryPointFn.getContext());
-  setTranslationInfo(entryPointOp, translationInfo, workgroupSize);
-}
+//===----------------------------------------------------------------------===//
+// Utility functions used in setting default configurations.
+//===----------------------------------------------------------------------===//
 
 SmallVector<unsigned> getPartitionedLoops(Operation *op) {
   if (auto mmt4dOp = dyn_cast<linalg::Mmt4DOp>(op)) {
@@ -80,45 +72,6 @@ SmallVector<unsigned> getPartitionedLoops(Operation *op) {
   return {};
 }
 
-LogicalResult setOpConfigAndEntryPointFnTranslation(
-    FuncOp entryPointFn, Operation *op, IREE::HAL::LoweringConfig config,
-    IREE::HAL::DispatchLoweringPassPipeline passPipeline,
-    ArrayRef<int64_t> workgroupSize) {
-  auto partitionedLoops = getPartitionedLoops(op);
-  SmallVector<int64_t, 3> workloadPerWorkgroup;
-  auto tileSizes = getTileSizes(config, 0);
-  if (!tileSizes.empty() && !partitionedLoops.empty()) {
-    for (unsigned depth : partitionedLoops) {
-      if (depth >= tileSizes.size()) {
-        return op->emitOpError(
-                   "illegal configuration for lowering op, expect first level "
-                   "tile size to contain at least ")
-               << partitionedLoops.back() << " elements";
-      }
-      if (tileSizes[depth] == 0) {
-        return op->emitOpError("illegal to set tilesize of loop ")
-               << depth
-               << " to zero since it is set to be partitioned at the flow "
-                  "level";
-      }
-      workloadPerWorkgroup.push_back(tileSizes[depth]);
-    }
-    if (!workloadPerWorkgroup.empty()) {
-      workloadPerWorkgroup =
-          llvm::to_vector<3>(llvm::reverse(workloadPerWorkgroup));
-    }
-  }
-  auto entryPointOp = getEntryPoint(entryPointFn);
-  if (!entryPointOp) {
-    return entryPointFn.emitOpError(
-        "unable to find entry point op for entry point function");
-  }
-  IREE::HAL::TranslationInfo translationInfo = buildTranslationInfo(
-      passPipeline, workloadPerWorkgroup, entryPointOp->getContext());
-  setTranslationInfo(entryPointOp, translationInfo, workgroupSize);
-  return success();
-}
-
 /// Walk up the defs of the view, to get the untiled value. Either walks up
 /// `ViewOpInterface` op-chains or the `subtensor` op-chains.
 static Value getViewSource(Value view) {
diff --git a/iree/compiler/Codegen/Utils/Utils.h b/iree/compiler/Codegen/Utils/Utils.h
index 120e2c4e96f3..f0c563a1c00d 100644
--- a/iree/compiler/Codegen/Utils/Utils.h
+++ b/iree/compiler/Codegen/Utils/Utils.h
@@ -8,7 +8,6 @@
 #define IREE_COMPILER_CODEGEN_UTILS_UTILS_H_
 
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
 #include "llvm/ADT/StringMap.h"
 #include "mlir/Dialect/Linalg/IR/LinalgOps.h"
 #include "mlir/IR/BuiltinOps.h"
@@ -18,6 +17,10 @@ namespace iree_compiler {
 
 static constexpr unsigned kNumMaxParallelDims = 3;
 
+//===----------------------------------------------------------------------===//
+// Utility functions to get entry point(s)
+//===----------------------------------------------------------------------===//
+
 /// Returns true if the given `func` is a kernel dispatch entry point.
 bool isEntryPoint(FuncOp func);
 
@@ -28,18 +31,9 @@ llvm::StringMap<IREE::HAL::ExecutableEntryPointOp> getAllEntryPoints(
 /// Returns the entry point op for the `funcOp`. Returns `nullptr` on failure.
 IREE::HAL::ExecutableEntryPointOp getEntryPoint(FuncOp funcOp);
 
-/// Returns the translation info for the `funcOp` (by looking at the entry
-/// point). Returns `nullptr` on failure.
-IREE::HAL::TranslationInfo getTranslationInfo(FuncOp funcOp);
-
-/// Sets the translation info on the `hal.executable.entry_point` op
-/// corresponding to the `entryPointFn`. Returns failure if a translation info
-/// is already set on the entry point op and is incompatible with what is being
-/// set.
-void setTranslationInfo(FuncOp entryPointFn,
-                        IREE::HAL::DispatchLoweringPassPipeline passPipeline,
-                        ArrayRef<int64_t> workgroupSize,
-                        ArrayRef<int64_t> workloadPerWorkgroup);
+//===----------------------------------------------------------------------===//
+// Utility functions used in setting default configurations.
+//===----------------------------------------------------------------------===//
 
 /// Returns the loops that are partitioned during dispatch region formations, in
 /// order, i.e. starting from the outer-most to innermost.
@@ -47,23 +41,6 @@ void setTranslationInfo(FuncOp entryPointFn,
 /// formation to tile and distribute the ops.
 SmallVector<unsigned> getPartitionedLoops(Operation *op);
 
-/// Sets translation for the entry-point function based on op configuration.
-LogicalResult setOpConfigAndEntryPointFnTranslation(
-    FuncOp entryPointFn, Operation *op, IREE::HAL::LoweringConfig config,
-    IREE::HAL::DispatchLoweringPassPipeline passPipeline,
-    ArrayRef<int64_t> workgroupSize = {});
-inline LogicalResult setOpConfigAndEntryPointFnTranslation(
-    FuncOp entryPointFn, Operation *op, TileSizesListTypeRef tileSizes,
-    ArrayRef<int64_t> nativeVectorSize,
-    IREE::HAL::DispatchLoweringPassPipeline passPipeline,
-    ArrayRef<int64_t> workgroupSize = {}) {
-  IREE::HAL::LoweringConfig config =
-      buildConfigAttr(tileSizes, nativeVectorSize, op->getContext());
-  setLoweringConfig(op, config);
-  return setOpConfigAndEntryPointFnTranslation(entryPointFn, op, config,
-                                               passPipeline, workgroupSize);
-}
-
 /// Returns the untiled type of a tiled view for both tensor and memref
 /// types. Either walks the `ViewOpInterface` chain (for memrefs) or the
 /// `subtensor` op chain (for tensors).
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp b/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
index 1ceb06673c9a..c009f37ab99f 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
+++ b/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp
@@ -697,7 +697,8 @@ struct ResolveShapedDim : public OpRewritePattern<tensor::DimOp> {
       return success();
     }
 
-    auto dynamicDims = IREE::Util::findDynamicDims(op.source(), op);
+    auto dynamicDims = IREE::Util::findDynamicDims(
+        op.source(), op->getBlock(), Block::iterator(op.getOperation()));
     if (!dynamicDims.hasValue()) {
       return rewriter.notifyMatchFailure(op, "no dynamic dims found/usable");
     }
@@ -768,6 +769,15 @@ void TensorSplatOp::getCanonicalizationPatterns(
   results.insert<FoldSplatReshapeIntoSplat>(context);
 }
 
+OpFoldResult TensorSplatOp::fold(ArrayRef<Attribute> operands) {
+  // Folding needs exactly one operand (the splat value) and that operand must
+  // be a known constant; in every other case the op is left unfolded.
+  if (operands.size() != 1 || !operands.front()) return {};
+  // Materialize the constant as a splat attribute of the result type.
+  auto resultType = result().getType().cast<ShapedType>();
+  return SplatElementsAttr::get(resultType, operands.front());
+}
+
 OpFoldResult TensorCloneOp::fold(ArrayRef<Attribute> operands) {
   if (operands[0]) {
     // Constants always fold.
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOps.td b/iree/compiler/Dialect/Flow/IR/FlowOps.td
index 102d86190eab..be3caede69f2 100644
--- a/iree/compiler/Dialect/Flow/IR/FlowOps.td
+++ b/iree/compiler/Dialect/Flow/IR/FlowOps.td
@@ -839,6 +839,7 @@ def FLOW_TensorSplatOp : FLOW_PureOp<"tensor.splat", [
   }];
 
   let hasCanonicalizer = 1;
+  let hasFolder = 1;
 }
 
 def FLOW_TensorCloneOp : FLOW_PureOp<"tensor.clone", [
diff --git a/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir b/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir
index bd843fb1a2b7..792b88dfe57c 100644
--- a/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir
+++ b/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir
@@ -154,6 +154,28 @@ func @storeConstScalar() -> tensor<i32> {
 
 // -----
 
+// CHECK-LABEL: @splatConst
+func @splatConst() -> tensor<4xi32> {
+  %0 = arith.constant 4 : i32
+  // CHECK-NEXT: %[[C:.+]] = arith.constant dense<4> : tensor<4xi32>
+  %1 = flow.tensor.splat %0 : tensor<4xi32>
+  // CHECK-NEXT: return %[[C]]
+  return %1 : tensor<4xi32>
+}
+
+// -----
+
+// CHECK-LABEL: @splatConstScalar
+func @splatConstScalar() -> tensor<i32> {
+  %0 = arith.constant 4 : i32
+  // CHECK-NEXT: %[[C:.+]] = arith.constant dense<4> : tensor<i32>
+  %1 = flow.tensor.splat %0 : tensor<i32>
+  // CHECK-NEXT: return %[[C]]
+  return %1 : tensor<i32>
+}
+
+// -----
+
 // CHECK-LABEL: @splatDynamicShape
 //  CHECK-SAME: (%[[DIM0:.+]]: index, %[[DIM1:.+]]: index)
 func @splatDynamicShape(%dim0: index, %dim1: index) -> tensor<?x?xi32> {
diff --git a/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgMatmulToMmt4D.cpp b/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgMatmulToMmt4D.cpp
index 1a103f853901..7e6cecf01c4d 100644
--- a/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgMatmulToMmt4D.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgMatmulToMmt4D.cpp
@@ -112,11 +112,6 @@ class LinalgMatmulOpToLinalgMmt4DOpPattern
       return failure();
     }
 
-    // This is for float only matmul for now. Integer data type might require
-    // r.h.s layout change.
-    if (!lhsType.getElementType().isF32() || !rhsType.getElementType().isF32())
-      return failure();
-
     int m = lhsType.getShape()[0];
     int k = rhsType.getShape()[0];
     int n = rhsType.getShape()[1];
diff --git a/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp b/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp
index 992878b07445..869143aca268 100644
--- a/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp
@@ -81,6 +81,7 @@ struct LinalgFillToFlowTensorSplat final
       // Don't convert linalg.fill ops that were fused together with other ops.
       return failure();
     }
+
     SmallVector<Value, 4> dynamicDims =
         getDynamicDimValues(rewriter, fillOp.getLoc(), fillOp.output());
     rewriter.replaceOpWithNewOp<TensorSplatOp>(
@@ -89,26 +90,6 @@ struct LinalgFillToFlowTensorSplat final
   }
 };
 
-struct ConvertSplatConstantOp : public OpRewritePattern<mlir::ConstantOp> {
-  using OpRewritePattern::OpRewritePattern;
-  LogicalResult matchAndRewrite(mlir::ConstantOp op,
-                                PatternRewriter &rewriter) const override {
-    if (op->getParentOfType<IREE::Flow::DispatchWorkgroupsOp>()) {
-      return rewriter.notifyMatchFailure(op, "ignoring dispatch ops");
-    }
-    auto splatAttr = op.getValue().dyn_cast<SplatElementsAttr>();
-    if (!splatAttr) {
-      return rewriter.notifyMatchFailure(op, "only looking for splats");
-    }
-    auto tensorType = op.getType().cast<TensorType>();
-    auto elementValue = rewriter.createOrFold<mlir::ConstantOp>(
-        op.getLoc(), tensorType.getElementType(), splatAttr.getSplatValue());
-    rewriter.replaceOpWithNewOp<IREE::Flow::TensorSplatOp>(
-        op, tensorType, elementValue, ValueRange{});
-    return success();
-  }
-};
-
 /// Converts linalg operations that can map to flow.tensor.* operations.
 struct ConvertLinalgTensorOpsPass
     : public ConvertLinalgTensorOpsBase<ConvertLinalgTensorOpsPass> {
@@ -135,8 +116,7 @@ struct ConvertLinalgTensorOpsPass
           LinalgTensorReshapeToFlowTensorReshape<linalg::TensorExpandShapeOp>>(
           context);
     } else {
-      patterns.insert<LinalgFillToFlowTensorSplat, ConvertSplatConstantOp>(
-          context);
+      patterns.insert<LinalgFillToFlowTensorSplat>(context);
     }
     IREE::Flow::TensorReshapeOp::getCanonicalizationPatterns(patterns, context);
     if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns)))) {
diff --git a/iree/compiler/Dialect/Flow/Transforms/DispatchLinalgOnTensors.cpp b/iree/compiler/Dialect/Flow/Transforms/DispatchLinalgOnTensors.cpp
index 6ba9c4bac62a..7861e2de71b3 100644
--- a/iree/compiler/Dialect/Flow/Transforms/DispatchLinalgOnTensors.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/DispatchLinalgOnTensors.cpp
@@ -476,7 +476,8 @@ static BlockArgument getTiedOperandBlockArgument(BlockArgument resultArg) {
                 // block argument. Single use can potentially be relaxed.
                 auto loadArg =
                     loadOp.source().template dyn_cast<BlockArgument>();
-                if (!loadArg || !loadArg.hasOneUse()) {
+                if (!loadArg || !loadArg.hasOneUse() ||
+                    loadArg.use_begin()->get() != storeOp.target()) {
                   return nullptr;
                 }
                 return loadArg;
diff --git a/iree/compiler/Dialect/Flow/Transforms/OutlineLargeConstants.cpp b/iree/compiler/Dialect/Flow/Transforms/OutlineLargeConstants.cpp
index 24de6786fcd1..27c3e80b565c 100644
--- a/iree/compiler/Dialect/Flow/Transforms/OutlineLargeConstants.cpp
+++ b/iree/compiler/Dialect/Flow/Transforms/OutlineLargeConstants.cpp
@@ -29,15 +29,11 @@ namespace Flow {
 // more efficient and fewer bindings.
 static bool isConstantLarge(arith::ConstantOp constantOp,
                             size_t minLargeConstantSize) {
-  if (constantOp.value().isa<SplatElementsAttr>()) {
-    // Never outline splats; we want those transient within streams.
-    return false;
-  }
   auto type = constantOp.getType();
   if (auto shapedType = type.dyn_cast<RankedTensorType>()) {
     size_t unpackedByteLength =
         (shapedType.getNumElements() * shapedType.getElementTypeBitWidth()) / 8;
-    if (unpackedByteLength > minLargeConstantSize) {
+    if (unpackedByteLength >= minLargeConstantSize) {
       return true;
     }
   }
@@ -67,6 +63,8 @@ class OutlineLargeConstantsPass
     : public OutlineLargeConstantsBase<OutlineLargeConstantsPass> {
  public:
   OutlineLargeConstantsPass() = default;
+  explicit OutlineLargeConstantsPass(size_t minLargeConstantSize)
+      : minLargeConstantSize(minLargeConstantSize) {}
 
   void getDependentDialects(DialectRegistry &registry) const override {
     registry.insert<IREE::Flow::FlowDialect, IREE::Util::UtilDialect>();
@@ -86,7 +84,7 @@ class OutlineLargeConstantsPass
     std::vector<std::pair<arith::ConstantOp, IREE::Util::GlobalOp>>
         replacements;
     for (auto &largeConstantOp :
-         findLargeConstantsInModule(moduleOp, minStorageSize.getValue())) {
+         findLargeConstantsInModule(moduleOp, minLargeConstantSize)) {
       std::string name;
       do {
         name = baseName + std::to_string(uniqueId++);
@@ -116,11 +114,14 @@ class OutlineLargeConstantsPass
       constantOp.erase();
     }
   }
+ private:
+  // Byte-size threshold handed to findLargeConstantsInModule().
+  size_t minLargeConstantSize = kMinLargeConstantSize;
 };
 
-std::unique_ptr<OperationPass<mlir::ModuleOp>>
-createOutlineLargeConstantsPass() {
-  return std::make_unique<OutlineLargeConstantsPass>();
+std::unique_ptr<OperationPass<mlir::ModuleOp>> createOutlineLargeConstantsPass(
+    size_t minLargeConstantSize) {
+  return std::make_unique<OutlineLargeConstantsPass>(minLargeConstantSize);
 }
 
 }  // namespace Flow
diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.h b/iree/compiler/Dialect/Flow/Transforms/Passes.h
index dafb8d635262..123cfb05956e 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.h
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.h
@@ -131,8 +131,12 @@ createPadLinalgOpsToIntegerMultiplePass(int paddingSize = 4);
 //===----------------------------------------------------------------------===//
 
 // Outlines large tensor constants into util.globals at the module level.
-std::unique_ptr<OperationPass<mlir::ModuleOp>>
-createOutlineLargeConstantsPass();
+//
+// TODO(#5493): implement the support for inlining constants into the command
+// buffer and raise this value to one that is measured to be good.
+static constexpr size_t kMinLargeConstantSize = 1;
+std::unique_ptr<OperationPass<mlir::ModuleOp>> createOutlineLargeConstantsPass(
+    size_t minLargeConstantSize = kMinLargeConstantSize);
 
 // Deduplicates equivalent executables.
 std::unique_ptr<OperationPass<mlir::ModuleOp>>
diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.td b/iree/compiler/Dialect/Flow/Transforms/Passes.td
index 4fee8c0ab20d..64941b2d5093 100644
--- a/iree/compiler/Dialect/Flow/Transforms/Passes.td
+++ b/iree/compiler/Dialect/Flow/Transforms/Passes.td
@@ -102,12 +102,8 @@ def OutlineDispatchRegions :
 def OutlineLargeConstants :
     Pass<"iree-flow-outline-large-constants", "mlir::ModuleOp"> {
   let summary = "Outlines large tensor constants into util.globals at the module level.";
-  let constructor = "mlir::iree_compiler::IREE::Flow::createOutlineLargeConstantsPass()";
-  let options = [
-    Option<"minStorageSize", "min-storage-size",
-           "int64_t", /*default=*/"64",
-           "Outlines constants with storage sizes > than this byte size.">
-  ];
+  // TODO(#5493): add a flag for this.
+  let constructor = "mlir::iree_compiler::IREE::Flow::createOutlineLargeConstantsPass(25)";
 }
 
 def PadLinalgOps :
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir b/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir
index c42e9b3be3be..63fbc2f9d5bf 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir
@@ -1114,3 +1114,17 @@ func @dynamic_slice(%arg0 : i32, %arg1 : i32, %arg2 : tensor<?xi32>,
 //  CHECK-SAME:       tensor<?x?xi32>{%[[D1]], %[[D2]]}, tensor<?xi32>{%[[D0]]}
 //  CHECK-NEXT:     %[[ARG4:.+]]: !flow.dispatch.tensor<readwrite:?x?xi32>
 //  CHECK-SAME:     %[[ARG5:.+]]: !flow.dispatch.tensor<readonly:?xi32>
+
+// -----
+
+func @extract_slice(%arg0 : tensor<?x?xf32>, %arg1 : index, %arg2 : index,
+    %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) -> tensor<?x?xf32> {
+  %0 = tensor.extract_slice %arg0[%arg1, %arg2] [%arg3, %arg4] [%arg5, %arg6] :
+      tensor<?x?xf32> to tensor<?x?xf32>
+  return %0 : tensor<?x?xf32>
+}
+//      CHECK: flow.dispatch.workgroups
+// CHECK-NEXT:   %[[INPUT:[a-zA-Z0-9]+]]: !flow.dispatch.tensor<readonly:?x?xf32>
+// CHECK-SAME:   %[[OUTPUT:[a-zA-Z0-9]+]]: !flow.dispatch.tensor<writeonly:?x?xf32>
+//      CHECK:   %[[SLICE:.+]] = flow.dispatch.tensor.load %[[INPUT]]
+//      CHECK:   flow.dispatch.tensor.store %[[SLICE]], %[[OUTPUT]]
diff --git a/iree/compiler/Dialect/Flow/Transforms/test/outline_large_constants.mlir b/iree/compiler/Dialect/Flow/Transforms/test/outline_large_constants.mlir
index 739411928a2c..cdc299f036fd 100644
--- a/iree/compiler/Dialect/Flow/Transforms/test/outline_large_constants.mlir
+++ b/iree/compiler/Dialect/Flow/Transforms/test/outline_large_constants.mlir
@@ -1,12 +1,10 @@
-// RUN: iree-opt -split-input-file -iree-flow-outline-large-constants='min-storage-size=9' %s | IreeFileCheck %s
+// RUN: iree-opt -split-input-file -iree-flow-outline-large-constants %s | IreeFileCheck %s
 
-// CHECK: util.global private @[[LARGE_VARIABLE:.+]] {noinline} = dense<{{.+}}> : tensor<8xf32>
-func @fn1() -> (tensor<2xf32>, tensor<512x128xf32>, tensor<8xf32>) {
+// CHECK: util.global private @[[LARGE_VARIABLE:.+]] {noinline} = dense<1.200000e+00> : tensor<512x128xf32>
+func @fn1() -> (tensor<2xf32>, tensor<512x128xf32>) {
   // CHECK-DAG: %[[SMALL_VALUE:.+]] = arith.constant dense<{{.+}}> : tensor<2xf32>
   %cst_0 = arith.constant dense<[0.0287729427, 0.0297581609]> : tensor<2xf32>
-  // CHECK-DAG: %[[SPLATG_VALUE:.+]] = arith.constant dense<{{.+}}> : tensor<512x128xf32>
+  // CHECK-DAG: %[[LARGE_VALUE:.+]] = util.global.load @[[LARGE_VARIABLE]] : tensor<512x128xf32>
   %cst_1 = arith.constant dense<1.2> : tensor<512x128xf32>
-  // CHECK-DAG: %[[LARGE_VALUE:.+]] = util.global.load @[[LARGE_VARIABLE]] : tensor<8xf32>
-  %cst_2 = arith.constant dense<[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]> : tensor<8xf32>
-  return %cst_0, %cst_1, %cst_2 : tensor<2xf32>, tensor<512x128xf32>, tensor<8xf32>
+  return %cst_0, %cst_1 : tensor<2xf32>, tensor<512x128xf32>
 }
diff --git a/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertStreamOps.cpp b/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertStreamOps.cpp
index 8fe6df3203de..19a03980141f 100644
--- a/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertStreamOps.cpp
+++ b/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertStreamOps.cpp
@@ -949,9 +949,6 @@ static Value splatFillPattern(Location loc, Value baseValue,
   baseValue = builder.createOrFold<arith::BitcastOp>(
       loc, builder.getIntegerType(baseBitWidth), baseValue);
 
-  // Treat i1 as i8.
-  if (baseBitWidth == 1) baseBitWidth = 8;
-
   switch (baseBitWidth) {
     case 8: {
       // (v << 24) | (v << 16) | (v << 8) | v
diff --git a/iree/compiler/Dialect/HAL/IR/BUILD b/iree/compiler/Dialect/HAL/IR/BUILD
index 9586bfc10959..c0cdf752ae22 100644
--- a/iree/compiler/Dialect/HAL/IR/BUILD
+++ b/iree/compiler/Dialect/HAL/IR/BUILD
@@ -27,7 +27,6 @@ td_library(
             "HALDialect.td",
             "HALInterfaces.td",
             "HALOps.td",
-            "LoweringConfig.td",
         ],
         include = ["*.td"],
     ),
@@ -46,14 +45,12 @@ cc_library(
         "HALOpFolders.cpp",
         "HALOps.cpp",
         "HALTypes.cpp",
-        "LoweringConfig.cpp",
     ],
     hdrs = [
         "HALDialect.h",
         "HALOps.h",
         "HALTraits.h",
         "HALTypes.h",
-        "LoweringConfig.h",
     ],
     textual_hdrs = [
         "HALAttrs.cpp.inc",
@@ -70,18 +67,12 @@ cc_library(
         "HALStructs.h.inc",
         "HALTypeInterfaces.cpp.inc",
         "HALTypeInterfaces.h.inc",
-        "LoweringConfig.h.inc",
-        "LoweringConfig.cpp.inc",
-        "LoweringConfigEnums.h.inc",
-        "LoweringConfigEnums.cpp.inc",
     ],
     deps = [
         ":HALInterfacesGen",
         ":HALOpsGen",
         ":HALStructsGen",
         ":HALTypesGen",
-        ":LoweringConfigEnumGen",
-        ":LoweringConfigGen",
         "//iree/compiler/Dialect/Shape/IR",
         "//iree/compiler/Dialect/Util/IR",
         "@llvm-project//llvm:Support",
@@ -221,37 +212,3 @@ iree_tablegen_doc(
     td_file = "HALOps.td",
     deps = [":td_files"],
 )
-
-gentbl_cc_library(
-    name = "LoweringConfigGen",
-    tbl_outs = [
-        (
-            ["-gen-struct-attr-decls"],
-            "LoweringConfig.h.inc",
-        ),
-        (
-            ["-gen-struct-attr-defs"],
-            "LoweringConfig.cpp.inc",
-        ),
-    ],
-    tblgen = "@llvm-project//mlir:mlir-tblgen",
-    td_file = "LoweringConfig.td",
-    deps = [":td_files"],
-)
-
-gentbl_cc_library(
-    name = "LoweringConfigEnumGen",
-    tbl_outs = [
-        (
-            ["-gen-enum-decls"],
-            "LoweringConfigEnums.h.inc",
-        ),
-        (
-            ["-gen-enum-defs"],
-            "LoweringConfigEnums.cpp.inc",
-        ),
-    ],
-    tblgen = "@llvm-project//mlir:mlir-tblgen",
-    td_file = "LoweringConfig.td",
-    deps = [":td_files"],
-)
diff --git a/iree/compiler/Dialect/HAL/IR/CMakeLists.txt b/iree/compiler/Dialect/HAL/IR/CMakeLists.txt
index 341d7ea45396..8d2316a5c360 100644
--- a/iree/compiler/Dialect/HAL/IR/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/IR/CMakeLists.txt
@@ -18,7 +18,6 @@ iree_cc_library(
     "HALOps.h"
     "HALTraits.h"
     "HALTypes.h"
-    "LoweringConfig.h"
   TEXTUAL_HDRS
     "HALAttrInterfaces.cpp.inc"
     "HALAttrInterfaces.h.inc"
@@ -34,22 +33,15 @@ iree_cc_library(
     "HALStructs.h.inc"
     "HALTypeInterfaces.cpp.inc"
     "HALTypeInterfaces.h.inc"
-    "LoweringConfig.cpp.inc"
-    "LoweringConfig.h.inc"
-    "LoweringConfigEnums.cpp.inc"
-    "LoweringConfigEnums.h.inc"
   SRCS
     "HALOpFolders.cpp"
     "HALOps.cpp"
     "HALTypes.cpp"
-    "LoweringConfig.cpp"
   DEPS
     ::HALInterfacesGen
     ::HALOpsGen
     ::HALStructsGen
     ::HALTypesGen
-    ::LoweringConfigEnumGen
-    ::LoweringConfigGen
     LLVMSupport
     MLIRIR
     MLIRMemRef
@@ -145,24 +137,4 @@ iree_tablegen_doc(
     -gen-dialect-doc HALDialect.md
 )
 
-iree_tablegen_library(
-  NAME
-    LoweringConfigGen
-  TD_FILE
-    "LoweringConfig.td"
-  OUTS
-    -gen-struct-attr-decls LoweringConfig.h.inc
-    -gen-struct-attr-defs LoweringConfig.cpp.inc
-)
-
-iree_tablegen_library(
-  NAME
-    LoweringConfigEnumGen
-  TD_FILE
-    "LoweringConfig.td"
-  OUTS
-    -gen-enum-decls LoweringConfigEnums.h.inc
-    -gen-enum-defs LoweringConfigEnums.cpp.inc
-)
-
 ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/compiler/Dialect/HAL/IR/HALDialect.cpp b/iree/compiler/Dialect/HAL/IR/HALDialect.cpp
index 7b38e4a34883..a3e30eba8d37 100644
--- a/iree/compiler/Dialect/HAL/IR/HALDialect.cpp
+++ b/iree/compiler/Dialect/HAL/IR/HALDialect.cpp
@@ -10,7 +10,6 @@
 #include "iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertHALToVM.h"
 #include "iree/compiler/Dialect/HAL/IR/HALOps.h"
 #include "iree/compiler/Dialect/HAL/IR/HALTypes.h"
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
 #include "iree/compiler/Dialect/HAL/hal.imports.h"
 #include "iree/compiler/Dialect/Util/IR/UtilDialect.h"
 #include "iree/compiler/Dialect/VM/Conversion/ConversionDialectInterface.h"
@@ -44,9 +43,6 @@ struct HALOpAsmInterface : public OpAsmDialectInterface {
     } else if (auto targetAttr = attr.dyn_cast<ExecutableTargetAttr>()) {
       os << "executable_target_" << targetAttr.getSymbolNameFragment();
       return AliasResult::OverridableAlias;
-    } else if (attr.isa<LoweringConfig>()) {
-      os << "config";
-      return AliasResult::OverridableAlias;
     }
     return AliasResult::NoAlias;
   }
diff --git a/iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp b/iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp
deleted file mode 100644
index 4bc7e4e32087..000000000000
--- a/iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h"
-
-#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-
-static const char kConfigAttrName[] = "lowering.config";
-static const char kTranslationInfoAttrName[] = "translation.info";
-
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp.inc"
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfigEnums.cpp.inc"
-
-namespace mlir {
-namespace iree_compiler {
-
-//===----------------------------------------------------------------------===//
-// Helpers for getting/setting information needed to lower an executable. These
-// are information that are stored as attributes on the
-// `hal.executable.entry_point`
-//===----------------------------------------------------------------------===//
-
-IREE::HAL::TranslationInfo buildTranslationInfo(
-    IREE::HAL::DispatchLoweringPassPipeline passPipeline,
-    ArrayRef<int64_t> workloadPerWorkgroup, MLIRContext *context) {
-  OpBuilder builder(context);
-  auto pipelineAttr = StringAttr::get(context, stringifyEnum(passPipeline));
-  ArrayAttr workloadPerWorkgroupAttr = nullptr;
-  if (!workloadPerWorkgroup.empty()) {
-    workloadPerWorkgroupAttr = builder.getI64ArrayAttr(workloadPerWorkgroup);
-  }
-  return IREE::HAL::TranslationInfo::get(pipelineAttr, workloadPerWorkgroupAttr,
-                                         context);
-}
-
-IREE::HAL::TranslationInfo getTranslationInfo(
-    IREE::HAL::ExecutableEntryPointOp entryPointOp) {
-  return entryPointOp->getAttrOfType<IREE::HAL::TranslationInfo>(
-      kTranslationInfoAttrName);
-}
-
-SmallVector<int64_t> getWorkgroupSize(
-    IREE::HAL::ExecutableEntryPointOp entryPointOp) {
-  SmallVector<int64_t> workgroupSize;
-  if (Optional<ArrayAttr> workgroupSizeAttrList =
-          entryPointOp.workgroup_size()) {
-    workgroupSize.resize(workgroupSizeAttrList->size());
-    for (auto attr : llvm::enumerate(workgroupSizeAttrList.getValue())) {
-      workgroupSize[attr.index()] = attr.value().cast<IntegerAttr>().getInt();
-    }
-  }
-  return workgroupSize;
-}
-
-void setTranslationInfo(IREE::HAL::ExecutableEntryPointOp entryPointOp,
-                        IREE::HAL::TranslationInfo translationInfo,
-                        ArrayRef<int64_t> workgroupSize) {
-  entryPointOp->setAttr(kTranslationInfoAttrName, translationInfo);
-  // The workgroup size is set on the entry point op directly.
-  if (!workgroupSize.empty()) {
-    MLIRContext *context = entryPointOp->getContext();
-    auto indexType = IndexType::get(context);
-    auto attrs = llvm::to_vector<4>(
-        llvm::map_range(workgroupSize, [&](int64_t v) -> Attribute {
-          return IntegerAttr::get(indexType, v);
-        }));
-    entryPointOp.workgroup_sizeAttr(ArrayAttr::get(context, attrs));
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// Helpers for getting/setting the `hal.lowering.*` attributes that drive the
-// linalg-based lowering.
-// ===----------------------------------------------------------------------===//
-
-IREE::HAL::LoweringConfig getLoweringConfig(Operation *op) {
-  return op->getAttrOfType<IREE::HAL::LoweringConfig>(kConfigAttrName);
-}
-
-void setLoweringConfig(Operation *op, IREE::HAL::LoweringConfig config) {
-  op->setAttr(kConfigAttrName, config);
-}
-
-void eraseLoweringConfig(Operation *op) { op->removeAttr(kConfigAttrName); }
-
-//===----------------------------------------------------------------------===//
-// Helpers for accessing values from the LoweringConfig attribute.
-//===----------------------------------------------------------------------===//
-
-IREE::HAL::LoweringConfig buildConfigAttr(TileSizesListTypeRef tileSizes,
-                                          ArrayRef<int64_t> nativeVectorSize,
-                                          MLIRContext *context) {
-  OpBuilder builder(context);
-  ArrayAttr tileSizesAttr = nullptr;
-  if (!tileSizes.empty()) {
-    auto attrList = llvm::to_vector<4>(
-        llvm::map_range(tileSizes, [&](ArrayRef<int64_t> sizes) -> Attribute {
-          return builder.getI64ArrayAttr(sizes);
-        }));
-    tileSizesAttr = builder.getArrayAttr(attrList);
-  }
-  ArrayAttr nativeVectorSizeAttr = nullptr;
-  if (!nativeVectorSize.empty()) {
-    nativeVectorSizeAttr = builder.getI64ArrayAttr(nativeVectorSize);
-  }
-  return IREE::HAL::LoweringConfig::get(tileSizesAttr, nativeVectorSizeAttr,
-                                        /*passPipeline = */ nullptr,
-                                        /*workgroupSize = */ nullptr, context);
-}
-
-TileSizesListType getTileSizes(IREE::HAL::LoweringConfig config) {
-  auto tileSizesAttr = config.tileSizes();
-  if (!tileSizesAttr) return {};
-  return llvm::to_vector<1>(llvm::map_range(
-      tileSizesAttr, [&](Attribute attr) -> SmallVector<int64_t, 4> {
-        return llvm::to_vector<4>(
-            llvm::map_range(attr.cast<ArrayAttr>(), [&](Attribute intAttr) {
-              return intAttr.cast<IntegerAttr>().getInt();
-            }));
-      }));
-}
-
-SmallVector<int64_t, 4> getTileSizes(IREE::HAL::LoweringConfig config,
-                                     unsigned level) {
-  ArrayAttr tileSizesAttr = config.tileSizes();
-  if (!tileSizesAttr || tileSizesAttr.size() <= level) return {};
-  return llvm::to_vector<4>(llvm::map_range(
-      tileSizesAttr.getValue()[level].cast<ArrayAttr>(),
-      [&](Attribute intAttr) { return intAttr.cast<IntegerAttr>().getInt(); }));
-}
-
-SmallVector<Value, 4> getTileSizes(OpBuilder &b, Operation *op,
-                                   unsigned level) {
-  return llvm::to_vector<4>(
-      llvm::map_range(getTileSizes(op, level), [&](int64_t t) -> Value {
-        return b.create<arith::ConstantIndexOp>(op->getLoc(), t);
-      }));
-}
-
-SmallVector<int64_t, 4> getNativeVectorSize(IREE::HAL::LoweringConfig config) {
-  ArrayAttr nativeVectorSizeAttr = config.nativeVectorSize();
-  if (!nativeVectorSizeAttr) return {};
-  return llvm::to_vector<4>(llvm::map_range(
-      nativeVectorSizeAttr,
-      [&](Attribute intAttr) { return intAttr.cast<IntegerAttr>().getInt(); }));
-}
-
-}  // namespace iree_compiler
-}  // namespace mlir
diff --git a/iree/compiler/Dialect/HAL/IR/LoweringConfig.h b/iree/compiler/Dialect/HAL/IR/LoweringConfig.h
deleted file mode 100644
index a48d60ae45fb..000000000000
--- a/iree/compiler/Dialect/HAL/IR/LoweringConfig.h
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-//===- LoweringConfig.h - Declares configuration for lowering Linalg ops --===//
-//
-// This file declares an attribute that drives how a dispatch region containing
-// a set of operations are lowered. The attribute itself is attached to Linalg
-// operations, and help converting a Linalg operation into "scalar code".
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_
-#define IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_
-
-#include "iree/compiler/Dialect/HAL/IR/HALOps.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinAttributes.h"
-#include "mlir/IR/BuiltinTypes.h"
-
-// clang-format off
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfigEnums.h.inc"
-#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h.inc"
-// clang-format on
-
-namespace mlir {
-namespace iree_compiler {
-
-namespace IREE {
-namespace HAL {
-
-inline bool operator==(const TranslationInfo &lhs, const TranslationInfo &rhs) {
-  return lhs.passPipeline() == rhs.passPipeline() &&
-         lhs.workloadPerWorkgroup() == rhs.workloadPerWorkgroup();
-}
-
-inline bool operator!=(const TranslationInfo &lhs, const TranslationInfo &rhs) {
-  return !(lhs == rhs);
-}
-
-}  // namespace HAL
-}  // namespace IREE
-
-//===----------------------------------------------------------------------===//
-// Helpers for getting/setting information needed to lower an executable. These
-// are information that are stored as attributes on the
-// `hal.executable.entry_point`
-//===----------------------------------------------------------------------===//
-
-/// Builder method for IREE::HAL::TranslationInfoAttr.
-IREE::HAL::TranslationInfo buildTranslationInfo(
-    IREE::HAL::DispatchLoweringPassPipeline passPipeline,
-    ArrayRef<int64_t> workloadPerWorkgroup, MLIRContext *context);
-
-/// Gets the translate executable info attribute value associated with
-/// `entryPointOp`.
-IREE::HAL::TranslationInfo getTranslationInfo(
-    IREE::HAL::ExecutableEntryPointOp entryPointOp);
-
-/// Get the pass pipeline specified in the `translationInfo`
-inline Optional<IREE::HAL::DispatchLoweringPassPipeline>
-getLoweringPassPipeline(IREE::HAL::TranslationInfo translationInfo) {
-  return IREE::HAL::symbolizeDispatchLoweringPassPipeline(
-      translationInfo.passPipeline().getValue());
-}
-
-/// Returns the workgroup size specified on the `entryPointOp`.
-SmallVector<int64_t> getWorkgroupSize(
-    IREE::HAL::ExecutableEntryPointOp entryPointOp);
-
-/// Set the translate executable info with the entry point op. Overwrites the
-/// existing attributes.
-// TODO(ravishankarm, benvanik): Eventually all the information needed for the
-// lowering will be consolidated into a single attribute with richer
-// information.
-void setTranslationInfo(IREE::HAL::ExecutableEntryPointOp entryPointOp,
-                        IREE::HAL::TranslationInfo translationInfo,
-                        ArrayRef<int64_t> workgroupSize = {});
-
-//===----------------------------------------------------------------------===//
-// Helpers for getting/setting the `hal.lowering.*` attributes that drive the
-// linalg-based lowering.
-// ===----------------------------------------------------------------------===//
-
-/// Returns the lowering configuration set for an operation.
-IREE::HAL::LoweringConfig getLoweringConfig(Operation *op);
-
-/// Sets the lowering configuration, overwriting existing attribute values.
-void setLoweringConfig(Operation *op, IREE::HAL::LoweringConfig config);
-
-/// Removes the lowering configuration on the operation if it exists.
-void eraseLoweringConfig(Operation *op);
-
-//===----------------------------------------------------------------------===//
-// Helpers for accessing values from the LoweringConfig attribute.
-//===----------------------------------------------------------------------===//
-
-// TODO(ravishankarm): Struct attributes dont have a way of defining extra class
-// methods. When they do, these could all be moved into the attribute definition
-// itself.
-
-/// Stores the tile sizes to use at different levels of tiling as a vector of
-/// vectors.
-/// - First level tiling maps to workgroups.
-/// - Second level tiling maps to subgroups.
-/// - Third level tiling maps to invocations.
-using TileSizesListType = SmallVector<SmallVector<int64_t, 4>, 1>;
-using TileSizesListTypeRef = ArrayRef<SmallVector<int64_t, 4>>;
-
-/// Construct a lowering configuration.
-IREE::HAL::LoweringConfig buildConfigAttr(TileSizesListTypeRef tileSizes,
-                                          ArrayRef<int64_t> nativeVectorSize,
-                                          MLIRContext *context);
-
-/// Get the tile sizes for all levels.
-TileSizesListType getTileSizes(IREE::HAL::LoweringConfig config);
-
-/// Get the tile sizes for all levels for an operation if the lowering
-/// configuration is set.
-inline TileSizesListType getTileSizes(Operation *op) {
-  auto configAttr = getLoweringConfig(op);
-  if (!configAttr) return {};
-  return getTileSizes(configAttr);
-}
-
-/// Get the tile sizes for level `level`, if it is defined. Returns {} if tile
-/// sizes are not set for that level.
-SmallVector<int64_t, 4> getTileSizes(IREE::HAL::LoweringConfig config,
-                                     unsigned level);
-
-/// Get the tile sizes for level `level` for an operation if the lowering
-/// configuration for the operation is set, and tile sizes are defined for that
-/// level.
-inline SmallVector<int64_t, 4> getTileSizes(Operation *op, unsigned level) {
-  auto configAttr = getLoweringConfig(op);
-  if (!configAttr) return {};
-  return getTileSizes(configAttr, level);
-}
-SmallVector<Value, 4> getTileSizes(OpBuilder &b, Operation *op, unsigned level);
-
-/// Gets the native vector size defined in the lowering configuration.
-SmallVector<int64_t, 4> getNativeVectorSize(IREE::HAL::LoweringConfig config);
-
-/// Gets the native vector size defined for lowering an operation, if the
-/// lowering configuration is defined. If not returns empty vector.
-inline SmallVector<int64_t, 4> getNativeVectorSize(Operation *op) {
-  auto configAttr = getLoweringConfig(op);
-  if (!configAttr) return {};
-  return getNativeVectorSize(configAttr);
-}
-
-/// Get the pass pipeline specified in the `loweringConfig`
-inline Optional<IREE::HAL::DispatchLoweringPassPipeline>
-getLoweringPassPipeline(IREE::HAL::LoweringConfig config) {
-  return IREE::HAL::symbolizeDispatchLoweringPassPipeline(
-      config.passPipeline().getValue());
-}
-
-}  // namespace iree_compiler
-}  // namespace mlir
-#endif  // IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_
diff --git a/iree/compiler/Dialect/HAL/IR/LoweringConfig.td b/iree/compiler/Dialect/HAL/IR/LoweringConfig.td
deleted file mode 100644
index 9e520ceb907e..000000000000
--- a/iree/compiler/Dialect/HAL/IR/LoweringConfig.td
+++ /dev/null
@@ -1,79 +0,0 @@
-// Copyright 2021 The IREE Authors
-//
-// Licensed under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#ifndef IREE_COMPILER_DIALECT_HAL_IR_LOWERINGCONFIG
-#define IREE_COMPILER_DIALECT_HAL_IR_LOWERINGCONFIG
-
-// Putting this in HAL dialect for now.
-include "iree/compiler/Dialect/HAL/IR/HALDialect.td"
-
-// List of pre-existing pipelines for translating executables.
-def CPU_Default
-    : StrEnumAttrCase<"CPUDefault">;
-def CPU_Vectorization
-    : StrEnumAttrCase<"CPUVectorization">;
-def CPU_TensorToVectors
-    : StrEnumAttrCase<"CPUTensorToVectors">;
-
-def LLVMGPU_SimpleDistribute
-    : StrEnumAttrCase<"LLVMGPUDistribute">;
-def LLVMGPU_Vectorize
-    : StrEnumAttrCase<"LLVMGPUVectorize">;
-def LLVMGPU_MatmulSimt
-    : StrEnumAttrCase<"LLVMGPUMatmulSimt">;
-
-def SPIRV_SimpleDistribute
-    : StrEnumAttrCase<"SPIRVDistribute">;
-def SPIRV_DistributeToGlobalID
-    : StrEnumAttrCase<"SPIRVDistributeToGlobalID">;
-def SPIRV_Vectorize
-    : StrEnumAttrCase<"SPIRVVectorize">;
-def SPIRV_VectorizeToCooperativeOps
-    : StrEnumAttrCase<"SPIRVVectorizeToCooperativeOps">;
-def None
-    : StrEnumAttrCase<"None">;
-
-// EnumAttrCase for all known lowerings for ops within dispatch region
-// to scalar/native-vector code.
-def DispatchLoweringPassPipelineEnum : StrEnumAttr<
-    "DispatchLoweringPassPipeline",
-    "identifier for pass pipeline use to lower dispatch region",
-    [CPU_Default, CPU_TensorToVectors, CPU_Vectorization,
-     LLVMGPU_SimpleDistribute, LLVMGPU_Vectorize, LLVMGPU_MatmulSimt,
-     SPIRV_SimpleDistribute, SPIRV_DistributeToGlobalID,
-     SPIRV_Vectorize, SPIRV_VectorizeToCooperativeOps,
-     None]> {
-  let cppNamespace = "::mlir::iree_compiler::IREE::HAL";
-}
-
-def TileSizesListAttr :
-    TypedArrayAttrBase<I64ArrayAttr,
-                       "list of tile sizes for all levels"> { }
-
-// Attribute that captures information needed for translating the executables.
-def TranslationInfoAttr :
-  StructAttr<"TranslationInfo", HAL_Dialect, [
-    StructFieldAttr<"passPipeline", DispatchLoweringPassPipelineEnum>,
-    StructFieldAttr<"workloadPerWorkgroup",
-        DefaultValuedAttr<I64ArrayAttr, "{}">>,
-  ]>;
-
-// Attribute that carries information needed to perform
-// tiling/vectorization, etc.
-def HAL_LoweringConfigAttr :
-  StructAttr<"LoweringConfig", HAL_Dialect, [
-    StructFieldAttr<"tileSizes",
-        DefaultValuedAttr<TileSizesListAttr, "{}">>,
-    StructFieldAttr<"nativeVectorSize",
-        DefaultValuedAttr<I64ArrayAttr, "{}">>,
-    StructFieldAttr<"passPipeline",
-        DefaultValuedAttr<
-            DispatchLoweringPassPipelineEnum,
-            "\"IREE::HAL::DispatchLoweringPassPipeline::None\"">>,
-    StructFieldAttr<"workgroupSize",
-        DefaultValuedAttr<I64ArrayAttr, "{}">>
-  ]>;
-
-#endif // IREE_COMPILER_DIALECT_HAL_IR_LOWERINGCONFIG
diff --git a/iree/compiler/Dialect/HAL/Target/CUDA/BUILD b/iree/compiler/Dialect/HAL/Target/CUDA/BUILD
index 0be0ecd2ddca..bf8438203934 100644
--- a/iree/compiler/Dialect/HAL/Target/CUDA/BUILD
+++ b/iree/compiler/Dialect/HAL/Target/CUDA/BUILD
@@ -42,6 +42,7 @@ cc_library(
     deps = [
         ":cuda_libdevice",
         "//iree/compiler/Codegen:PassHeaders",
+        "//iree/compiler/Codegen/Dialect:IREECodegenDialect",
         "//iree/compiler/Codegen/LLVMGPU",
         "//iree/compiler/Dialect/HAL/Target",
         "//iree/compiler/Utils",
diff --git a/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp b/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp
index 51210f0fc772..4974fa9dc437 100644
--- a/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp
@@ -6,6 +6,7 @@
 
 #include "iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.h"
 
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Dialect/HAL/Target/CUDA/LLVMPasses.h"
 #include "iree/compiler/Dialect/HAL/Target/CUDA/libdevice.h"
@@ -150,7 +151,7 @@ class CUDATargetBackend final : public TargetBackend {
   std::string name() const override { return "cuda"; }
 
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<gpu::GPUDialect>();
+    registry.insert<gpu::GPUDialect, IREE::Codegen::IREECodegenDialect>();
     mlir::registerLLVMDialectTranslation(registry);
     mlir::registerNVVMDialectTranslation(registry);
   }
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/BUILD b/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
index 398b138f93be..710eb0fb2f48 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/BUILD
@@ -37,6 +37,7 @@ cc_library(
         ":StaticLibraryGenerator",
         "//iree/compiler/Codegen:PassHeaders",
         "//iree/compiler/Codegen/Common",
+        "//iree/compiler/Codegen/Dialect:IREECodegenDialect",
         "//iree/compiler/Codegen/LLVMCPU",
         "//iree/compiler/Codegen/Utils",
         "//iree/compiler/Dialect/HAL/Target",
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
index 7c24113f9522..f6ea0e49115d 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt
@@ -46,6 +46,7 @@ iree_cc_library(
     MLIRLLVMToLLVMIRTranslation
     MLIRTargetLLVMIRExport
     iree::compiler::Codegen::Common
+    iree::compiler::Codegen::Dialect::IREECodegenDialect
     iree::compiler::Codegen::LLVMCPU
     iree::compiler::Codegen::PassHeaders
     iree::compiler::Codegen::Utils
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
index 4cf5cd7887d4..5275258ce1eb 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp
@@ -8,6 +8,7 @@
 
 #include <cstdlib>
 
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h"
 #include "iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.h"
@@ -110,6 +111,7 @@ class LLVMAOTTargetBackend final : public TargetBackend {
 
   void getDependentDialects(DialectRegistry &registry) const override {
     mlir::registerLLVMDialectTranslation(registry);
+    registry.insert<IREE::Codegen::IREECodegenDialect>();
   }
 
   IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp
index 87987a8ec214..cdc1b2111cbf 100644
--- a/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp
+++ b/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp
@@ -62,6 +62,9 @@ class UnixLinkerTool : public LinkerTool {
       // Produce a Mach-O dylib file.
       flags.push_back("-dylib");
       flags.push_back("-flat_namespace");
+      flags.push_back(
+          "-L /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib "
+          "-lSystem");
 
       // HACK: we insert libm calls. This is *not good*.
       // Until the MLIR LLVM lowering paths no longer introduce these,
diff --git a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/BUILD b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/BUILD
index 01f3323acdd7..6fd21ab87791 100644
--- a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/BUILD
+++ b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/BUILD
@@ -28,6 +28,7 @@ cc_library(
         ":SPIRVToMSL",
         "//iree/compiler/Codegen:PassHeaders",
         "//iree/compiler/Codegen/Common",
+        "//iree/compiler/Codegen/Dialect:IREECodegenDialect",
         "//iree/compiler/Codegen/SPIRV",
         "//iree/compiler/Codegen/Utils",
         "//iree/compiler/Dialect/HAL/Target",
diff --git a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/CMakeLists.txt
index d5fecf0b9e2e..d01431a31b30 100644
--- a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/CMakeLists.txt
@@ -30,6 +30,7 @@ iree_cc_library(
     MLIRSPIRVSerialization
     MLIRVector
     iree::compiler::Codegen::Common
+    iree::compiler::Codegen::Dialect::IREECodegenDialect
     iree::compiler::Codegen::PassHeaders
     iree::compiler::Codegen::SPIRV
     iree::compiler::Codegen::Utils
diff --git a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp
index ed74c7079c0f..6d8ec736a34e 100644
--- a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp
@@ -6,6 +6,7 @@
 
 #include "iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.h"
 
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Dialect/HAL/Target/MetalSPIRV/SPIRVToMSL.h"
 #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h"
@@ -44,7 +45,8 @@ class MetalSPIRVTargetBackend : public TargetBackend {
   std::string name() const override { return "metal"; }
 
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<spirv::SPIRVDialect, gpu::GPUDialect>();
+    registry.insert<IREE::Codegen::IREECodegenDialect, spirv::SPIRVDialect,
+                    gpu::GPUDialect>();
   }
 
   IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
diff --git a/iree/compiler/Dialect/HAL/Target/ROCM/BUILD b/iree/compiler/Dialect/HAL/Target/ROCM/BUILD
index 67ffe56895b1..6c1f412886bf 100644
--- a/iree/compiler/Dialect/HAL/Target/ROCM/BUILD
+++ b/iree/compiler/Dialect/HAL/Target/ROCM/BUILD
@@ -31,6 +31,7 @@ cc_library(
     ],
     deps = [
         "//iree/compiler/Codegen:PassHeaders",
+        "//iree/compiler/Codegen/Dialect:IREECodegenDialect",
         "//iree/compiler/Codegen/LLVMGPU",
         "//iree/compiler/Dialect/HAL/Target",
         "//iree/compiler/Utils",
diff --git a/iree/compiler/Dialect/HAL/Target/ROCM/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/ROCM/CMakeLists.txt
index a57a8b868a93..041ca473dbbc 100644
--- a/iree/compiler/Dialect/HAL/Target/ROCM/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Target/ROCM/CMakeLists.txt
@@ -38,6 +38,7 @@ iree_cc_library(
     MLIRROCDLToLLVMIRTranslation
     MLIRSupport
     MLIRTargetLLVMIRExport
+    iree::compiler::Codegen::Dialect::IREECodegenDialect
     iree::compiler::Codegen::LLVMGPU
     iree::compiler::Codegen::PassHeaders
     iree::compiler::Dialect::HAL::Target
diff --git a/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp b/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp
index 08f26434316d..90aaeb32f4c2 100644
--- a/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp
@@ -8,6 +8,7 @@
 
 #include <mutex>
 
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h"
 #include "iree/compiler/Utils/FlatbufferUtils.h"
@@ -77,13 +78,13 @@ class ROCMTargetBackend final : public TargetBackend {
   void getDependentDialects(DialectRegistry &registry) const override {
     mlir::registerLLVMDialectTranslation(registry);
     mlir::registerROCDLDialectTranslation(registry);
+    registry.insert<IREE::Codegen::IREECodegenDialect>();
   }
 
   IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
       MLIRContext *context) const override {
     Builder b(context);
     SmallVector<NamedAttribute> configItems;
-    ;
     configItems.emplace_back(b.getIdentifier("executable_targets"),
                              getExecutableTargets(context));
 
diff --git a/iree/compiler/Dialect/HAL/Target/VMVX/BUILD b/iree/compiler/Dialect/HAL/Target/VMVX/BUILD
index fdf19ce140c6..4582466745b0 100644
--- a/iree/compiler/Dialect/HAL/Target/VMVX/BUILD
+++ b/iree/compiler/Dialect/HAL/Target/VMVX/BUILD
@@ -30,6 +30,7 @@ cc_library(
     ],
     deps = [
         "//iree/compiler/Codegen:PassHeaders",
+        "//iree/compiler/Codegen/Dialect:IREECodegenDialect",
         "//iree/compiler/Dialect/Flow/IR",
         "//iree/compiler/Dialect/HAL/Target",
         "//iree/compiler/Dialect/Modules/VMVX/IR:VMVXDialect",
diff --git a/iree/compiler/Dialect/HAL/Target/VMVX/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/VMVX/CMakeLists.txt
index f21e2c09ae4e..757b3fd2bad6 100644
--- a/iree/compiler/Dialect/HAL/Target/VMVX/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Target/VMVX/CMakeLists.txt
@@ -26,6 +26,7 @@ iree_cc_library(
     MLIRIR
     MLIRPass
     MLIRSupport
+    iree::compiler::Codegen::Dialect::IREECodegenDialect
     iree::compiler::Codegen::PassHeaders
     iree::compiler::Dialect::Flow::IR
     iree::compiler::Dialect::HAL::Target
diff --git a/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp b/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp
index 9206b7541607..8b3fa5274e0d 100644
--- a/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp
@@ -6,6 +6,7 @@
 
 #include "iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.h"
 
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
 #include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
 #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h"
 #include "iree/compiler/Dialect/Modules/VMVX/IR/VMVXDialect.h"
@@ -35,7 +36,8 @@ class VMVXTargetBackend final : public TargetBackend {
   std::string name() const override { return "vmvx"; }
 
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<VM::VMDialect, VMVX::VMVXDialect>();
+    registry.insert<IREE::Codegen::IREECodegenDialect, VM::VMDialect,
+                    VMVX::VMVXDialect>();
   }
 
   IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
diff --git a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/BUILD b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/BUILD
index 0345228d8572..2d398cc93b0b 100644
--- a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/BUILD
+++ b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/BUILD
@@ -31,6 +31,7 @@ cc_library(
     deps = [
         "//iree/compiler/Codegen:PassHeaders",
         "//iree/compiler/Codegen/Common",
+        "//iree/compiler/Codegen/Dialect:IREECodegenDialect",
         "//iree/compiler/Codegen/SPIRV",
         "//iree/compiler/Codegen/Utils",
         "//iree/compiler/Dialect/Flow/IR",
diff --git a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/CMakeLists.txt
index d7f45902413d..d653bfe7c1a9 100644
--- a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/CMakeLists.txt
+++ b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/CMakeLists.txt
@@ -34,6 +34,7 @@ iree_cc_library(
     MLIRSupport
     MLIRVector
     iree::compiler::Codegen::Common
+    iree::compiler::Codegen::Dialect::IREECodegenDialect
     iree::compiler::Codegen::PassHeaders
     iree::compiler::Codegen::SPIRV
     iree::compiler::Codegen::Utils
diff --git a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp
index 3afdcba1a76a..9af89bc04ae4 100644
--- a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp
+++ b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp
@@ -6,6 +6,7 @@
 
 #include "iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.h"
 
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
 #include "iree/compiler/Codegen/Passes.h"
 #include "iree/compiler/Dialect/Flow/IR/FlowOps.h"
 #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h"
@@ -119,8 +120,8 @@ class VulkanSPIRVTargetBackend : public TargetBackend {
   std::string name() const override { return "vulkan"; }
 
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry
-        .insert<Vulkan::VulkanDialect, spirv::SPIRVDialect, gpu::GPUDialect>();
+    registry.insert<IREE::Codegen::IREECodegenDialect, Vulkan::VulkanDialect,
+                    spirv::SPIRVDialect, gpu::GPUDialect>();
   }
 
   IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget(
diff --git a/iree/compiler/Dialect/Shape/IR/Builders.cpp b/iree/compiler/Dialect/Shape/IR/Builders.cpp
index c4557ee26aa8..8a56f16664b8 100644
--- a/iree/compiler/Dialect/Shape/IR/Builders.cpp
+++ b/iree/compiler/Dialect/Shape/IR/Builders.cpp
@@ -141,8 +141,8 @@ SmallVector<Value, 4> buildOrFindDynamicDimsForValue(Location loc, Value value,
   // This is the first step on the path: we are going to gradually start
   // removing the implementation of the ShapeCarryingInterface on ops and use
   // the new ShapeAwareOpInterface.
-  auto dynamicDims =
-      IREE::Util::findDynamicDims(value, &*builder.getInsertionPoint());
+  auto dynamicDims = IREE::Util::findDynamicDims(value, builder.getBlock(),
+                                                 builder.getInsertionPoint());
   if (dynamicDims.hasValue()) {
     return llvm::to_vector<4>(dynamicDims.getValue());
   }
diff --git a/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp b/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp
index b77c2ef2029e..84a19314f86d 100644
--- a/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp
+++ b/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp
@@ -306,7 +306,9 @@ void ResourceDeallocaOp::getCanonicalizationPatterns(
 OpFoldResult ResourceSizeOp::fold(ArrayRef<Attribute> operands) {
   auto sizeAwareType =
       operand().getType().cast<IREE::Util::SizeAwareTypeInterface>();
-  return sizeAwareType.findSizeValue(operand(), *this);
+  Operation *op = this->getOperation();
+  return sizeAwareType.findSizeValue(operand(), op->getBlock(),
+                                     Block::iterator(op));
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/iree/compiler/Dialect/Util/IR/UtilInterfaces.td b/iree/compiler/Dialect/Util/IR/UtilInterfaces.td
index e092f535b99c..fec6cd44418a 100644
--- a/iree/compiler/Dialect/Util/IR/UtilInterfaces.td
+++ b/iree/compiler/Dialect/Util/IR/UtilInterfaces.td
@@ -367,8 +367,9 @@ def Util_SizeAwareType : TypeInterface<"SizeAwareTypeInterface"> {
   let extraClassDeclaration = [{
     // Walks the SSA use-def chain to find the size of the type.
     // Returns nullptr if the size cannot be found or if it is defined after
-    // |forOp|.
-    static Value findSizeValue(Value resourceValue, Operation *forOp);
+    // {|block|, |insertionPoint|}.
+    static Value findSizeValue(Value resourceValue, Block *block,
+                               Block::iterator insertionPoint);
 
     // Returns an SSA value representing the byte size of |value| or nullptr
     // if not a sized value.
diff --git a/iree/compiler/Dialect/Util/IR/UtilTypes.cpp b/iree/compiler/Dialect/Util/IR/UtilTypes.cpp
index f1edb9240944..20d934649aab 100644
--- a/iree/compiler/Dialect/Util/IR/UtilTypes.cpp
+++ b/iree/compiler/Dialect/Util/IR/UtilTypes.cpp
@@ -294,34 +294,38 @@ void excludeTiedOperandAndResultIndices(
 // IREE::Util::SizeAwareTypeInterface
 //===----------------------------------------------------------------------===//
 
-static bool isValueUsableForOp(Value value, Operation *forOp) {
-  if (forOp->getBlock() == nullptr) {
+static bool isValueUsableForOp(Value value, Block *block,
+                               Block::iterator insertionPoint) {
+  if (block == nullptr) {
     // Op is not in a block; can't analyze (maybe?).
     return false;
   }
   auto *definingBlock = value.getParentBlock();
-  if (definingBlock == forOp->getBlock()) {
+  if (definingBlock == block) {
     // Defined in the same block; ensure block order.
     if (value.isa<BlockArgument>()) return true;
-    if (value.getDefiningOp()->isBeforeInBlock(forOp)) return true;
+    if (insertionPoint == block->end()) return true;
+    if (value.getDefiningOp()->isBeforeInBlock(&*insertionPoint)) {
+      return true;
+    }
   } else if (definingBlock->isEntryBlock()) {
     // Entry block always dominates - fast path for constants.
     return true;
   } else {
     // See if block the value is defined in dominates the forOp block.
     // TODO(benvanik): optimize this, it's terribly expensive to recompute.
-    DominanceInfo dominanceInfo(forOp->getParentOp());
-    return dominanceInfo.dominates(definingBlock, forOp->getBlock());
+    DominanceInfo dominanceInfo(block->getParentOp());
+    return dominanceInfo.dominates(definingBlock, block);
   }
   return false;
 }
 
 // static
-Value SizeAwareTypeInterface::findSizeValue(Value resourceValue,
-                                            Operation *forOp) {
+Value SizeAwareTypeInterface::findSizeValue(Value resourceValue, Block *block,
+                                            Block::iterator insertionPoint) {
   // See if the value is produced by a size-aware op; we can just ask for the
   // size it has tied. Walking upward is always good as we know any size we find
-  // dominates |forOp|.
+  // dominates {|block|, |insertionPoint|}.
   SmallVector<Value> worklist;
   worklist.push_back(resourceValue);
   while (!worklist.empty()) {
@@ -347,7 +351,8 @@ Value SizeAwareTypeInterface::findSizeValue(Value resourceValue,
               use.getOwner())) {
         auto sizeValue = sizeAwareOp.getOperandSize(use.getOperandNumber());
         if (sizeValue) {
-          if (isValueUsableForOp(sizeValue, forOp)) return sizeValue;
+          if (isValueUsableForOp(sizeValue, block, insertionPoint))
+            return sizeValue;
         }
       }
       if (auto tiedOp =
@@ -369,8 +374,8 @@ Value SizeAwareTypeInterface::queryValueSize(Location loc, Value resourceValue,
     return {};  // Not a sized type.
   }
   if (!builder.getInsertionPoint().getNodePtr()->isKnownSentinel()) {
-    Operation &insertionPt = *builder.getInsertionPoint();
-    auto sizeValue = sizeAwareType.findSizeValue(resourceValue, &insertionPt);
+    auto sizeValue = sizeAwareType.findSizeValue(
+        resourceValue, builder.getBlock(), builder.getInsertionPoint());
     if (sizeValue) {
       return sizeValue;  // Found in IR.
     }
@@ -414,9 +419,10 @@ ValueRange findVariadicDynamicDims(unsigned idx, ValueRange values,
   return dynamicDims.slice(offset, shapedType.getNumDynamicDims());
 }
 
-Optional<ValueRange> findDynamicDims(Value shapedValue, Operation *forOp) {
+Optional<ValueRange> findDynamicDims(Value shapedValue, Block *block,
+                                     Block::iterator insertionPoint) {
   // Look up the use-def chain: always safe, as any value we reach dominates
-  // |forOp| implicitly.
+  // {|block|, |insertionPoint|} implicitly.
   SmallVector<Value> worklist;
   worklist.push_back(shapedValue);
   while (!worklist.empty()) {
@@ -432,16 +438,16 @@ Optional<ValueRange> findDynamicDims(Value shapedValue, Operation *forOp) {
     }
   }
 
-  // Look down the use-def chain: not safe at some point because we'll move
-  // past where |forOp| is dominated. This is often fine for a bit, though, as
-  // |forOp| may be a user of |shapedValue| and be able to provide the shape
-  // itself.
+  // Look down the use-def chain: not safe at some point because we'll move past
+  // where {|block|, |insertionPoint|} is dominated. This is often fine for a
+  // bit, though, as the op at {|block|, |insertionPoint|} may be a user of
+  // |shapedValue| and be able to provide the shape itself.
   for (auto &use : shapedValue.getUses()) {
     if (auto shapeAwareOp = dyn_cast<ShapeAwareOpInterface>(use.getOwner())) {
       auto dynamicDims =
           shapeAwareOp.getOperandDynamicDims(use.getOperandNumber());
       if (llvm::all_of(dynamicDims, [&](Value dim) {
-            return isValueUsableForOp(dim, forOp);
+            return isValueUsableForOp(dim, block, insertionPoint);
           })) {
         return dynamicDims;
       }
diff --git a/iree/compiler/Dialect/Util/IR/UtilTypes.h b/iree/compiler/Dialect/Util/IR/UtilTypes.h
index aec114aacded..03faf9c03690 100644
--- a/iree/compiler/Dialect/Util/IR/UtilTypes.h
+++ b/iree/compiler/Dialect/Util/IR/UtilTypes.h
@@ -171,8 +171,9 @@ void excludeTiedOperandAndResultIndices(
 
 // Walks the SSA use-def chain to find the dynamic dimensions of the value.
 // Returns None if the shape cannot be found or if it is defined after
-// |forOp|.
-Optional<ValueRange> findDynamicDims(Value shapedValue, Operation *forOp);
+// {|block|, |insertionPoint|}.
+Optional<ValueRange> findDynamicDims(Value shapedValue, Block *block,
+                                     Block::iterator insertionPoint);
 
 // Returns the dynamic dimensions for the value at |idx|.
 ValueRange findVariadicDynamicDims(unsigned idx, ValueRange values,
diff --git a/iree/compiler/Utils/FlatbufferUtils.cpp b/iree/compiler/Utils/FlatbufferUtils.cpp
index febbde9bdda3..3a00885c0b13 100644
--- a/iree/compiler/Utils/FlatbufferUtils.cpp
+++ b/iree/compiler/Utils/FlatbufferUtils.cpp
@@ -32,6 +32,7 @@ static SmallVector<uint8_t, 32> cloneBufferIntoContiguousBytes(
   void *result =
       flatcc_builder_copy_buffer(fbb, packedData.data(), packedData.size());
   assert(result && "flatcc_emitter_t impl failed (non-default?)");
+  (void)result;
   return packedData;
 }
 
diff --git a/iree/hal/cuda/status_util.c b/iree/hal/cuda/status_util.c
index b6a1b9480cd3..7532ecd22c71 100644
--- a/iree/hal/cuda/status_util.c
+++ b/iree/hal/cuda/status_util.c
@@ -26,7 +26,7 @@ iree_status_t iree_hal_cuda_result_to_status(
   if (syms->cuGetErrorString(result, &error_string) != CUDA_SUCCESS) {
     error_string = "Unknown error.";
   }
-  return iree_make_status(IREE_STATUS_INTERNAL,
-                          "CUDA driver error '%s' (%d): %s", error_name, result,
-                          error_string);
+  return iree_make_status_with_location(file, line, IREE_STATUS_INTERNAL,
+                                        "CUDA driver error '%s' (%d): %s",
+                                        error_name, result, error_string);
 }
diff --git a/iree/hal/vulkan/status_util.c b/iree/hal/vulkan/status_util.c
index 705f299ec213..e61008c44900 100644
--- a/iree/hal/vulkan/status_util.c
+++ b/iree/hal/vulkan/status_util.c
@@ -37,17 +37,19 @@ iree_status_t iree_hal_vulkan_result_to_status(VkResult result,
     // Error codes.
     case VK_ERROR_OUT_OF_HOST_MEMORY:
       // A host memory allocation has failed.
-      return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
-                              "VK_ERROR_OUT_OF_HOST_MEMORY");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_RESOURCE_EXHAUSTED,
+                                            "VK_ERROR_OUT_OF_HOST_MEMORY");
     case VK_ERROR_OUT_OF_DEVICE_MEMORY:
       // A device memory allocation has failed.
-      return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
-                              "VK_ERROR_OUT_OF_DEVICE_MEMORY");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_RESOURCE_EXHAUSTED,
+                                            "VK_ERROR_OUT_OF_DEVICE_MEMORY");
     case VK_ERROR_INITIALIZATION_FAILED:
       // Initialization of an object could not be completed for
       // implementation-specific reasons.
-      return iree_make_status(IREE_STATUS_UNAVAILABLE,
-                              "VK_ERROR_INITIALIZATION_FAILED");
+      return iree_make_status_with_location(file, line, IREE_STATUS_UNAVAILABLE,
+                                            "VK_ERROR_INITIALIZATION_FAILED");
     case VK_ERROR_DEVICE_LOST:
       // The logical or physical device has been lost.
       //
@@ -116,87 +118,101 @@ iree_status_t iree_hal_vulkan_result_to_status(VkResult result,
       // command buffer is in the pending state, or whether resources are
       // considered in-use by the device, a return value of
       // VK_ERROR_DEVICE_LOST is equivalent to VK_SUCCESS.
-      return iree_make_status(IREE_STATUS_INTERNAL, "VK_ERROR_DEVICE_LOST");
+      return iree_make_status_with_location(file, line, IREE_STATUS_INTERNAL,
+                                            "VK_ERROR_DEVICE_LOST");
     case VK_ERROR_MEMORY_MAP_FAILED:
       // Mapping of a memory object has failed.
-      return iree_make_status(IREE_STATUS_INTERNAL,
-                              "VK_ERROR_MEMORY_MAP_FAILED");
+      return iree_make_status_with_location(file, line, IREE_STATUS_INTERNAL,
+                                            "VK_ERROR_MEMORY_MAP_FAILED");
     case VK_ERROR_LAYER_NOT_PRESENT:
       // A requested layer is not present or could not be loaded.
-      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
-                              "VK_ERROR_LAYER_NOT_PRESENT");
+      return iree_make_status_with_location(
+          file, line, IREE_STATUS_UNIMPLEMENTED, "VK_ERROR_LAYER_NOT_PRESENT");
     case VK_ERROR_EXTENSION_NOT_PRESENT:
       // A requested extension is not supported.
-      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
-                              "VK_ERROR_EXTENSION_NOT_PRESENT");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_UNIMPLEMENTED,
+                                            "VK_ERROR_EXTENSION_NOT_PRESENT");
     case VK_ERROR_FEATURE_NOT_PRESENT:
       // A requested feature is not supported.
-      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
-                              "VK_ERROR_FEATURE_NOT_PRESENT");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_UNIMPLEMENTED,
+                                            "VK_ERROR_FEATURE_NOT_PRESENT");
     case VK_ERROR_INCOMPATIBLE_DRIVER:
       // The requested version of Vulkan is not supported by the driver or is
       // otherwise incompatible for implementation-specific reasons.
-      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
-                              "VK_ERROR_INCOMPATIBLE_DRIVER");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_FAILED_PRECONDITION,
+                                            "VK_ERROR_INCOMPATIBLE_DRIVER");
     case VK_ERROR_TOO_MANY_OBJECTS:
       // Too many objects of the type have already been created.
-      return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
-                              "VK_ERROR_TOO_MANY_OBJECTS");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_RESOURCE_EXHAUSTED,
+                                            "VK_ERROR_TOO_MANY_OBJECTS");
     case VK_ERROR_FORMAT_NOT_SUPPORTED:
       // A requested format is not supported on this device.
-      return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
-                              "VK_ERROR_FORMAT_NOT_SUPPORTED");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_UNIMPLEMENTED,
+                                            "VK_ERROR_FORMAT_NOT_SUPPORTED");
     case VK_ERROR_FRAGMENTED_POOL:
       // A pool allocation has failed due to fragmentation of the pool’s
       // memory. This must only be returned if no attempt to allocate host
       // or device memory was made to accommodate the new allocation.
-      return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
-                              "VK_ERROR_FRAGMENTED_POOL");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_RESOURCE_EXHAUSTED,
+                                            "VK_ERROR_FRAGMENTED_POOL");
     case VK_ERROR_OUT_OF_POOL_MEMORY:
       // A pool memory allocation has failed. This must only be returned if no
       // attempt to allocate host or device memory was made to accommodate the
       // new allocation. If the failure was definitely due to fragmentation of
       // the pool, VK_ERROR_FRAGMENTED_POOL should be returned instead.
-      return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
-                              "VK_ERROR_OUT_OF_POOL_MEMORY");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_RESOURCE_EXHAUSTED,
+                                            "VK_ERROR_OUT_OF_POOL_MEMORY");
     case VK_ERROR_INVALID_EXTERNAL_HANDLE:
       // An external handle is not a valid handle of the specified type.
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "VK_ERROR_INVALID_EXTERNAL_HANDLE");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_INVALID_ARGUMENT,
+                                            "VK_ERROR_INVALID_EXTERNAL_HANDLE");
     case VK_ERROR_SURFACE_LOST_KHR:
       // A surface is no longer available.
-      return iree_make_status(IREE_STATUS_UNAVAILABLE,
-                              "VK_ERROR_SURFACE_LOST_KHR");
+      return iree_make_status_with_location(file, line, IREE_STATUS_UNAVAILABLE,
+                                            "VK_ERROR_SURFACE_LOST_KHR");
     case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR:
       // The requested window is already in use by Vulkan or another API in a
       // manner which prevents it from being used again.
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR");
+      return iree_make_status_with_location(
+          file, line, IREE_STATUS_INVALID_ARGUMENT,
+          "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR");
     case VK_ERROR_OUT_OF_DATE_KHR:
       // A surface has changed in such a way that it is no longer compatible
       // with the swapchain, and further presentation requests using the
       // swapchain will fail. Applications must query the new surface properties
       // and recreate their swapchain if they wish to continue presenting to the
       // surface.
-      return iree_make_status(IREE_STATUS_FAILED_PRECONDITION,
-                              "VK_ERROR_OUT_OF_DATE_KHR");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_FAILED_PRECONDITION,
+                                            "VK_ERROR_OUT_OF_DATE_KHR");
     case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR:
       // The display used by a swapchain does not use the same presentable image
       // layout, or is incompatible in a way that prevents sharing an image.
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR");
+      return iree_make_status_with_location(
+          file, line, IREE_STATUS_INVALID_ARGUMENT,
+          "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR");
     case VK_ERROR_VALIDATION_FAILED_EXT:
       // Validation layer testing failed. It is not expected that an
       // application would see this this error code during normal use of the
       // validation layers.
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "VK_ERROR_VALIDATION_FAILED_EXT");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_INVALID_ARGUMENT,
+                                            "VK_ERROR_VALIDATION_FAILED_EXT");
     case VK_ERROR_INVALID_SHADER_NV:
       // One or more shaders failed to compile or link. More details are
       // reported back to the application when the validation layer is enabled
       // using the extension VK_EXT_debug_report.
-      return iree_make_status(IREE_STATUS_INVALID_ARGUMENT,
-                              "VK_ERROR_INVALID_SHADER_NV");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_INVALID_ARGUMENT,
+                                            "VK_ERROR_INVALID_SHADER_NV");
     case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT:
       // When creating an image with
       // VkImageDrmFormatModifierExplicitCreateInfoEXT, it is the application’s
@@ -208,33 +224,37 @@ iree_status_t iree_hal_vulkan_result_to_status(VkResult result,
       // outside the scope of Vulkan, and therefore not described by Valid Usage
       // requirements). If this validation fails, then vkCreateImage returns
       // VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT.
-      return iree_make_status(
-          IREE_STATUS_INVALID_ARGUMENT,
+      return iree_make_status_with_location(
+          file, line, IREE_STATUS_INVALID_ARGUMENT,
           "VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT");
     case VK_ERROR_FRAGMENTATION_EXT:
       // A descriptor pool creation has failed due to fragmentation.
-      return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED,
-                              "VK_ERROR_FRAGMENTATION_EXT");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_RESOURCE_EXHAUSTED,
+                                            "VK_ERROR_FRAGMENTATION_EXT");
     case VK_ERROR_NOT_PERMITTED_EXT:
       // When creating a queue, the caller does not have sufficient privileges
       // to request to acquire a priority above the default priority
       // (VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT).
-      return iree_make_status(IREE_STATUS_PERMISSION_DENIED,
-                              "VK_ERROR_NOT_PERMITTED_EXT");
+      return iree_make_status_with_location(file, line,
+                                            IREE_STATUS_PERMISSION_DENIED,
+                                            "VK_ERROR_NOT_PERMITTED_EXT");
     case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT:
       // A buffer creation failed because the requested address is not
       // available.
-      return iree_make_status(IREE_STATUS_OUT_OF_RANGE,
-                              "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT");
+      return iree_make_status_with_location(
+          file, line, IREE_STATUS_OUT_OF_RANGE,
+          "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT");
     case VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT:
       // An operation on a swapchain created with
       // VK_FULL_SCREEN_EXCLUSIVE_APPLICATION_CONTROLLED_EXT failed as it did
       // not have exlusive full-screen access. This may occur due to
       // implementation-dependent reasons, outside of the application’s control.
-      return iree_make_status(IREE_STATUS_UNAVAILABLE,
-                              "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT");
+      return iree_make_status_with_location(
+          file, line, IREE_STATUS_UNAVAILABLE,
+          "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT");
     default:
-      return iree_make_status(IREE_STATUS_UNKNOWN, "VkResult=%u",
-                              (uint32_t)result);
+      return iree_make_status_with_location(file, line, IREE_STATUS_UNKNOWN,
+                                            "VkResult=%u", (uint32_t)result);
   }
 }
diff --git a/iree/test/e2e/regression/BUILD b/iree/test/e2e/regression/BUILD
index 77e48c3a3cda..71f6df50d603 100644
--- a/iree/test/e2e/regression/BUILD
+++ b/iree/test/e2e/regression/BUILD
@@ -131,7 +131,6 @@ iree_check_single_backend_test_suite(
     ],
     opt_flags = [
         "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=%d N0=8" % (4 if lhs_rhs_type == "i8" else 1),
-        "--iree-codegen-vectorize-linalg-mmt4d",
     ],
     target_backends_and_drivers = [
         ("dylib-llvm-aot", "dylib"),
@@ -152,7 +151,6 @@ iree_check_single_backend_test_suite(
     ],
     opt_flags = [
         "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=%d N0=8" % (4 if lhs_rhs_type == "i8" else 1),
-        "--iree-codegen-vectorize-linalg-mmt4d",
     ],
     target_backends_and_drivers = [
         ("dylib-llvm-aot", "dylib"),
diff --git a/iree/test/e2e/regression/CMakeLists.txt b/iree/test/e2e/regression/CMakeLists.txt
index 795a8747bb66..309f907987df 100644
--- a/iree/test/e2e/regression/CMakeLists.txt
+++ b/iree/test/e2e/regression/CMakeLists.txt
@@ -176,7 +176,6 @@ iree_generated_trace_runner_test(
     "vmvx"
   OPT_FLAGS
     "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=4 N0=8"
-    "--iree-codegen-vectorize-linalg-mmt4d"
 )
 
 iree_generated_trace_runner_test(
@@ -197,7 +196,6 @@ iree_generated_trace_runner_test(
     "vmvx"
   OPT_FLAGS
     "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=1 N0=8"
-    "--iree-codegen-vectorize-linalg-mmt4d"
 )
 
 iree_generated_trace_runner_test(
@@ -216,7 +214,6 @@ iree_generated_trace_runner_test(
     "dylib"
   OPT_FLAGS
     "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=4 N0=8"
-    "--iree-codegen-vectorize-linalg-mmt4d"
 )
 
 iree_generated_trace_runner_test(
@@ -235,7 +232,6 @@ iree_generated_trace_runner_test(
     "dylib"
   OPT_FLAGS
     "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=1 N0=8"
-    "--iree-codegen-vectorize-linalg-mmt4d"
 )
 
 ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ###
diff --git a/iree/test/e2e/regression/generate_e2e_matmul_tests.py b/iree/test/e2e/regression/generate_e2e_matmul_tests.py
index 26aa509ed6ef..d37d9f1a1bd5 100644
--- a/iree/test/e2e/regression/generate_e2e_matmul_tests.py
+++ b/iree/test/e2e/regression/generate_e2e_matmul_tests.py
@@ -4,118 +4,201 @@
 # Licensed under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-"""iree_generated_check_test generator for end-to-end matrix multiplication.
+"""iree_generated_trace_runner_test generator for e2e matmul tests.
 """
 
 import argparse
-import random
 import os
 import yaml
 import re
-
-
-# Returns lists of shapes as (M, K, N) tuples.
-# For example (M, K, 1) is a matrix*vector product, and (M, 1, N) is an outer
-# product.
-def get_test_shapes():
-  return {
-      "small": [  # Small sizes, square matrices
-          (x, x, x) for x in range(1, 40)
-      ] + [
-          # Small sizes, slightly rectangular matrices
-          (2, 3, 4),
-          (8, 7, 6),
-          (15, 16, 17),
-          (14, 19, 23),
-          (31, 33, 32),
-          (25, 41, 35),
-          # Small sizes, involving vectors (i.e. most rectangular cases)
-          (10, 1, 1),
-          (1, 10, 1),
-          (1, 1, 10),
-          (1, 10, 10),
-          (10, 1, 10),
-          (10, 10, 1),
-          # Small sizes, involving other very small dimensions just above 1
-          (13, 14, 2),
-          (3, 17, 12),
-          (21, 4, 18),
-          # Medium sizes, square matrices
-          (100, 100, 100),
-          # Medium sizes, slightly rectangular matrices
-          (101, 102, 103),
-          # Medium sizes, involving vectors (i.e. most rectangular cases)
-          (10000, 1, 1),
-          (1, 10000, 1),
-          (1, 1, 10000),
-          (1, 1000, 1000),
-          (1000, 1, 1000),
-          (1000, 1000, 1),
-          # Medium sizes, involving other very small dimensions just above 1
-          (1300, 1300, 2),
-          (1300, 1300, 3),
-          (1300, 1300, 4),
-      ],
-      "large": [
-          # Large sizes, powers of two
-          (256, 256, 512),
-          (512, 512, 128),
-          (1024, 512, 512),
-          (512, 1024, 512),
-          # Large sizes, powers of two minus one
-          (127, 63, 511),
-          # Large sizes, powers of two plus one
-          (129, 65, 513),
-          # Large sizes, misc.
-          (200, 300, 400),
-          (123, 456, 789),
-          (500, 500, 50),
-          # Be conservative in adding larger shapes. They can result in
-          # high latency tests. If you have to, consider splitting them
-          # out in a way that constrains the latency impact, e.g. by
-          # running on fewer backends/drivers or with fewer generators
-          # (see get_test_generators).
-      ]
-  }
-
-
-# Returns lists of 'generators', which are tuples of the form
-# (lhs_generator, rhs_generator, acc_generator, dynamicity)
-# The first 3 entries specify how to generate test input data.
-# The dynamicity entry chooses between static, dynamic or mixed shapes.
-#
-# TODO (Issue #7431): turn into enum and dataclass.
-def get_test_generators():
-  return {
-      "small": [
-          # Generators using simple matrices for ease of numerical debugging.
-          # They don't add significant test coverage (all bugs are hit by
-          # tests using random matrices anyway). They are only here to make
-          # the bulk of our debugging easier.
-          ("identity", "identity", "zero", "dynamic"),
-          ("random", "identity", "zero", "dynamic"),
-          ("identity", "random", "zero", "dynamic"),
-          ("identity", "identity", "random", "dynamic"),
-          # Generators using general random matrices
-          ("random", "random", "random", "dynamic"),
-          ("random", "random", "random", "static"),
-          # TODO: enable 'mixed' testcases. For now they cause iree-opt
-          # errors.
-          #("random", "random", "random", "mixed"),
-      ],
-      "large": [
-          # Fewer generators are used for large shapes, to limit the
-          # latency impact. Most bugs are going to be caught on small
-          # shapes anyway.
-          ("random", "random", "random", "dynamic"),
-          ("random", "random", "random", "static"),
-      ]
-  }
-
-
-# Generates a name for a test function in the generated MLIR code.
-def function_name(lhs_rhs_type, accum_type, shape, gen):
-  return f"{lhs_rhs_type}_{gen[3]}_{gen[0]}_{shape[0]}x{shape[1]}_times_{gen[1]}_{shape[1]}x{shape[2]}_plus_{gen[2]}_{accum_type}"
+import enum
+import dataclasses
+import typing
+
+
+# Data type of matrix entries. The string values must match MLIR data types.
+# This is a superset of the values accepted for the --lhs_rhs_types= flag,
+# as this also includes accumulator-specific types like i32.
+@enum.unique
+class MatrixElemTypeId(enum.Enum):
+  I8 = "i8"
+  I32 = "i32"
+  F32 = "f32"
+
+
+# Enumerates the collections of shapes that we can generate tests for.
+# The values are the accepted values for the --shapes= flag.
+@enum.unique
+class ShapesId(enum.Enum):
+  SMALL = "small"
+  LARGE = "large"
+
+
+# Enumerates ways to construct MLIR tensor types.
+@enum.unique
+class Dynamicity(enum.Enum):
+  DYNAMIC = "dynamic"  # Use '?' everywhere. Example: tensor<?x?xf32>.
+  STATIC = "static"  # Use fixed values everywhere. Example: tensor<4x6xf32>.
+  MIXED = "mixed"  # Randomly mix '?' and values. Example: tensor<?x4xf32>.
+
+
+# Enumerates ways to initialize matrix buffer contents.
+@enum.unique
+class MatrixGenerator(enum.Enum):
+  ZERO = "zero"  # Fill with zeros
+  IDENTITY = "identity"  # Make an identity matrix (generalized to any shape).
+  RANDOM = "random"  # Fill with (deterministic) pseudorandom values.
+
+
+# Describes the shape of a matrix multiplication in the usual convention:
+# the LHS is {m}x{k}, the RHS is {k}x{n}, the accumulator/result is {m}x{n}.
+@dataclasses.dataclass
+class TestShape:
+  m: int
+  k: int
+  n: int
+
+
+# Describes how to construct MLIR tensor types and how to initialize buffer
+# contents for a test case (for an already given TestShape, and already given
+# matrix element data types).
+@dataclasses.dataclass
+class TestGenerator:
+  lhs: MatrixGenerator
+  rhs: MatrixGenerator
+  acc: MatrixGenerator
+  dynamicity: Dynamicity
+
+
+# Returns the list of TestShape's to use for the collection of shapes
+# identified by shapes_id.
+def get_test_shapes(shapes_id: ShapesId):
+  # Notes:
+  # 1. Be conservative in adding more shapes, as that can increase both the
+  #    build and execution latency of tests. The build latency is nearly the
+  #    same for all shapes, while execution latency grows cubically, i.e.
+  #    linearly with m*k*n.
+  # 2. Some shapes are commented out: they used to be tested but have been
+  #    disabled to improve the trade-off between test coverage and build
+  #    latency.
+  if shapes_id == ShapesId.SMALL:
+    return [  # Small sizes, square matrices
+        # was range(1, 40) before trimming. The choice of 18 is so that we
+        # exercise a case just above 16, as 16 will be a common kernel width.
+        TestShape(m=x, k=x, n=x) for x in range(1, 18)
+    ] + [
+        # Small sizes, slightly rectangular matrices
+        TestShape(m=2, k=3, n=4),
+        #TestShape(m=8, k=7, n=6),
+        #TestShape(m=15, k=16, n=17),
+        TestShape(m=14, k=19, n=23),
+        #TestShape(m=31, k=33, n=32),
+        TestShape(m=25, k=41, n=35),
+        # Small sizes, involving vectors (i.e. most rectangular cases)
+        TestShape(m=10, k=1, n=1),
+        TestShape(m=1, k=10, n=1),
+        TestShape(m=1, k=1, n=10),
+        #TestShape(m=1, k=10, n=10),
+        #TestShape(m=10, k=1, n=10),
+        #TestShape(m=10, k=10, n=1),
+        # Small sizes, involving other very small dimensions just above 1
+        TestShape(m=13, k=14, n=2),
+        TestShape(m=3, k=17, n=12),
+        TestShape(m=21, k=4, n=18),
+        # Medium sizes, square matrices
+        #TestShape(m=100, k=100, n=100),
+        # Medium sizes, slightly rectangular matrices
+        TestShape(m=101, k=102, n=103),
+        # Medium sizes, involving vectors (i.e. most rectangular cases)
+        TestShape(m=10000, k=1, n=1),
+        TestShape(m=1, k=10000, n=1),
+        TestShape(m=1, k=1, n=10000),
+        #TestShape(m=1, k=1000, n=1000),
+        #TestShape(m=1000, k=1, n=1000),
+        #TestShape(m=1000, k=1000, n=1),
+        # Medium sizes, involving other very small dimensions just above 1
+        TestShape(m=1300, k=1300, n=2),
+        #TestShape(m=1300, k=1300, n=3),
+        #TestShape(m=1300, k=1300, n=4),
+    ]
+  if shapes_id == ShapesId.LARGE:
+    return [
+        # Large sizes, powers of two
+        TestShape(m=256, k=256, n=512),
+        #TestShape(m=512, k=512, n=128),
+        #TestShape(m=1024, k=512, n=512),
+        #TestShape(m=512, k=1024, n=512),
+        # Large sizes, powers of two minus one
+        TestShape(m=127, k=63, n=511),
+        # Large sizes, powers of two plus one
+        TestShape(m=129, k=65, n=513),
+        # Large sizes, misc.
+        #TestShape(m=200, k=300, n=400),
+        TestShape(m=123, k=456, n=789),
+        #TestShape(m=500, k=500, n=50),
+        # Be conservative in adding larger shapes. They can result in
+        # high latency tests. If you have to, consider splitting them
+        # out in a way that constrains the latency impact, e.g. by
+        # running on fewer backends/drivers or with fewer generators
+        # (see get_test_generators).
+    ]
+  raise ValueError(shapes_id)
+
+
+# Returns the list of TestGenerator's to use for the collection of shapes
+# identified by shapes_id.
+def get_test_generators(shapes_id: ShapesId):
+  if shapes_id == ShapesId.SMALL:
+    return [
+        # Generators using simple matrices for ease of numerical debugging.
+        # They don't add significant test coverage (all bugs are hit by
+        # tests using random matrices anyway). They are only here to make
+        # the bulk of our debugging easier.
+        TestGenerator(lhs=MatrixGenerator.IDENTITY,
+                      rhs=MatrixGenerator.IDENTITY,
+                      acc=MatrixGenerator.ZERO,
+                      dynamicity=Dynamicity.DYNAMIC),
+        TestGenerator(lhs=MatrixGenerator.RANDOM,
+                      rhs=MatrixGenerator.IDENTITY,
+                      acc=MatrixGenerator.ZERO,
+                      dynamicity=Dynamicity.DYNAMIC),
+        TestGenerator(lhs=MatrixGenerator.IDENTITY,
+                      rhs=MatrixGenerator.RANDOM,
+                      acc=MatrixGenerator.ZERO,
+                      dynamicity=Dynamicity.DYNAMIC),
+        TestGenerator(lhs=MatrixGenerator.IDENTITY,
+                      rhs=MatrixGenerator.IDENTITY,
+                      acc=MatrixGenerator.RANDOM,
+                      dynamicity=Dynamicity.DYNAMIC),
+        # Generators using general random matrices
+        TestGenerator(lhs=MatrixGenerator.RANDOM,
+                      rhs=MatrixGenerator.RANDOM,
+                      acc=MatrixGenerator.RANDOM,
+                      dynamicity=Dynamicity.DYNAMIC),
+        TestGenerator(lhs=MatrixGenerator.RANDOM,
+                      rhs=MatrixGenerator.RANDOM,
+                      acc=MatrixGenerator.RANDOM,
+                      dynamicity=Dynamicity.STATIC),
+        TestGenerator(lhs=MatrixGenerator.RANDOM,
+                      rhs=MatrixGenerator.RANDOM,
+                      acc=MatrixGenerator.RANDOM,
+                      dynamicity=Dynamicity.MIXED),
+    ]
+  if shapes_id == ShapesId.LARGE:
+    return [
+        # Fewer generators are used for large shapes, to limit the
+        # latency impact. Most bugs are going to be caught on small
+        # shapes anyway.
+        TestGenerator(lhs=MatrixGenerator.RANDOM,
+                      rhs=MatrixGenerator.RANDOM,
+                      acc=MatrixGenerator.RANDOM,
+                      dynamicity=Dynamicity.DYNAMIC),
+        TestGenerator(lhs=MatrixGenerator.RANDOM,
+                      rhs=MatrixGenerator.RANDOM,
+                      acc=MatrixGenerator.RANDOM,
+                      dynamicity=Dynamicity.STATIC),
+    ]
+  raise ValueError(shapes_id)
 
 
 # Intentionally fixed seed! We want full reproducibility here, both across runs
@@ -125,43 +208,120 @@ def function_name(lhs_rhs_type, accum_type, shape, gen):
 local_pseudorandom_state = 1
 
 
+# A static size value, i.e. a size value that could appear in an MLIR type
+# such as 'tensor<?x4xf32>'. None means a dynamic size, similar to '?' in MLIR.
+@dataclasses.dataclass
+class DimSize:
+  value: typing.Optional[int]
+
+
 # Generates a compile-time MLIR size value, i.e. either a fixed positive integer
-# or a '?' depending on dynamicity.
-def static_size(x, dynamicity):
-  if dynamicity == "dynamic":
-    return "?"
-  elif dynamicity == "static":
-    return x
-  elif dynamicity == "mixed":
+# or None (which maps to MLIR '?') depending on dynamicity.
+def static_size(x: int, dynamicity: Dynamicity):
+  if dynamicity == Dynamicity.DYNAMIC:
+    return DimSize(None)
+  elif dynamicity == Dynamicity.STATIC:
+    return DimSize(x)
+  elif dynamicity == Dynamicity.MIXED:
     global local_pseudorandom_state
     # Same as C++ std::minstd_rand.
     # Using a local pseudorandom generator implementation ensures that it's
     # completely reproducible, across runs and across machines.
     local_pseudorandom_state = (local_pseudorandom_state * 48271) % 2147483647
-    return x if local_pseudorandom_state > 1073741824 else "?"
+    return DimSize(x if local_pseudorandom_state > 1073741824 else None)
   else:
     raise ValueError(dynamicity)
 
 
+# Stringification used for generating MLIR types, e.g. tensor<?x?xf32>.
+def int_or_question_mark(s: DimSize):
+  return "?" if s.value is None else s.value  # only None is dynamic; keep 0
+
+
+# Stringification used for generating alphanumeric identifiers, e.g.
+# func @somefunction_DYNxDYNxf32, where we can't use "?" characters.
+def int_or_DYN(s: DimSize):
+  return "DYN" if s.value is None else s.value  # only None is dynamic; keep 0
+
+
+# Describes the fully resolved static dimensions of all 3 input matrices,
+# LHS, RHS, and Accumulator, in a testcase.
+# Each value is a DimSize, which either holds a positive integer such as 123,
+# or None, meaning a dynamic dimension ('?' in MLIR).
+# These values are used to generate MLIR function names and tensor shapes.
+@dataclasses.dataclass
+class TestInputMatricesStaticShapes:
+  lhs_rows: DimSize
+  lhs_cols: DimSize
+  rhs_rows: DimSize
+  rhs_cols: DimSize
+  acc_rows: DimSize
+  acc_cols: DimSize
+
+
+# Helper for generate_function. Generates TestInputMatricesStaticShapes, i.e.
+# converts from the runtime shape dimensions in TestShape and given dynamicity to
+# the set of static shapes to be used in a test function's input tensors.
+def generate_static_shapes(shape: TestShape, dynamicity: Dynamicity):
+  return TestInputMatricesStaticShapes(
+      lhs_rows=static_size(shape.m, dynamicity),
+      lhs_cols=static_size(shape.k, dynamicity),
+      rhs_rows=static_size(shape.k, dynamicity),
+      rhs_cols=static_size(shape.n, dynamicity),
+      acc_rows=static_size(shape.m, dynamicity),
+      acc_cols=static_size(shape.n, dynamicity),
+  )
+
+
+# Helper for generate_function.
+# Generates a name for a test function in the generated MLIR code.
+def generate_function_name(lhs_rhs_type: MatrixElemTypeId,
+                           acc_type: MatrixElemTypeId,
+                           static_shapes: TestInputMatricesStaticShapes):
+  input_t = lhs_rhs_type.value
+  acc_t = acc_type.value
+  lhs_m = int_or_DYN(static_shapes.lhs_rows)
+  lhs_k = int_or_DYN(static_shapes.lhs_cols)
+  rhs_k = int_or_DYN(static_shapes.rhs_rows)
+  rhs_n = int_or_DYN(static_shapes.rhs_cols)
+  acc_m = int_or_DYN(static_shapes.acc_rows)
+  acc_n = int_or_DYN(static_shapes.acc_cols)
+  return f"matmul_{lhs_m}x{lhs_k}x{input_t}_times_{rhs_k}x{rhs_n}x{input_t}_into_{acc_m}x{acc_n}x{acc_t}"
+
+
+# Represents a generated test function.
+@dataclasses.dataclass
+class MLIRFunction:
+  name: str
+  definition: str
+
+
 # Generates a test function in the generated MLIR code.
 # The generated function will take the same arguments as linalg.matmul and
 # will just call linalg.matmul with them, returning its result.
-def generate_function(func_name, lhs_rhs_type, accum_type, shape, gen):
-  (m, k, n) = shape
-  lhs_m = static_size(m, gen[3])
-  lhs_k = static_size(k, gen[3])
-  rhs_k = static_size(k, gen[3])
-  rhs_n = static_size(n, gen[3])
-  acc_m = static_size(m, gen[3])
-  acc_n = static_size(n, gen[3])
-  lhs_tensor_type = f"tensor<{lhs_m}x{lhs_k}x{lhs_rhs_type}>"
-  rhs_tensor_type = f"tensor<{rhs_k}x{rhs_n}x{lhs_rhs_type}>"
-  acc_tensor_type = f"tensor<{acc_m}x{acc_n}x{accum_type}>"
-  return (
+def generate_function(lhs_rhs_type: MatrixElemTypeId,
+                      acc_type: MatrixElemTypeId, shape: TestShape,
+                      dynamicity: Dynamicity):
+  static_shapes = generate_static_shapes(shape, dynamicity)
+  func_name = generate_function_name(lhs_rhs_type, acc_type, static_shapes)
+  lhs_m = int_or_question_mark(static_shapes.lhs_rows)
+  lhs_k = int_or_question_mark(static_shapes.lhs_cols)
+  rhs_k = int_or_question_mark(static_shapes.rhs_rows)
+  rhs_n = int_or_question_mark(static_shapes.rhs_cols)
+  acc_m = int_or_question_mark(static_shapes.acc_rows)
+  acc_n = int_or_question_mark(static_shapes.acc_cols)
+  lhs_tensor_type = f"tensor<{lhs_m}x{lhs_k}x{lhs_rhs_type.value}>"
+  rhs_tensor_type = f"tensor<{rhs_k}x{rhs_n}x{lhs_rhs_type.value}>"
+  acc_tensor_type = f"tensor<{acc_m}x{acc_n}x{acc_type.value}>"
+  func_definition = (
       f"func @{func_name}(%lhs: {lhs_tensor_type}, %rhs: {rhs_tensor_type}, %acc: {acc_tensor_type}) -> {acc_tensor_type} {{\n"
       f"  %result = linalg.matmul ins(%lhs, %rhs: {lhs_tensor_type}, {rhs_tensor_type}) outs(%acc: {acc_tensor_type}) -> {acc_tensor_type}\n"
       f"  return %result: {acc_tensor_type}\n"
       f"}}\n")
+  return MLIRFunction(
+      name=func_name,
+      definition=func_definition,
+  )
 
 
 # Intentionally fixed seed! We want full reproducibility here, both across runs
@@ -172,12 +332,12 @@ def generate_function(func_name, lhs_rhs_type, accum_type, shape, gen):
 
 
 # Generates a contents_generator tag to use in the output trace.
-def contents_generator_tag(generator):
-  if generator == "zero":
+def contents_generator_tag(generator: MatrixGenerator):
+  if generator == MatrixGenerator.ZERO:
     return ""
-  elif generator == "identity":
+  elif generator == MatrixGenerator.IDENTITY:
     return "!tag:iree:identity_matrix"
-  elif generator == "random":
+  elif generator == MatrixGenerator.RANDOM:
     global pseudorandom_generator_seed
     pseudorandom_generator_seed = pseudorandom_generator_seed + 1
     return f"!tag:iree:fully_specified_pseudorandom {pseudorandom_generator_seed}"
@@ -187,11 +347,13 @@ def contents_generator_tag(generator):
 
 # Generate a matrix function argument in the output trace, as a dictionary
 # to be passed to yaml.dump.
-def generate_trace_matrix_arg(matrix_shape, element_type, generator):
+def generate_trace_matrix_arg(matrix_shape: list,
+                              element_type: MatrixElemTypeId,
+                              generator: MatrixGenerator):
   result = {
       "type": "hal.buffer_view",
       "shape": matrix_shape,
-      "element_type": element_type,
+      "element_type": element_type.value,
   }
   generator_tag = contents_generator_tag(generator)
   if generator_tag:
@@ -201,12 +363,14 @@ def generate_trace_matrix_arg(matrix_shape, element_type, generator):
 
 # Generates the output trace for a testcase i.e. a single test function call,
 # as a dictionary to be passed to yaml.dump.
-def generate_trace(func_name, lhs_rhs_type, acc_type, shape, gen):
-  (m, k, n) = shape
-  lhs_arg = generate_trace_matrix_arg([m, k], lhs_rhs_type, gen[0])
-  rhs_arg = generate_trace_matrix_arg([k, n], lhs_rhs_type, gen[1])
-  acc_arg = generate_trace_matrix_arg([m, n], acc_type, gen[2])
-  result_arg = generate_trace_matrix_arg([m, n], acc_type, "zero")
+def generate_trace(func_name: str, lhs_rhs_type: MatrixElemTypeId,
+                   acc_type: MatrixElemTypeId, shape: TestShape,
+                   gen: TestGenerator):
+  lhs_arg = generate_trace_matrix_arg([shape.m, shape.k], lhs_rhs_type, gen.lhs)
+  rhs_arg = generate_trace_matrix_arg([shape.k, shape.n], lhs_rhs_type, gen.rhs)
+  acc_arg = generate_trace_matrix_arg([shape.m, shape.n], acc_type, gen.acc)
+  result_arg = generate_trace_matrix_arg([shape.m, shape.n], acc_type,
+                                         MatrixGenerator.ZERO)
   return {
       "type": "call",
       "function": "module." + func_name,
@@ -220,25 +384,24 @@ def generate_trace(func_name, lhs_rhs_type, acc_type, shape, gen):
 
 
 # Generates all output files' contents as strings.
-def generate(args):
-  functions = {}
+def generate(lhs_rhs_type: MatrixElemTypeId, acc_type: MatrixElemTypeId,
+             shapes_id: ShapesId):
+  function_definitions = {}
   traces = []
-  lhs_rhs_type = args.lhs_rhs_type
-  accum_type = 'i32' if lhs_rhs_type == 'i8' else lhs_rhs_type
-  for shape in get_test_shapes()[args.shapes]:
-    for gen in get_test_generators()[args.shapes]:
-      func_name = function_name(lhs_rhs_type, accum_type, shape, gen)
+  for shape in get_test_shapes(shapes_id):
+    for gen in get_test_generators(shapes_id):
+      function = generate_function(lhs_rhs_type, acc_type, shape,
+                                   gen.dynamicity)
       # Different testcases may differ only by runtime parameters but
       # share the same code. For example, dynamic-shapes testcases
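-      # share the same code involing tensor<?x?xf32> even though the runtime
-      # value in the trace are different. That's why we call
-      # generate_function conditionally, and generate_trace unconditionally.
+      # share the same code involving tensor<?x?xf32> even though the runtime
+      # values in the trace are different. That's why we record each function
+      # definition only once, but call generate_trace unconditionally.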
-      if func_name not in functions:
-        functions[func_name] = generate_function(func_name, lhs_rhs_type,
-                                                 accum_type, shape, gen)
+      if function.name not in function_definitions:
+        function_definitions[function.name] = function.definition
       traces.append(
-          generate_trace(func_name, lhs_rhs_type, accum_type, shape, gen))
-  return (functions, traces)
+          generate_trace(function.name, lhs_rhs_type, acc_type, shape, gen))
+  return (function_definitions, traces)
 
 
 def parse_arguments():
@@ -258,7 +421,7 @@ def parse_arguments():
                       required=True)
   parser.add_argument("--shapes",
                       type=str,
-                      choices=["small", "large"],
+                      choices=[s.value for s in ShapesId],
                       help="Collection of matrix shapes to test",
                       required=True)
   parser.add_argument(
@@ -271,10 +434,10 @@ def parse_arguments():
   return parser.parse_args()
 
 
-def write_code_file(functions, filename):
+def write_code_file(function_definitions, filename):
   with open(filename, "w") as file:
-    for funcname in functions:
-      file.write(functions[funcname] + "\n")
+    for funcname in function_definitions:
+      file.write(function_definitions[funcname] + "\n")
 
 
 def write_trace_file(traces, filename, module_path):
@@ -310,9 +473,23 @@ def write_trace_file(traces, filename, module_path):
     file.write(processed_yaml)
 
 
+# For now, the accumulator type can always be inferred from the input LHS/RHS
+# type, so we do that. That is temporary: eventually there will be cases
+# where the same input types are used with different accumulator types, e.g.
+# f16 inputs with both f16 and f32 accumulator.
+def infer_acc_type(lhs_rhs_type: MatrixElemTypeId):
+  if lhs_rhs_type == MatrixElemTypeId.I8:
+    return MatrixElemTypeId.I32
+  else:
+    return lhs_rhs_type
+
+
 def main(args):
-  (functions, traces) = generate(args)
-  write_code_file(functions, args.output_code)
+  lhs_rhs_type = MatrixElemTypeId(args.lhs_rhs_type)
+  acc_type = infer_acc_type(lhs_rhs_type)
+  shapes_id = ShapesId(args.shapes)
+  (function_definitions, traces) = generate(lhs_rhs_type, acc_type, shapes_id)
+  write_code_file(function_definitions, args.output_code)
   write_trace_file(traces, args.output_trace, args.module_path)
 
 
diff --git a/iree/test/e2e/regression/lowering_config.mlir b/iree/test/e2e/regression/lowering_config.mlir
index a70c4910f6e5..17b401fec304 100644
--- a/iree/test/e2e/regression/lowering_config.mlir
+++ b/iree/test/e2e/regression/lowering_config.mlir
@@ -1,11 +1,17 @@
-#config1 = {tileSizes = [[32, 32, 32]], passPipeline = 1 : i32}
-#config2 = {tileSizes = [[64, 64, 64]], passPipeline = 1 : i32}
+#compilation0 = #iree_codegen.compilation.info<
+    #iree_codegen.lowering.config<tile_sizes = [[32, 32, 32]], native_vector_size = []>,
+    #iree_codegen.translation.info<"CPUVectorization", workload_per_wg = [32, 32]>,
+    workgroup_size = []>
+#compilation1 = #iree_codegen.compilation.info<
+    #iree_codegen.lowering.config<tile_sizes = [[64, 64, 64]], native_vector_size = []>,
+    #iree_codegen.translation.info<"CPUVectorization", workload_per_wg = [64, 64]>,
+    workgroup_size = []>
 func @lowering_config_test() {
   %a = util.unfoldable_constant dense<1.0> : tensor<128x256xf32>
   %b = util.unfoldable_constant dense<2.0> : tensor<256x512xf32>
   %c = util.unfoldable_constant dense<2.0> : tensor<256x1024xf32>
-  %d = "mhlo.dot"(%a, %b) {lowering.config = #config1} : (tensor<128x256xf32>, tensor<256x512xf32>) -> tensor<128x512xf32>
-  %e = "mhlo.dot"(%a, %c) {lowering.config = #config2} : (tensor<128x256xf32>, tensor<256x1024xf32>) -> tensor<128x1024xf32>
+  %d = "mhlo.dot"(%a, %b) {compilation.info = #compilation0} : (tensor<128x256xf32>, tensor<256x512xf32>) -> tensor<128x512xf32>
+  %e = "mhlo.dot"(%a, %c) {compilation.info = #compilation1} : (tensor<128x256xf32>, tensor<256x1024xf32>) -> tensor<128x1024xf32>
   check.expect_almost_eq_const(%d, dense<512.0> : tensor<128x512xf32>) : tensor<128x512xf32>
   check.expect_almost_eq_const(%e, dense<512.0> : tensor<128x1024xf32>) : tensor<128x1024xf32>
   return
diff --git a/iree/tools/BUILD b/iree/tools/BUILD
index 2723b386a396..9a68ab2a6b1a 100644
--- a/iree/tools/BUILD
+++ b/iree/tools/BUILD
@@ -100,6 +100,7 @@ cc_library(
     deps = [
         "//iree/compiler/Bindings/Native/Transforms",
         "//iree/compiler/Bindings/TFLite/Transforms",
+        "//iree/compiler/Codegen/Dialect:IREECodegenDialect",
         "//iree/compiler/Dialect/Flow/IR",
         "//iree/compiler/Dialect/Flow/Transforms",
         "//iree/compiler/Dialect/HAL/IR:HALDialect",
diff --git a/iree/tools/init_iree_dialects.h b/iree/tools/init_iree_dialects.h
index b37493471664..184af7755894 100644
--- a/iree/tools/init_iree_dialects.h
+++ b/iree/tools/init_iree_dialects.h
@@ -14,6 +14,7 @@
 
 #include "iree-dialects/Dialect/IREE/IREEDialect.h"
 #include "iree-dialects/Dialect/IREEPyDM/IR/Dialect.h"
+#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h"
 #include "iree/compiler/Dialect/Flow/IR/FlowDialect.h"
 #include "iree/compiler/Dialect/HAL/IR/HALDialect.h"
 #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtDialect.h"
@@ -32,7 +33,8 @@ namespace iree_compiler {
 // Add all the IREE dialects to the provided registry.
 inline void registerIreeDialects(DialectRegistry &registry) {
   // clang-format off
-  registry.insert<IREE::Flow::FlowDialect,
+  registry.insert<IREE::Codegen::IREECodegenDialect,
+                  IREE::Flow::FlowDialect,
                   IREE::HAL::HALDialect,
                   ShapeDialect,
                   IREE::Stream::StreamDialect,
diff --git a/iree/vm/bytecode_disasm.c b/iree/vm/bytecode_disasm.c
index 1bdff07a906c..2dbeed0fb134 100644
--- a/iree/vm/bytecode_disasm.c
+++ b/iree/vm/bytecode_disasm.c
@@ -1001,6 +1001,7 @@ iree_status_t iree_vm_bytecode_disasm_op(
       IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", "));
       EMIT_I32_REG_NAME(index_reg);
       EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]);
+      EMIT_TYPE_NAME(type_def);
       break;
     }
 
@@ -1068,6 +1069,7 @@ iree_status_t iree_vm_bytecode_disasm_op(
       IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " : "));
       EMIT_REF_REG_NAME(false_value_reg);
       EMIT_OPTIONAL_VALUE_REF(&regs->ref[false_value_reg]);
+      EMIT_TYPE_NAME(type_def);
       break;
     }