From 95a0478a36da253f2a27b1c17f8d16d4b34baa63 Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Mon, 25 Oct 2021 11:13:00 -0700 Subject: [PATCH 01/22] [docs] Don't imply that venv should be closed immediately --- docs/website/docs/building-from-source/optional-features.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/website/docs/building-from-source/optional-features.md b/docs/website/docs/building-from-source/optional-features.md index 216f63e8f6df..b29458608923 100644 --- a/docs/website/docs/building-from-source/optional-features.md +++ b/docs/website/docs/building-from-source/optional-features.md @@ -79,7 +79,8 @@ package manager ([about](https://docs.python.org/3/library/venv.html), python -m pip install -r bindings\python\build_requirements.txt ``` -When done, close your shell or run `deactivate`. +When you are done with the venv, you can close it by closing your shell +or running `deactivate`. ### Usage From 6e825ae7a65f8984e901cb548dfa01ebce8eadc1 Mon Sep 17 00:00:00 2001 From: MaheshRavishankar <1663364+MaheshRavishankar@users.noreply.github.com> Date: Mon, 25 Oct 2021 13:37:52 -0700 Subject: [PATCH 02/22] Move `LoweringConfigAttr` and `TranslationInfoAttr` to `AttrDef`. (#7385) Previously these were defined using StructAttr which is limited in its capabilities. Moving them to AttrDef allows this to be more robust and verifiable. Also allows specification of custom builder methods that remove a lot of the boilerplate. In addition to the above, this change also adds a new attribute, CompilationInfoAttr. This is the attribute that when added to operations like linalg.matmul/linalg.*conv* (or their mhlo/tosa counterparts) use the information from the specified attribute instead of the default heuristics. This allows an external search to search for better values than the defaults used. 
While there are some preliminary verifiers in place, additional verifiers are needed to ensure that the user-provided attribute has all the information needed (and in a consistent manner) to achieve correct compilation. In addition, all these attributes are moved from the HAL dialect into a new IREECodegenDialect added to compiler/Codegen/Dialect. This change also brings in some code from D111594 to allow for this to land without having to wait for that patch to land. --- iree/compiler/Codegen/Common/BUILD | 1 + iree/compiler/Codegen/Common/CMakeLists.txt | 1 + .../Codegen/Common/SetNumWorkgroupsPass.cpp | 12 +- iree/compiler/Codegen/Dialect/BUILD | 107 +++ iree/compiler/Codegen/Dialect/CMakeLists.txt | 62 ++ .../Codegen/Dialect/IREECodegenAttributes.td | 14 + .../Codegen/Dialect/IREECodegenDialect.cpp | 62 ++ .../Codegen/Dialect/IREECodegenDialect.h | 17 + .../Codegen/Dialect/IREECodegenDialect.td | 44 ++ .../Codegen/Dialect/LoweringConfig.cpp | 632 ++++++++++++++++++ .../compiler/Codegen/Dialect/LoweringConfig.h | 152 +++++ .../Codegen/Dialect/LoweringConfig.td | 192 ++++++ iree/compiler/Codegen/Dialect/test/BUILD | 30 + .../Codegen/Dialect/test/CMakeLists.txt | 23 + .../Dialect/test/lowering_config_attr.mlir | 37 + iree/compiler/Codegen/LLVMCPU/BUILD | 1 + iree/compiler/Codegen/LLVMCPU/CMakeLists.txt | 1 + .../Codegen/LLVMCPU/KernelDispatch.cpp | 48 +- .../compiler/Codegen/LLVMCPU/KernelDispatch.h | 2 +- .../LLVMCPU/LLVMCPULowerExecutableTarget.cpp | 26 +- ...LLVMCPUTileAndVectorizeLinalgTensorOps.cpp | 12 +- .../Codegen/LLVMCPU/LLVMCPUVectorization.cpp | 11 +- .../materialize_launch_configuration.mlir | 109 +-- .../LLVMCPU/test/matmul_vectorization.mlir | 4 +- .../LLVMCPU/test/tile_and_vectorize.mlir | 4 +- iree/compiler/Codegen/LLVMGPU/BUILD | 1 + iree/compiler/Codegen/LLVMGPU/CMakeLists.txt | 1 + .../compiler/Codegen/LLVMGPU/KernelConfig.cpp | 71 +- iree/compiler/Codegen/LLVMGPU/KernelConfig.h | 1 - .../LLVMGPUDistributeSharedMemoryCopy.cpp | 1 + 
.../LLVMGPU/LLVMGPULowerExecutableTarget.cpp | 26 +- .../LLVMGPU/LLVMGPURemoveTrivialLoops.cpp | 8 +- .../LLVMGPU/LLVMGPUTileAndDistribute.cpp | 31 +- .../LLVMGPU/test/distribute_to_thread.mlir | 12 +- .../LLVMGPU/test/gpu_set_num_workgroups.mlir | 56 +- .../Codegen/LLVMGPU/test/remove_loops.mlir | 6 +- iree/compiler/Codegen/Passes.h | 1 - iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp | 2 + iree/compiler/Codegen/SPIRV/BUILD | 1 + iree/compiler/Codegen/SPIRV/CMakeLists.txt | 1 + .../Codegen/SPIRV/ConvertToSPIRVPass.cpp | 2 +- iree/compiler/Codegen/SPIRV/KernelConfig.cpp | 54 +- iree/compiler/Codegen/SPIRV/KernelConfig.h | 2 +- iree/compiler/Codegen/SPIRV/MaliConfig.cpp | 2 + iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp | 6 +- .../SPIRV/SPIRVLowerExecutableTargetPass.cpp | 26 +- .../SPIRV/SPIRVRemoveOneTripTiledLoops.cpp | 7 +- .../Codegen/SPIRV/SPIRVTileAndDistribute.cpp | 14 +- .../SPIRVTileAndVectorizeToCooperativeOps.cpp | 2 +- .../SPIRV/test/config_adreno_conv.mlir | 115 ++-- .../SPIRV/test/config_adreno_matmul.mlir | 193 +++--- .../SPIRV/test/config_default_matmul.mlir | 46 +- .../SPIRV/test/config_linalg_ext_ops.mlir | 60 +- .../Codegen/SPIRV/test/config_linalg_ops.mlir | 10 +- .../Codegen/SPIRV/test/config_mali_conv.mlir | 114 ++-- .../SPIRV/test/config_mali_matmul.mlir | 191 +++--- .../config_nvidia_matmul_cooperative_ops.mlir | 32 +- .../test/pipeline_matmul_vectorization.mlir | 12 +- .../test/remove_one_trip_tiled_loop.mlir | 8 +- .../test/tile_and_distribute_scatter.mlir | 6 +- .../SPIRV/test/tile_and_distribute_sort.mlir | 8 +- .../SPIRV/test/tile_and_vectorize.mlir | 29 +- .../test/tile_and_vectorize_batch_matmul.mlir | 12 +- .../SPIRV/test/tile_and_vectorize_conv.mlir | 12 +- .../SPIRV/test/tile_and_vectorize_matmul.mlir | 12 +- ...tile_and_vectorize_to_cooperative_ops.mlir | 10 +- .../SPIRV/test/vectorize_elementwise_ops.mlir | 6 +- .../Codegen/SPIRV/test/vectorize_matmul.mlir | 5 +- iree/compiler/Codegen/Utils/Utils.cpp | 61 +- 
iree/compiler/Codegen/Utils/Utils.h | 37 +- iree/compiler/Dialect/HAL/IR/BUILD | 43 -- iree/compiler/Dialect/HAL/IR/CMakeLists.txt | 28 - iree/compiler/Dialect/HAL/IR/HALDialect.cpp | 4 - .../Dialect/HAL/IR/LoweringConfig.cpp | 153 ----- iree/compiler/Dialect/HAL/IR/LoweringConfig.h | 163 ----- .../compiler/Dialect/HAL/IR/LoweringConfig.td | 79 --- iree/compiler/Dialect/HAL/Target/CUDA/BUILD | 1 + .../Dialect/HAL/Target/CUDA/CUDATarget.cpp | 3 +- iree/compiler/Dialect/HAL/Target/LLVM/BUILD | 1 + .../Dialect/HAL/Target/LLVM/CMakeLists.txt | 1 + .../Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp | 2 + .../Dialect/HAL/Target/MetalSPIRV/BUILD | 1 + .../HAL/Target/MetalSPIRV/CMakeLists.txt | 1 + .../Target/MetalSPIRV/MetalSPIRVTarget.cpp | 4 +- iree/compiler/Dialect/HAL/Target/ROCM/BUILD | 1 + .../Dialect/HAL/Target/ROCM/CMakeLists.txt | 1 + .../Dialect/HAL/Target/ROCM/ROCMTarget.cpp | 3 +- iree/compiler/Dialect/HAL/Target/VMVX/BUILD | 1 + .../Dialect/HAL/Target/VMVX/CMakeLists.txt | 1 + .../Dialect/HAL/Target/VMVX/VMVXTarget.cpp | 4 +- .../Dialect/HAL/Target/VulkanSPIRV/BUILD | 1 + .../HAL/Target/VulkanSPIRV/CMakeLists.txt | 1 + .../Target/VulkanSPIRV/VulkanSPIRVTarget.cpp | 5 +- iree/test/e2e/regression/lowering_config.mlir | 14 +- iree/tools/BUILD | 1 + iree/tools/init_iree_dialects.h | 4 +- 96 files changed, 2201 insertions(+), 1237 deletions(-) create mode 100644 iree/compiler/Codegen/Dialect/BUILD create mode 100644 iree/compiler/Codegen/Dialect/CMakeLists.txt create mode 100644 iree/compiler/Codegen/Dialect/IREECodegenAttributes.td create mode 100644 iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp create mode 100644 iree/compiler/Codegen/Dialect/IREECodegenDialect.h create mode 100644 iree/compiler/Codegen/Dialect/IREECodegenDialect.td create mode 100644 iree/compiler/Codegen/Dialect/LoweringConfig.cpp create mode 100644 iree/compiler/Codegen/Dialect/LoweringConfig.h create mode 100644 iree/compiler/Codegen/Dialect/LoweringConfig.td create mode 100644 
iree/compiler/Codegen/Dialect/test/BUILD create mode 100644 iree/compiler/Codegen/Dialect/test/CMakeLists.txt create mode 100644 iree/compiler/Codegen/Dialect/test/lowering_config_attr.mlir delete mode 100644 iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp delete mode 100644 iree/compiler/Dialect/HAL/IR/LoweringConfig.h delete mode 100644 iree/compiler/Dialect/HAL/IR/LoweringConfig.td diff --git a/iree/compiler/Codegen/Common/BUILD b/iree/compiler/Codegen/Common/BUILD index 6f71b73b74a9..0593b9b1b5bb 100644 --- a/iree/compiler/Codegen/Common/BUILD +++ b/iree/compiler/Codegen/Common/BUILD @@ -46,6 +46,7 @@ cc_library( deps = [ "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common:FoldTensorExtractOpIncGen", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/Transforms", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/Flow/IR", diff --git a/iree/compiler/Codegen/Common/CMakeLists.txt b/iree/compiler/Codegen/Common/CMakeLists.txt index 5e77f084ba4b..ce29221d09ec 100644 --- a/iree/compiler/Codegen/Common/CMakeLists.txt +++ b/iree/compiler/Codegen/Common/CMakeLists.txt @@ -56,6 +56,7 @@ iree_cc_library( MLIRTransforms MLIRVector iree::compiler::Codegen::Common::FoldTensorExtractOpIncGen + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::Transforms iree::compiler::Codegen::Utils diff --git a/iree/compiler/Codegen/Common/SetNumWorkgroupsPass.cpp b/iree/compiler/Codegen/Common/SetNumWorkgroupsPass.cpp index 0f70605ec154..06f3f51ff1ca 100644 --- a/iree/compiler/Codegen/Common/SetNumWorkgroupsPass.cpp +++ b/iree/compiler/Codegen/Common/SetNumWorkgroupsPass.cpp @@ -4,14 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/Transforms/Transforms.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -90,14 +89,9 @@ void SetNumWorkgroupsPass::runOnOperation() { if (!workloadPerWorkgroup.empty()) { currWorkloadPerWorkgroup.assign(workloadPerWorkgroup.begin(), workloadPerWorkgroup.end()); - } else if (IREE::HAL::TranslationInfo translationInfo = + } else if (IREE::Codegen::TranslationInfoAttr translationInfo = getTranslationInfo(entryPointOp)) { - if (ArrayAttr workloadPerWorkgroupAttr = - translationInfo.workloadPerWorkgroup()) { - currWorkloadPerWorkgroup = llvm::to_vector<4>(llvm::map_range( - workloadPerWorkgroupAttr, - [](Attribute attr) { return attr.cast().getInt(); })); - } + currWorkloadPerWorkgroup = translationInfo.getWorkloadPerWorkgroupVals(); } if (!currWorkloadPerWorkgroup.empty()) { diff --git a/iree/compiler/Codegen/Dialect/BUILD b/iree/compiler/Codegen/Dialect/BUILD new file mode 100644 index 000000000000..9c6008b7ec0c --- /dev/null +++ b/iree/compiler/Codegen/Dialect/BUILD @@ -0,0 +1,107 @@ +# Copyright 2019 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +load("@llvm-project//mlir:tblgen.bzl", "gentbl_cc_library", "td_library") +load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], + licenses = ["notice"], # Apache 2.0 +) + +exports_files([ + "IREECodegenAttributes.td", + "IREECodegenDialect.td", + "LoweringConfig.td", +]) + +td_library( + name = "td_files", + srcs = enforce_glob( + [ + "IREECodegenAttributes.td", + "IREECodegenDialect.td", + "LoweringConfig.td", + ], + include = ["*.td"], + ), + deps = [ + "@llvm-project//mlir:OpBaseTdFiles", + ], +) + +cc_library( + name = "IREECodegenDialect", + srcs = [ + "IREECodegenDialect.cpp", + "LoweringConfig.cpp", + ], + hdrs = [ + "IREECodegenDialect.h", + "LoweringConfig.h", + ], + textual_hdrs = [ + "IREECodegenDialect.cpp.inc", + "IREECodegenDialect.h.inc", + "LoweringConfig.cpp.inc", + "LoweringConfig.h.inc", + "LoweringConfigEnums.cpp.inc", + "LoweringConfigEnums.h.inc", + ], + deps = [ + ":IREECodegenDialectGen", + ":LoweringConfigGen", + "//iree/compiler/Codegen/Utils", + "@llvm-project//llvm:Support", + "@llvm-project//mlir:DialectUtils", + "@llvm-project//mlir:IR", + "@llvm-project//mlir:Parser", + "@llvm-project//mlir:StandardOps", + ], +) + +gentbl_cc_library( + name = "IREECodegenDialectGen", + tbl_outs = [ + ( + ["-gen-dialect-decls"], + "IREECodegenDialect.h.inc", + ), + ( + ["-gen-dialect-defs"], + "IREECodegenDialect.cpp.inc", + ), + ], + tblgen = "@llvm-project//mlir:mlir-tblgen", + td_file = "IREECodegenAttributes.td", + deps = [":td_files"], +) + +gentbl_cc_library( + name = "LoweringConfigGen", + tbl_outs = [ + ( + ["-gen-attrdef-decls"], + "LoweringConfig.h.inc", + ), + ( + ["-gen-attrdef-defs"], + "LoweringConfig.cpp.inc", + ), + ( + ["-gen-enum-decls"], + "LoweringConfigEnums.h.inc", + ), + ( + ["-gen-enum-defs"], + "LoweringConfigEnums.cpp.inc", + ), + ], + tblgen = 
"@llvm-project//mlir:mlir-tblgen", + td_file = "LoweringConfig.td", + deps = [":td_files"], +) diff --git a/iree/compiler/Codegen/Dialect/CMakeLists.txt b/iree/compiler/Codegen/Dialect/CMakeLists.txt new file mode 100644 index 000000000000..16f6826cb558 --- /dev/null +++ b/iree/compiler/Codegen/Dialect/CMakeLists.txt @@ -0,0 +1,62 @@ +################################################################################ +# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # +# iree/compiler/Codegen/Dialect/BUILD # +# # +# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # +# CMake-only content. # +# # +# To disable autogeneration for this file entirely, delete this header. # +################################################################################ + +iree_add_all_subdirs() + +iree_cc_library( + NAME + IREECodegenDialect + HDRS + "IREECodegenDialect.h" + "LoweringConfig.h" + TEXTUAL_HDRS + "IREECodegenDialect.cpp.inc" + "IREECodegenDialect.h.inc" + "LoweringConfig.cpp.inc" + "LoweringConfig.h.inc" + "LoweringConfigEnums.cpp.inc" + "LoweringConfigEnums.h.inc" + SRCS + "IREECodegenDialect.cpp" + "LoweringConfig.cpp" + DEPS + ::IREECodegenDialectGen + ::LoweringConfigGen + LLVMSupport + MLIRIR + MLIRParser + MLIRStandard + iree::compiler::Codegen::Utils + PUBLIC +) + +iree_tablegen_library( + NAME + IREECodegenDialectGen + TD_FILE + "IREECodegenAttributes.td" + OUTS + -gen-dialect-decls IREECodegenDialect.h.inc + -gen-dialect-defs IREECodegenDialect.cpp.inc +) + +iree_tablegen_library( + NAME + LoweringConfigGen + TD_FILE + "LoweringConfig.td" + OUTS + -gen-attrdef-decls LoweringConfig.h.inc + -gen-attrdef-defs LoweringConfig.cpp.inc + -gen-enum-decls LoweringConfigEnums.h.inc + -gen-enum-defs LoweringConfigEnums.cpp.inc +) + +### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/iree/compiler/Codegen/Dialect/IREECodegenAttributes.td b/iree/compiler/Codegen/Dialect/IREECodegenAttributes.td new file mode 
100644 index 000000000000..e5f8b2607de0 --- /dev/null +++ b/iree/compiler/Codegen/Dialect/IREECodegenAttributes.td @@ -0,0 +1,14 @@ +// Copyright 2021 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef IREE_CODEGEN_DIALECT_IREECODEGEN_ATTRIBUTES +#define IREE_CODEGEN_DIALECT_IREECODEGEN_ATTRIBUTES + +include "iree/compiler/Codegen/Dialect/IREECodegenDialect.td" +include "iree/compiler/Codegen/Dialect/LoweringConfig.td" + + +#endif // IREE_CODEGEN_DIALECT_IREECODEGEN_ATTRIBUTES diff --git a/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp b/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp new file mode 100644 index 000000000000..8f2cf4769979 --- /dev/null +++ b/iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp @@ -0,0 +1,62 @@ +// Copyright 2021 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" + +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.cpp.inc" +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" +#include "mlir/IR/DialectImplementation.h" + +namespace mlir { +namespace iree_compiler { +namespace IREE { +namespace Codegen { + +struct IREECodegenDialectOpAsmInterface : public OpAsmDialectInterface { + using OpAsmDialectInterface::OpAsmDialectInterface; + AliasResult getAlias(Attribute attr, raw_ostream &os) const override { + if (attr.isa()) { + os << "translation"; + return AliasResult::OverridableAlias; + } else if (attr.isa()) { + os << "compilation"; + return AliasResult::OverridableAlias; + } else if (attr.isa()) { + os << "config"; + return AliasResult::OverridableAlias; + } + return AliasResult::NoAlias; + } +}; + +void IREECodegenDialect::initialize() { + initializeCodegenAttrs(); + addInterfaces(); +} + +Attribute IREECodegenDialect::parseAttribute(DialectAsmParser &parser, + Type type) const { + StringRef mnemonic; + if (failed(parser.parseKeyword(&mnemonic))) return {}; + Attribute genAttr; + OptionalParseResult parseResult = + parseCodegenAttrs(parser, mnemonic, type, genAttr); + if (parseResult.hasValue()) return genAttr; + parser.emitError(parser.getNameLoc(), "unknown iree_codegen attribute"); + return Attribute(); +} + +void IREECodegenDialect::printAttribute(Attribute attr, + DialectAsmPrinter &p) const { + if (failed(printCodegenAttrs(attr, p))) { + llvm_unreachable("unhandled iree_codegen attribute"); + } +} + +} // namespace Codegen +} // namespace IREE +} // namespace iree_compiler +} // namespace mlir diff --git a/iree/compiler/Codegen/Dialect/IREECodegenDialect.h b/iree/compiler/Codegen/Dialect/IREECodegenDialect.h new file mode 100644 index 000000000000..bdb94731af1e --- /dev/null +++ b/iree/compiler/Codegen/Dialect/IREECodegenDialect.h @@ -0,0 +1,17 @@ +// Copyright 2021 The IREE 
Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef IREE_COMPILER_CODEGEN_DIALECT_IREECODEGEN_DIALECT_H_ +#define IREE_COMPILER_CODEGEN_DIALECT_IREECODEGEN_DIALECT_H_ + +#include "mlir/IR/Dialect.h" +#include "mlir/IR/OpDefinition.h" + +// clang-format off: must be included after all LLVM/MLIR headers +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h.inc" // IWYU pragma: keep +// clang-format on + +#endif // IREE_COMPILER_CODEGEN_DIALECT_IREECODEGEN_DIALECT_H_ diff --git a/iree/compiler/Codegen/Dialect/IREECodegenDialect.td b/iree/compiler/Codegen/Dialect/IREECodegenDialect.td new file mode 100644 index 000000000000..b1233e490e0e --- /dev/null +++ b/iree/compiler/Codegen/Dialect/IREECodegenDialect.td @@ -0,0 +1,44 @@ +// Copyright 2021 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef IREE_CODEGEN_DIALECT_IREECODEGEN_DIALECT +#define IREE_CODEGEN_DIALECT_IREECODEGEN_DIALECT + +include "mlir/IR/OpBase.td" + +//===----------------------------------------------------------------------===// +// IREE Codegen dialect +//===----------------------------------------------------------------------===// + +def IREECodegen_Dialect : Dialect { + let name = "iree_codegen"; + let cppNamespace = "::mlir::iree_compiler::IREE::Codegen"; + + let summary = [{ + A dialect representing attributes used by the IREE Code generation. + }]; + let description = [{ + This dialect is primarily meant to hold attributes that carry the + state of the compilation when lowered to scalar code for an + architecture. Typically, a backend starts by analysing the entry + point functions within the `hal.executable.variant` and deciding + which compilation pipeline to choose. 
During this, even the values + for parameters such as tile sizes, etc. are also decided. The rest + of the compilation flow does not make any heuristic decisions, + rather just looks at the values of the decision specified in + attributes that belong to this dialect. This allows an external + search to easily override the heuristics that are hard-coded + within a backend. + }]; + let extraClassDeclaration = [{ + void initializeCodegenAttrs(); + OptionalParseResult parseCodegenAttrs(DialectAsmParser &parser, + StringRef mnemonic, Type type, Attribute &value) const; + LogicalResult printCodegenAttrs(Attribute attr, DialectAsmPrinter &p) const; + }]; +} + +#endif // IREE_CODEGEN_DIALECT_IREECODEGEN_DIALECT \ No newline at end of file diff --git a/iree/compiler/Codegen/Dialect/LoweringConfig.cpp b/iree/compiler/Codegen/Dialect/LoweringConfig.cpp new file mode 100644 index 000000000000..79990d6eaf4b --- /dev/null +++ b/iree/compiler/Codegen/Dialect/LoweringConfig.cpp @@ -0,0 +1,632 @@ +// Copyright 2021 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" + +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" +#include "llvm/ADT/TypeSwitch.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/IR/DialectImplementation.h" + +#define GET_ATTRDEF_CLASSES +#include "iree/compiler/Codegen/Dialect/LoweringConfig.cpp.inc" +#include "iree/compiler/Codegen/Dialect/LoweringConfigEnums.cpp.inc" + +static const char kConfigAttrName[] = "lowering.config"; +static const char kTranslationInfoAttrName[] = "translation.info"; +static const char kCompilationInfoAttrName[] = "compilation.info"; + +namespace mlir { +namespace iree_compiler { + +//===----------------------------------------------------------------------===// +// Utility function for common code patterns. +//===----------------------------------------------------------------------===// + +static bool checkIntegerArrayAttr(ArrayAttr arrayAttr) { + return !llvm::any_of(arrayAttr, + [](Attribute attr) { return !attr.isa(); }); +} + +/// Returns an `ArrayAttr` where each element is an `IntegerAttr` of `IndexType` +/// whose values is obtained from `values`. +static ArrayAttr getIndexIntegerArrayAttr(MLIRContext *context, + ArrayRef values) { + auto attrs = llvm::to_vector<4>( + llvm::map_range(values, [&context](int64_t value) -> Attribute { + return IntegerAttr::get(IndexType::get(context), APInt(64, value)); + })); + return ArrayAttr::get(context, attrs); +} + +/// Returns an `ArrayAttr` where each element is an `IntegerAttr` of 64-bit +/// integer type whose values is obtained from `values`. 
+static ArrayAttr getI64IntegerArrayAttr(MLIRContext *context, + ArrayRef values) { + auto attrs = llvm::to_vector<4>( + llvm::map_range(values, [&context](int64_t value) -> Attribute { + return IntegerAttr::get(IntegerType::get(context, 64), + APInt(64, value)); + })); + return ArrayAttr::get(context, attrs); +} + +/// Assumes that `arrayAttr` is a list of `IntegerAttr`s and returns the values +/// in these attributes as a vector. +static SmallVector getIntegerVals(ArrayAttr arrayAttr) { + if (!arrayAttr) return {}; + SmallVector values(arrayAttr.size()); + for (auto attr : llvm::enumerate(arrayAttr)) { + values[attr.index()] = attr.value().cast().getInt(); + } + return values; +} + +namespace IREE { +namespace Codegen { + +namespace { + +// TODO(ravishankarm): The IREEFieldParser is part of the patch D111594 (where +// it is called ::mlir::FieldParser). Remove this when the upstream change lands +// in IREE. + +//===----------------------------------------------------------------------===// +// Parse Fields +//===----------------------------------------------------------------------===// + +/// Provide a template class that can be specialized by users to dispatch to +/// parsers. Auto-generated parsers generate calls to +/// `IREEFieldParser::parse`, where `T` is the parameter storage type, to +/// parse custom types. +template +struct IREEFieldParser; + +/// Parse an attribute. +template +struct IREEFieldParser< + AttributeT, std::enable_if_t::value, + AttributeT>> { + static FailureOr parse(DialectAsmParser &parser) { + AttributeT value; + if (parser.parseAttribute(value)) return failure(); + return value; + } +}; + +/// Parse any integer. +template +struct IREEFieldParser::value, IntT>> { + static FailureOr parse(DialectAsmParser &parser) { + IntT value; + if (parser.parseInteger(value)) return failure(); + return value; + } +}; + +/// Parse a string. 
+template <> +struct IREEFieldParser { + static FailureOr parse(DialectAsmParser &parser) { + std::string value; + if (parser.parseString(&value)) return failure(); + return value; + } +}; + +/// Parse any container that supports back insertion as a list. +template +struct IREEFieldParser< + ContainerT, std::enable_if_t::value, + ContainerT>> { + using ElementT = typename ContainerT::value_type; + static FailureOr parse(DialectAsmParser &parser) { + ContainerT elements; + auto elementParser = [&]() { + auto element = IREEFieldParser::parse(parser); + if (failed(element)) return failure(); + elements.push_back(element.getValue()); + return success(); + }; + if (parser.parseCommaSeparatedList(elementParser)) return failure(); + return elements; + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// iree_codegen.translation.info +//===----------------------------------------------------------------------===// + +TranslationInfoAttr TranslationInfoAttr::get( + MLIRContext *context, DispatchLoweringPassPipeline passPipeline, + ArrayRef workloadPerWorkgroup) { + auto pipelineAttr = StringAttr::get(context, stringifyEnum(passPipeline)); + ArrayAttr workloadPerWorkgroupAttr = + getI64IntegerArrayAttr(context, workloadPerWorkgroup); + return get(context, pipelineAttr, workloadPerWorkgroupAttr); +} + +DispatchLoweringPassPipeline +TranslationInfoAttr::getDispatchLoweringPassPipeline() { + Optional passPipeline = + symbolizeEnum(getPassPipeline().getValue()); + return passPipeline.getValue(); +} + +SmallVector TranslationInfoAttr::getWorkloadPerWorkgroupVals() { + return getIntegerVals(getWorkloadPerWorkgroup()); +} + +LogicalResult TranslationInfoAttr::verify( + function_ref emitError, StringAttr passPipeline, + ArrayAttr workloadPerWorkgroup) { + if (!passPipeline) { + return emitError() << "missing pass pipeline specification"; + } + auto passPipelineValue = + symbolizeEnum( + passPipeline.getValue()); + if 
(!passPipelineValue) { + return emitError() << "invalid pass pipeline value : " + << passPipeline.getValue(); + } + if (!workloadPerWorkgroup) { + return emitError() << "expected workload_per_wg to be specified (even if " + "specified as empty)"; + } + if (!checkIntegerArrayAttr(workloadPerWorkgroup)) { + return emitError() << "expected workload_per_wg to be an IntegerAttr list"; + } + return success(); +} + +::mlir::Attribute TranslationInfoAttr::parse(::mlir::DialectAsmParser &parser, + ::mlir::Type attrType) { + ::mlir::FailureOr _result_passPipeline; + ::mlir::FailureOr _result_workloadPerWorkgroup; + // Parse literal '<' + if (parser.parseLess()) return {}; + // Parse variable 'passPipeline' + _result_passPipeline = IREEFieldParser::parse(parser); + if (failed(_result_passPipeline)) { + parser.emitError(parser.getCurrentLocation(), + "failed to parse IREECodegen_TranslationInfoAttr " + "parameter 'passPipeline' which is to be a `StringAttr`"); + return {}; + } + // Parse literal ',' + if (parser.parseComma()) return {}; + // Parse literal 'workload_per_wg' + if (parser.parseKeyword("workload_per_wg")) return {}; + // Parse literal '=' + if (parser.parseEqual()) return {}; + // Parse variable 'workloadPerWorkgroup' + _result_workloadPerWorkgroup = IREEFieldParser::parse(parser); + if (failed(_result_workloadPerWorkgroup)) { + parser.emitError( + parser.getCurrentLocation(), + "failed to parse IREECodegen_TranslationInfoAttr parameter " + "'workloadPerWorkgroup' which is to be a `ArrayAttr`"); + return {}; + } + // Parse literal '>' + if (parser.parseGreater()) return {}; + return TranslationInfoAttr::get(parser.getContext(), + _result_passPipeline.getValue(), + _result_workloadPerWorkgroup.getValue()); +} + +void TranslationInfoAttr::print(::mlir::DialectAsmPrinter &printer) const { + printer << "translation.info"; + printer << "<"; + printer << getPassPipeline(); + printer << ","; + printer << ' ' << "workload_per_wg"; + printer << ' ' << "="; + printer << ' 
'; + printer << getWorkloadPerWorkgroup(); + printer << ">"; +} + +//===----------------------------------------------------------------------===// +// iree_codegen.lowering.config +//===----------------------------------------------------------------------===// + +LoweringConfigAttr LoweringConfigAttr::get(MLIRContext *context, + TileSizesListTypeRef tileSizes, + ArrayRef nativeVectorSize) { + auto attrList = llvm::to_vector<4>( + llvm::map_range(tileSizes, [&](ArrayRef sizes) -> Attribute { + return getI64IntegerArrayAttr(context, sizes); + })); + ArrayAttr tileSizesAttr = ArrayAttr::get(context, attrList); + ArrayAttr nativeVectorSizeAttr = + getI64IntegerArrayAttr(context, nativeVectorSize); + return get(context, tileSizesAttr, nativeVectorSizeAttr); +} + +TileSizesListType LoweringConfigAttr::getTileSizeVals() { + auto tileSizesAttr = getTileSizes(); + if (!tileSizesAttr) return {}; + TileSizesListType tileSizes; + for (auto attr : tileSizesAttr) { + auto vals = getIntegerVals(attr.cast()); + tileSizes.emplace_back(std::move(vals)); + } + return tileSizes; +} + +SmallVector LoweringConfigAttr::getTileSizeVals(unsigned level) { + ArrayAttr tileSizesAttr = getTileSizes(); + if (!tileSizesAttr || tileSizesAttr.size() <= level) return {}; + return getIntegerVals(tileSizesAttr[level].cast()); +} + +SmallVector LoweringConfigAttr::getNativeVectorSizeVals() { + ArrayAttr nativeVectorSizeAttr = getNativeVectorSize(); + if (!nativeVectorSizeAttr) return {}; + return getIntegerVals(nativeVectorSizeAttr); +} + +LogicalResult LoweringConfigAttr::verify( + function_ref emitError, ArrayAttr tileSizes, + ArrayAttr nativeVectorSize) { + if (!tileSizes) { + return emitError() << "expected tile_sizes to be specified (even is " + "specified as empty)"; + } + if (llvm::any_of(tileSizes, [](Attribute attr) { + auto arrayAttr = attr.dyn_cast(); + return !arrayAttr || !checkIntegerArrayAttr(arrayAttr); + })) { + return emitError() + << "expected all elements of tile_sizes to be a 
list of integers"; + } + if (!nativeVectorSize) { + return emitError() << "expected native_vector_size to be specified (even " + "if specified as empty)"; + } + if (!checkIntegerArrayAttr(nativeVectorSize)) { + return emitError() + << "expected native_vector_size to be a list of integer values"; + } + return success(); +} + +::mlir::Attribute LoweringConfigAttr::parse(::mlir::DialectAsmParser &parser, + ::mlir::Type attrType) { + ::mlir::FailureOr _result_tileSizes; + ::mlir::FailureOr _result_nativeVectorSize; + // Parse literal '<' + if (parser.parseLess()) return {}; + // Parse literal 'tile_sizes' + if (parser.parseKeyword("tile_sizes")) return {}; + // Parse literal '=' + if (parser.parseEqual()) return {}; + // Parse variable 'tileSizes' + _result_tileSizes = IREEFieldParser::parse(parser); + if (failed(_result_tileSizes)) { + parser.emitError(parser.getCurrentLocation(), + "failed to parse IREECodegen_LoweringConfigAttr parameter " + "'tileSizes' which is to be a `ArrayAttr`"); + return {}; + } + // Parse literal ',' + if (parser.parseComma()) return {}; + // Parse literal 'native_vector_size' + if (parser.parseKeyword("native_vector_size")) return {}; + // Parse literal '=' + if (parser.parseEqual()) return {}; + // Parse variable 'nativeVectorSize' + _result_nativeVectorSize = IREEFieldParser::parse(parser); + if (failed(_result_nativeVectorSize)) { + parser.emitError(parser.getCurrentLocation(), + "failed to parse IREECodegen_LoweringConfigAttr parameter " + "'nativeVectorSize' which is to be a `ArrayAttr`"); + return {}; + } + // Parse literal '>' + if (parser.parseGreater()) return {}; + return LoweringConfigAttr::get(parser.getContext(), + _result_tileSizes.getValue(), + _result_nativeVectorSize.getValue()); +} + +void LoweringConfigAttr::print(::mlir::DialectAsmPrinter &printer) const { + printer << "lowering.config"; + printer << "<"; + printer << "tile_sizes"; + printer << ' ' << "="; + printer << ' '; + printer << getTileSizes(); + printer << ","; 
+ printer << ' ' << "native_vector_size"; + printer << ' ' << "="; + printer << ' '; + printer << getNativeVectorSize(); + printer << ">"; +} + +//===----------------------------------------------------------------------===// +// iree.compilation.info +//===----------------------------------------------------------------------===// + +CompilationInfoAttr CompilationInfoAttr::get(MLIRContext *context, + TileSizesListTypeRef tileSizes, + ArrayRef nativeVectorSize, + ArrayRef workgroupSize) { + LoweringConfigAttr configAttr = + LoweringConfigAttr::get(context, tileSizes, nativeVectorSize); + TranslationInfoAttr translationInfo = + TranslationInfoAttr::get(context, DispatchLoweringPassPipeline::None); + ArrayAttr workgroupSizeAttr = getI64IntegerArrayAttr(context, workgroupSize); + return get(context, configAttr, translationInfo, workgroupSizeAttr); +} + +CompilationInfoAttr CompilationInfoAttr::get( + MLIRContext *context, TileSizesListTypeRef tileSizes, + ArrayRef nativeVectorSize, + DispatchLoweringPassPipeline passPipeline, + ArrayRef workloadPerWorkgroup, ArrayRef workgroupSize) { + LoweringConfigAttr configAttr = + LoweringConfigAttr::get(context, tileSizes, nativeVectorSize); + TranslationInfoAttr translationInfoAttr = + TranslationInfoAttr::get(context, passPipeline, workloadPerWorkgroup); + ArrayAttr workgroupSizeAttr = getI64IntegerArrayAttr(context, workgroupSize); + return get(context, configAttr, translationInfoAttr, workgroupSizeAttr); +} + +LogicalResult CompilationInfoAttr::verify( + function_ref emitError, + LoweringConfigAttr loweringConfig, TranslationInfoAttr translationInfo, + ArrayAttr workgroupSize) { + if (!loweringConfig) { + return emitError() << "missing lowering config"; + } + if (failed( + LoweringConfigAttr::verify(emitError, loweringConfig.getTileSizes(), + loweringConfig.getNativeVectorSize()))) { + return failure(); + } + if (!translationInfo) { + return emitError() << "missing translation info"; + } + if 
(failed(TranslationInfoAttr::verify( + emitError, translationInfo.getPassPipeline(), + translationInfo.getWorkloadPerWorkgroup()))) { + return failure(); + } + if (!workgroupSize) { + return emitError() << "expected workgroup_size to be specified (even if " + "specified empty)"; + } + if (!checkIntegerArrayAttr(workgroupSize)) { + return emitError() << "expected workgroup_size to be a list of integers"; + } + return success(); +} + +/// Parser method that is copied from the auto-generated using `assemblyFormat` +/// available with patch D111594. Replace after that change is in IREE. +::mlir::Attribute CompilationInfoAttr::parse(::mlir::DialectAsmParser &parser, + ::mlir::Type attrType) { + ::mlir::FailureOr _result_loweringConfig; + ::mlir::FailureOr _result_translationInfo; + ::mlir::FailureOr _result_workgroupSize; + // Parse literal '<' + if (parser.parseLess()) return {}; + // Parse variable 'loweringConfig' + _result_loweringConfig = IREEFieldParser::parse(parser); + if (failed(_result_loweringConfig)) { + parser.emitError( + parser.getCurrentLocation(), + "failed to parse IREECodegen_CompilationInfoAttr parameter " + "'loweringConfig' which is to be a `LoweringConfigAttr`"); + return {}; + } + // Parse literal ',' + if (parser.parseComma()) return {}; + // Parse variable 'translationInfo' + _result_translationInfo = IREEFieldParser::parse(parser); + if (failed(_result_translationInfo)) { + parser.emitError( + parser.getCurrentLocation(), + "failed to parse IREECodegen_CompilationInfoAttr parameter " + "'translationInfo' which is to be a `TranslationInfoAttr`"); + return {}; + } + // Parse literal ',' + if (parser.parseComma()) return {}; + // Parse literal 'workgroup_size' + if (parser.parseKeyword("workgroup_size")) return {}; + // Parse literal '=' + if (parser.parseEqual()) return {}; + // Parse variable 'workgroupSize' + _result_workgroupSize = IREEFieldParser::parse(parser); + if (failed(_result_workgroupSize)) { + 
parser.emitError(parser.getCurrentLocation(), + "failed to parse IREECodegen_CompilationInfoAttr " + "parameter 'workgroupSize' which is to be a `ArrayAttr`"); + return {}; + } + // Parse literal '>' + if (parser.parseGreater()) return {}; + return CompilationInfoAttr::get( + parser.getContext(), _result_loweringConfig.getValue(), + _result_translationInfo.getValue(), _result_workgroupSize.getValue()); +} + +/// Printer method that is copied from the auto-generated using `assemblyFormat` +/// available with patch D111594. Replace after that change is in IREE. +void CompilationInfoAttr::print(::mlir::DialectAsmPrinter &printer) const { + printer << "compilation.info"; + printer << "<"; + printer << getLoweringConfig(); + printer << ","; + printer << ' '; + printer << getTranslationInfo(); + printer << ","; + printer << ' ' << "workgroup_size"; + printer << ' ' << "="; + printer << ' '; + printer << getWorkgroupSize(); + printer << ">"; +} + +SmallVector CompilationInfoAttr::getWorkgroupSizeVals() { + ArrayAttr workgroupSizeAttr = getWorkgroupSize(); + if (!workgroupSizeAttr) return {}; + return getIntegerVals(workgroupSizeAttr); +} + +//===----------------------------------------------------------------------===// +// Initialize attributes +//===----------------------------------------------------------------------===// + +void IREECodegenDialect::initializeCodegenAttrs() { + addAttributes< +#define GET_ATTRDEF_LIST +#include "iree/compiler/Codegen/Dialect/LoweringConfig.cpp.inc" // IWYU pragma: keep + >(); +} + +OptionalParseResult IREECodegenDialect::parseCodegenAttrs( + DialectAsmParser &parser, StringRef mnemonic, Type type, + Attribute &value) const { + return generatedAttributeParser(parser, mnemonic, type, value); +} + +LogicalResult IREECodegenDialect::printCodegenAttrs( + Attribute attr, DialectAsmPrinter &p) const { + return generatedAttributePrinter(attr, p); +} + +} // namespace Codegen +} // namespace IREE + 
+//===----------------------------------------------------------------------===// +// Helpers for getting/setting iree_codegen.translation.info attribute on the +// `hal.executable.entry_point` +// ===----------------------------------------------------------------------===// + +IREE::Codegen::TranslationInfoAttr getTranslationInfo( + IREE::HAL::ExecutableEntryPointOp entryPointOp) { + return entryPointOp->getAttrOfType( + kTranslationInfoAttrName); +} + +SmallVector getWorkgroupSize( + IREE::HAL::ExecutableEntryPointOp entryPointOp) { + if (Optional workgroupSizeAttrList = + entryPointOp.workgroup_size()) { + return getIntegerVals(*workgroupSizeAttrList); + } + return {}; +} + +void setTranslationInfo(IREE::HAL::ExecutableEntryPointOp entryPointOp, + IREE::Codegen::TranslationInfoAttr translationInfo, + ArrayRef workgroupSize) { + entryPointOp->setAttr(kTranslationInfoAttrName, translationInfo); + // The workgroup size is set on the entry point op directly. + if (!workgroupSize.empty()) { + MLIRContext *context = entryPointOp->getContext(); + auto attrs = getIndexIntegerArrayAttr(context, workgroupSize); + entryPointOp.workgroup_sizeAttr(attrs); + } +} + +//===----------------------------------------------------------------------===// +// Helpers for getting/setting `iree_codegen.lowering.config` attribute on root +// operations. 
+// ===----------------------------------------------------------------------===// + +IREE::Codegen::LoweringConfigAttr getLoweringConfig(Operation *op) { + return op->getAttrOfType(kConfigAttrName); +} + +SmallVector getTileSizes(Operation *op, unsigned level) { + IREE::Codegen::LoweringConfigAttr configAttr = getLoweringConfig(op); + if (!configAttr) return {}; + return configAttr.getTileSizeVals(level); +} +SmallVector getTileSizes(OpBuilder &b, Operation *op, + unsigned level) { + return llvm::to_vector<4>( + llvm::map_range(getTileSizes(op, level), [&](int64_t t) -> Value { + return b.create(op->getLoc(), t); + })); +} + +void setLoweringConfig(Operation *op, + IREE::Codegen::LoweringConfigAttr config) { + op->setAttr(kConfigAttrName, config); +} + +LogicalResult setOpConfigAndEntryPointFnTranslation( + FuncOp entryPointFn, Operation *op, + IREE::Codegen::LoweringConfigAttr config, + IREE::Codegen::DispatchLoweringPassPipeline passPipeline, + ArrayRef workgroupSize) { + auto partitionedLoops = getPartitionedLoops(op); + SmallVector workloadPerWorkgroup; + auto tileSizes = config.getTileSizeVals(0); + if (!tileSizes.empty() && !partitionedLoops.empty()) { + for (unsigned depth : partitionedLoops) { + if (depth >= tileSizes.size()) { + return op->emitOpError( + "illegal configuration for lowering op, expect first level " + "tile size to contain at least ") + << partitionedLoops.back() << " elements"; + } + if (tileSizes[depth] == 0) { + return op->emitOpError("illegal to set tilesize of loop ") + << depth + << " to zero since it is set to be partitioned at the flow " + "level"; + } + workloadPerWorkgroup.push_back(tileSizes[depth]); + } + if (!workloadPerWorkgroup.empty()) { + workloadPerWorkgroup = + llvm::to_vector<3>(llvm::reverse(workloadPerWorkgroup)); + } + } + auto entryPointOp = getEntryPoint(entryPointFn); + if (!entryPointOp) { + return entryPointFn.emitOpError( + "unable to find entry point op for entry point function"); + } + auto translationInfo = 
IREE::Codegen::TranslationInfoAttr::get( + entryPointOp->getContext(), passPipeline, workloadPerWorkgroup); + setTranslationInfo(entryPointOp, translationInfo, workgroupSize); + return success(); +} + +//===----------------------------------------------------------------------===// +// Helpers for getting/setting `iree_codegen.compilation.info` attribute on root +// operations to override IREE's default compilation. +// ===----------------------------------------------------------------------===// + +IREE::Codegen::CompilationInfoAttr getCompilationInfo(Operation *op) { + return op->getAttrOfType( + kCompilationInfoAttrName); +} + +void setCompilationInfo(Operation *op, + IREE::Codegen::CompilationInfoAttr config) { + op->setAttr(kCompilationInfoAttrName, config); +} + +void eraseCompilationInfo(Operation *op) { + op->removeAttr(kCompilationInfoAttrName); +} + +} // namespace iree_compiler +} // namespace mlir diff --git a/iree/compiler/Codegen/Dialect/LoweringConfig.h b/iree/compiler/Codegen/Dialect/LoweringConfig.h new file mode 100644 index 000000000000..6d99215f7e8f --- /dev/null +++ b/iree/compiler/Codegen/Dialect/LoweringConfig.h @@ -0,0 +1,152 @@ +// Copyright 2021 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +//===- LoweringConfig.h - Declares configuration for lowering Linalg ops --===// +// +// This file declares an attribute that drives how a dispatch region containing +// a set of operations is lowered. The attribute itself is attached to Linalg +// operations, and helps convert a Linalg operation into "scalar code". 
+// +//===----------------------------------------------------------------------===// + +#ifndef IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_ +#define IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_ + +#include "iree/compiler/Codegen/Utils/Utils.h" +#include "iree/compiler/Dialect/HAL/IR/HALOps.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" + +namespace mlir { +namespace iree_compiler { +/// Typedef for tile sizes to use at different levels of tiling. +using TileSizesListType = SmallVector>; +using TileSizesListTypeRef = ArrayRef>; +} // namespace iree_compiler +} // namespace mlir + +// clang-format off +#include "iree/compiler/Codegen/Dialect/LoweringConfigEnums.h.inc" +#define GET_ATTRDEF_CLASSES +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h.inc" +// clang-format on + +namespace mlir { +namespace iree_compiler { +//===----------------------------------------------------------------------===// +// Helpers for getting/setting iree_codegen.translation.info attribute on the +// `hal.executable.entry_point` +// ===----------------------------------------------------------------------===// + +/// Gets the translate executable info attribute value associated with +/// `entryPointOp`. It expects that the attribute is stored using the identifier +/// `translation.info`. +IREE::Codegen::TranslationInfoAttr getTranslationInfo( + IREE::HAL::ExecutableEntryPointOp entryPointOp); +/// Returns the translation info for the `funcOp` (by looking at the entry +/// point). Returns `nullptr` on failure. +inline IREE::Codegen::TranslationInfoAttr getTranslationInfo(FuncOp funcOp) { + auto entryPointOp = getEntryPoint(funcOp); + if (!entryPointOp) return nullptr; + return getTranslationInfo(entryPointOp); +} + +/// Returns the workgroup size specified on the `entryPointOp`. 
+SmallVector getWorkgroupSize( + IREE::HAL::ExecutableEntryPointOp entryPointOp); + +/// Set the translate executable info with the entry point op. Overwrites the +/// existing attributes. +// TODO(ravishankarm, benvanik): Eventually all the information needed for the +// lowering will be consolidated into a single attribute with richer +// information. +void setTranslationInfo(IREE::HAL::ExecutableEntryPointOp entryPointOp, + IREE::Codegen::TranslationInfoAttr translationInfo, + ArrayRef workgroupSize = {}); +inline void setTranslationInfo( + FuncOp entryPointFn, IREE::Codegen::TranslationInfoAttr translationInfo, + ArrayRef workgroupSize = {}) { + auto entryPointOp = getEntryPoint(entryPointFn); + return setTranslationInfo(entryPointOp, translationInfo, workgroupSize); +} + +/// Sets the translation info on the `hal.executable.entry_point` op +/// corresponding to the `entryPointFn`. Returns nothing: any translation info +/// already set on the entry point op is overwritten rather than checked for +/// compatibility. +inline void setTranslationInfo( + FuncOp entryPointFn, + IREE::Codegen::DispatchLoweringPassPipeline passPipeline, + ArrayRef workloadPerWorkgroup, ArrayRef workgroupSize) { + auto entryPointOp = getEntryPoint(entryPointFn); + MLIRContext *context = entryPointFn.getContext(); + auto translationInfo = IREE::Codegen::TranslationInfoAttr::get( + context, passPipeline, workloadPerWorkgroup); + setTranslationInfo(entryPointOp, translationInfo, workgroupSize); +} + +//===----------------------------------------------------------------------===// +// Helpers for getting/setting `iree_codegen.lowering.config` attribute on root +// operations. +// ===----------------------------------------------------------------------===// + +/// Returns the lowering configuration set for an operation. Returns `nullptr` +/// if no value is set. It expects that the attribute is stored using the +/// identifier `lowering.config`. 
+IREE::Codegen::LoweringConfigAttr getLoweringConfig(Operation *op); + +/// Returns the tile sizes for a particular operation if the +/// `iree_codegen.lowering.config` attribute is set on it. +SmallVector getTileSizes(Operation *op, unsigned level); +SmallVector getTileSizes(OpBuilder &b, Operation *op, unsigned level); + +/// Sets the lowering configuration, overwriting existing attribute values. +void setLoweringConfig(Operation *op, IREE::Codegen::LoweringConfigAttr config); + +/// Sets translation for the entry-point function based on op configuration. +LogicalResult setOpConfigAndEntryPointFnTranslation( + FuncOp entryPointFn, Operation *op, + IREE::Codegen::LoweringConfigAttr config, + IREE::Codegen::DispatchLoweringPassPipeline passPipeline, + ArrayRef workgroupSize = {}); +inline LogicalResult setOpConfigAndEntryPointFnTranslation( + FuncOp entryPointFn, Operation *op, TileSizesListTypeRef tileSizes, + ArrayRef nativeVectorSize, + IREE::Codegen::DispatchLoweringPassPipeline passPipeline, + ArrayRef workgroupSize = {}) { + MLIRContext *context = entryPointFn.getContext(); + auto config = IREE::Codegen::LoweringConfigAttr::get(context, tileSizes, + nativeVectorSize); + setLoweringConfig(op, config); + return setOpConfigAndEntryPointFnTranslation(entryPointFn, op, config, + passPipeline, workgroupSize); +} + +//===----------------------------------------------------------------------===// +// Helpers for getting/setting `iree_codegen.compilation.info` attribute on root +// operations to override IREEs default compilation. +// ===----------------------------------------------------------------------===// + +/// Returns the `#iree_codegen.compilation.info` set on the operation. Assumes +/// that the identifier used is `compilation.info`. +IREE::Codegen::CompilationInfoAttr getCompilationInfo(Operation *op); + +/// Sets the `config` to use for compiling the operation. 
If `op` is the root +/// operation of the dispatch region, overrides the default configuration that +/// is used for compilation. +void setCompilationInfo(Operation *op, + IREE::Codegen::CompilationInfoAttr config); + +/// Removes the `#iree_codegen.compilation.info` attribute that is set on the +/// operation. +void eraseCompilationInfo(Operation *op); + +} // namespace iree_compiler +} // namespace mlir + +#endif // IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_ diff --git a/iree/compiler/Codegen/Dialect/LoweringConfig.td b/iree/compiler/Codegen/Dialect/LoweringConfig.td new file mode 100644 index 000000000000..13489da1e152 --- /dev/null +++ b/iree/compiler/Codegen/Dialect/LoweringConfig.td @@ -0,0 +1,192 @@ +// Copyright 2021 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef IREE_COMPILER_CODEGEN_DIALECT_LOWERINGCONFIG +#define IREE_COMPILER_CODEGEN_DIALECT_LOWERINGCONFIG + +include "iree/compiler/Codegen/Dialect/IREECodegenDialect.td" + +// List of pre-existing pipelines for translating executables. 
+def CPU_Default + : StrEnumAttrCase<"CPUDefault">; +def CPU_Vectorization + : StrEnumAttrCase<"CPUVectorization">; +def CPU_TensorToVectors + : StrEnumAttrCase<"CPUTensorToVectors">; + +def LLVMGPU_SimpleDistribute + : StrEnumAttrCase<"LLVMGPUDistribute">; +def LLVMGPU_Vectorize + : StrEnumAttrCase<"LLVMGPUVectorize">; +def LLVMGPU_MatmulSimt + : StrEnumAttrCase<"LLVMGPUMatmulSimt">; + +def SPIRV_SimpleDistribute + : StrEnumAttrCase<"SPIRVDistribute">; +def SPIRV_Vectorize + : StrEnumAttrCase<"SPIRVVectorize">; +def SPIRV_DistributeToGlobalID + : StrEnumAttrCase<"SPIRVDistributeToGlobalID">; +def SPIRV_VectorizeToCooperativeOps + : StrEnumAttrCase<"SPIRVVectorizeToCooperativeOps">; + +def None + : StrEnumAttrCase<"None">; + +// EnumAttrCase for all known lowerings for ops within dispatch region +// to scalar/native-vector code. +def DispatchLoweringPassPipelineEnum : StrEnumAttr< + "DispatchLoweringPassPipeline", + "identifier for pass pipeline use to lower dispatch region", + [CPU_Default, CPU_TensorToVectors, CPU_Vectorization, + LLVMGPU_SimpleDistribute, LLVMGPU_Vectorize, LLVMGPU_MatmulSimt, + SPIRV_SimpleDistribute, SPIRV_DistributeToGlobalID, + SPIRV_Vectorize, SPIRV_VectorizeToCooperativeOps, None]> { + let cppNamespace = "::mlir::iree_compiler::IREE::Codegen"; +} + +def IREECodegen_TranslationInfoAttr : + AttrDef { + let mnemonic = "translation.info"; + let summary = [{drive dispatch entry point lowering}]; + let description = [{ + Specifies the information that is used to drive the translation of + an entry point function using Linalg based structured-op + lowering. During executable translation this is attached to the + `hal.executable.entry_point` operation. + + If this attribute is already set on the root operation (as part of + `iree_codegen.compilation.info`) that drives the compilation of a + dispatch region (like `linalg.matmul`/`linalg.*conv*`), this + attribute gets propagated to the entry point function. 
+ + The fields are + - `passPipeline` : The pass pipeline to use. + - `workloadPerWorkgroup` : Specifies how much of the original + `workload` is handled by a workgroup along `x`, `y` and `z`. If + left empty it implies that there is a single workgroup that + does the entire `workload`. + + }]; + + // TODO(ravishankarm): Commented out till patch D111594 lands. + // let assemblyFormat = [{ + // `<` $passPipeline `,` `workload_per_wg` `=` $workloadPerWorkgroup `>` + // }]; + + let parameters = (ins + AttrParameter<"StringAttr", "">:$passPipeline, + AttrParameter<"ArrayAttr", "">:$workloadPerWorkgroup + ); + let builders = [ + AttrBuilder<(ins "DispatchLoweringPassPipeline":$passPipeline, + CArg<"ArrayRef", "{}">:$workloadPerWorkgroup)> + ]; + let extraClassDeclaration = [{ + // Returns the lowering pass pipeline set. + DispatchLoweringPassPipeline getDispatchLoweringPassPipeline(); + + // Returns values of the workloadPerWorkgroup field if set. + SmallVector getWorkloadPerWorkgroupVals(); + }]; + let genVerifyDecl = 1; +} + +def IREECodegen_LoweringConfigAttr : + AttrDef { + let mnemonic = "lowering.config"; + let summary = [{drive lowering of an operation within dispatch region}]; + let description = [{ + Specifies the information that is used by backend compiler to + translate an operation to scalar code. The way the information is + used is specific to each backend (indeed specific to the pass + pipeline used) to compile that operation. + + TODO: Currently there is no verification that the configuration + specifies everything needed for a pass-pipeline. The values to set + for these parameters are dependent on the pass-pipeline + implementation. In future, each pass pipeline could verify that + the lowering configuration has all the necessary attributes for + the pipeline. + + }]; + + // TODO(ravishankarm): Commented out till patch D111594 lands. 
+ // let assemblyFormat = [{ + // `<` `tile_sizes` `=` $tileSizes `,` `native_vector_size` `=` $nativeVectorSize `>` + // }]; + + let parameters = (ins + AttrParameter<"ArrayAttr", "">:$tileSizes, + AttrParameter<"ArrayAttr", "">:$nativeVectorSize + ); + let builders = [ + AttrBuilder<(ins "TileSizesListTypeRef":$tileSizes, + CArg<"ArrayRef", "{}">:$nativeVectorSize)> + ]; + let extraClassDeclaration = [{ + // Returns the tile sizes for all levels set for the op. + TileSizesListType getTileSizeVals(); + + // Returns the tile sizes for a level set for the op. + SmallVector getTileSizeVals(unsigned level = 0); + + // Returns the native vector size to use. + SmallVector getNativeVectorSizeVals(); + }]; + let genVerifyDecl = 1; +} + +def IREECodegen_CompilationInfoAttr : + AttrDef { + let mnemonic = "compilation.info"; + let summary = [{drive lowering of an operation from input dialect}]; + let description = [{ + Specifies the information that allows controlling the compilation + of operations like `linalg.matmul`/`linalg.*conv` within + IREE. This information is used to override the defaults used by + the IREE compiler. Currently it is only valid to set this on + `linalg.matmul`/`linalg.*conv*` operations. + + TODO: It is expected that the `TranslationInfoAttr` and the + `LoweringConfigAttr` are specified. Currently there is no + verification that the values of the `LoweringConfigAttr` fully + specifies the behaviour of the compilation path chosen with + `TranslationInfoAttr`. This could be added in the future. Note: + Typically the values used for the first-level tiling in + `LoweringConfigAttr` and `workload_per_wg` value in the + `TranslationInfoAttr` are the same since the first-level of tile + + distribute is already done at the `Flow` level. This verification + is also a TODO. 
+ }]; + let parameters = (ins + AttrParameter<"LoweringConfigAttr", "">:$loweringConfig, + AttrParameter<"TranslationInfoAttr", "">:$translationInfo, + AttrParameter<"ArrayAttr", "">:$workgroupSize + ); + + // TODO(ravishankarm): Commented out till patch D111594 lands. + // let assemblyFormat = [{ + // `<` $loweringConfig `,` $translationInfo `,` `workgroup_size` `=` $workgroupSize `>` + // }]; + + let builders = [ + AttrBuilder<(ins "TileSizesListTypeRef":$tileSizes, + "ArrayRef":$nativeVectorSize, + CArg<"ArrayRef", "{}">:$workgroupSize)>, + AttrBuilder<(ins "TileSizesListTypeRef":$tileSizes, + "ArrayRef":$nativeVectorSize, + "DispatchLoweringPassPipeline":$passPipeline, + "ArrayRef":$workloadPerWorkgroup, + CArg<"ArrayRef", "{}">:$workgroupSize)>, + ]; + let extraClassDeclaration = [{ + SmallVector getWorkgroupSizeVals(); + }]; + let genVerifyDecl = 1; +} + +#endif // IREE_COMPILER_CODEGEN_DIALECT_LOWERINGCONFIG diff --git a/iree/compiler/Codegen/Dialect/test/BUILD b/iree/compiler/Codegen/Dialect/test/BUILD new file mode 100644 index 000000000000..704286c9afdd --- /dev/null +++ b/iree/compiler/Codegen/Dialect/test/BUILD @@ -0,0 +1,30 @@ +# Copyright 2021 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# Tests for common transforms. 
+ +load("//iree:lit_test.bzl", "iree_lit_test_suite") +load("//build_tools/bazel:enforce_glob.bzl", "enforce_glob") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], + licenses = ["notice"], # Apache 2.0 +) + +iree_lit_test_suite( + name = "lit", + srcs = enforce_glob( + [ + "lowering_config_attr.mlir", + ], + include = ["*.mlir"], + ), + data = [ + "//iree/tools:IreeFileCheck", + "//iree/tools:iree-opt", + ], +) diff --git a/iree/compiler/Codegen/Dialect/test/CMakeLists.txt b/iree/compiler/Codegen/Dialect/test/CMakeLists.txt new file mode 100644 index 000000000000..4de932a994f6 --- /dev/null +++ b/iree/compiler/Codegen/Dialect/test/CMakeLists.txt @@ -0,0 +1,23 @@ +################################################################################ +# Autogenerated by build_tools/bazel_to_cmake/bazel_to_cmake.py from # +# iree/compiler/Codegen/Dialect/test/BUILD # +# # +# Use iree_cmake_extra_content from iree/build_defs.oss.bzl to add arbitrary # +# CMake-only content. # +# # +# To disable autogeneration for this file entirely, delete this header. 
# +################################################################################ + +iree_add_all_subdirs() + +iree_lit_test_suite( + NAME + lit + SRCS + "lowering_config_attr.mlir" + DATA + iree::tools::IreeFileCheck + iree::tools::iree-opt +) + +### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/iree/compiler/Codegen/Dialect/test/lowering_config_attr.mlir b/iree/compiler/Codegen/Dialect/test/lowering_config_attr.mlir new file mode 100644 index 000000000000..363999672367 --- /dev/null +++ b/iree/compiler/Codegen/Dialect/test/lowering_config_attr.mlir @@ -0,0 +1,37 @@ +// RUN: iree-opt -split-input-file %s | IreeFileCheck %s + +module attributes { + translation.info = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [32, 42]> +} { } +// CHECK: #translation = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [32, 42]> + +// ----- + +module attributes { + translation.info = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []> +} { } +// CHECK: #translation = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []> + +// ----- + +module attributes { + lowering.config = #iree_codegen.lowering.config +} { } +// CHECK: #config = #iree_codegen.lowering.config + +// ----- + +module attributes { + lowering.config = #iree_codegen.lowering.config +} { } +// CHECK: #config = #iree_codegen.lowering.config + +// ----- + +module attributes { + compilation.info = #iree_codegen.compilation.info< + #iree_codegen.lowering.config, + #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []>, + workgroup_size = []> +} { } +// CHECK: #compilation = #iree_codegen.compilation.info<#iree_codegen.lowering.config, #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []>, workgroup_size = []> \ No newline at end of file diff --git a/iree/compiler/Codegen/LLVMCPU/BUILD b/iree/compiler/Codegen/LLVMCPU/BUILD index 0d410558c28e..21e280217659 100644 --- a/iree/compiler/Codegen/LLVMCPU/BUILD +++ 
b/iree/compiler/Codegen/LLVMCPU/BUILD @@ -29,6 +29,7 @@ cc_library( deps = [ "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/Transforms", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/Flow/IR", diff --git a/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt b/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt index 50207fdd49f7..44d43b1becd7 100644 --- a/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt +++ b/iree/compiler/Codegen/LLVMCPU/CMakeLists.txt @@ -57,6 +57,7 @@ iree_cc_library( MLIRVectorToLLVM MLIRVectorToSCF iree::compiler::Codegen::Common + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::Transforms iree::compiler::Codegen::Utils diff --git a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp index 3504b5bd94ae..dbbdccad2f74 100644 --- a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp +++ b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp @@ -8,9 +8,7 @@ #include "iree/compiler/Codegen/Transforms/Transforms.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/CommandLine.h" @@ -227,8 +225,8 @@ static LogicalResult setDefaultLaunchConfig( getDefaultWorkloadPerWorkgroup(tiledLoops, nativeVectorSizeInElements); setTranslationInfo( - entryPointFn, IREE::HAL::DispatchLoweringPassPipeline::CPUDefault, - /*workgroupSize =*/ArrayRef{}, workloadPerWorkgroup); + entryPointFn, IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault, + workloadPerWorkgroup, /*workgroupSize =*/ArrayRef{}); return success(); } @@ -299,8 +297,9 @@ static LogicalResult setRootConfig(FuncOp 
entryPointFn, vectorSizeVals[i]); } setTranslationInfo( - entryPointFn, IREE::HAL::DispatchLoweringPassPipeline::CPUTensorToVectors, - /*workgroupSize =*/ArrayRef{}, workloadPerWorkgroup); + entryPointFn, + IREE::Codegen::DispatchLoweringPassPipeline::CPUTensorToVectors, + workloadPerWorkgroup, /*workgroupSize =*/ArrayRef{}); SmallVector l1TileSizes, vectorTileSizes; if (isBatchMatmul) { @@ -321,8 +320,8 @@ static LogicalResult setRootConfig(FuncOp entryPointFn, // level tiling. tileSizes.emplace_back(std::move(l1TileSizes)); tileSizes.emplace_back(std::move(vectorTileSizes)); - IREE::HAL::LoweringConfig config = - buildConfigAttr(tileSizes, vectorSizeVals, entryPointFn.getContext()); + auto config = IREE::Codegen::LoweringConfigAttr::get( + entryPointFn.getContext(), tileSizes, vectorSizeVals); setLoweringConfig(contractionOp, config); return success(); } @@ -368,14 +367,14 @@ static LogicalResult setRootConfig(FuncOp entryPointFn, linalg::Mmt4DOp mmt4dOp, return {1, 1, 1, M0, N0, K0}; }; - SmallVector nativeVectorSize = getVectorSizes(); + SmallVector nativeVectorSize = getVectorSizes(); TileSizesListType tileSizes = {getWorkgroupTileSizes(), getL1TileSizes(), nativeVectorSize}; return setOpConfigAndEntryPointFnTranslation( entryPointFn, mmt4dOp, tileSizes, nativeVectorSize, - IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization); + IREE::Codegen::DispatchLoweringPassPipeline::CPUVectorization); } /// Sets the lowering configuration for dispatch region for linalg_ext.fft @@ -384,8 +383,7 @@ static LogicalResult setRootConfig(FuncOp entryPointFn, linalg_ext::FftOp fftOp, ArrayRef tiledLoops) { auto partitionedLoops = getPartitionedLoops(fftOp); unsigned maxDepth = partitionedLoops.back() + 1; - SmallVector workgroupTileSizes(maxDepth, - defaultWorkgroupTileSize); + SmallVector workgroupTileSizes(maxDepth, defaultWorkgroupTileSize); llvm::DenseSet partitionedLoopsSet(partitionedLoops.begin(), partitionedLoops.end()); for (auto dim : llvm::seq(0, 
workgroupTileSizes.size())) { @@ -412,7 +410,7 @@ static LogicalResult setRootConfig(FuncOp entryPointFn, linalg_ext::FftOp fftOp, return setOpConfigAndEntryPointFnTranslation( entryPointFn, fftOp, tileSizes, /*nativeVectorSizes=*/ArrayRef{}, - IREE::HAL::DispatchLoweringPassPipeline::CPUDefault); + IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault); } /// Finds the root operation in the given list of linalg operations and sets @@ -454,11 +452,8 @@ static LogicalResult setTranslationInfoAndRootConfig( for (auto computeOp : computeOps) { if (!hasMarker(computeOp, getWorkgroupMarker())) continue; - if (auto config = getLoweringConfig(computeOp)) { - // Check if the op has a preset pipeline. - auto passPipeline = getLoweringPassPipeline(config); - if (!passPipeline) continue; - + if (IREE::Codegen::CompilationInfoAttr compilationInfo = + getCompilationInfo(computeOp)) { // If the function already has a translation, error out. if (auto translationInfo = getTranslationInfo(entryPointFn)) { return computeOp->emitOpError( @@ -466,17 +461,12 @@ static LogicalResult setTranslationInfoAndRootConfig( "info"); } - SmallVector workgroupSize; - if (auto workgroupSizeAttr = config.workgroupSize()) { - workgroupSize = llvm::to_vector<4>( - llvm::map_range(workgroupSizeAttr, [](Attribute intAttr) { - return intAttr.cast().getInt(); - })); - } - if (failed(setOpConfigAndEntryPointFnTranslation( - entryPointFn, computeOp, config, *passPipeline, workgroupSize))) { - return failure(); - } + SmallVector workgroupSize = + compilationInfo.getWorkgroupSizeVals(); + setTranslationInfo(entryPointFn, compilationInfo.getTranslationInfo(), + workgroupSize); + setLoweringConfig(computeOp, compilationInfo.getLoweringConfig()); + eraseCompilationInfo(computeOp); } } diff --git a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h index dbacd1a5c3b4..afb616e8cb08 100644 --- a/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h +++ 
b/iree/compiler/Codegen/LLVMCPU/KernelDispatch.h @@ -7,7 +7,7 @@ #ifndef IREE_COMPILER_CODEGEN_LLVMCPU_KERNELDISPATCH_H_ #define IREE_COMPILER_CODEGEN_LLVMCPU_KERNELDISPATCH_H_ -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "mlir/IR/BuiltinOps.h" namespace mlir { diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp index 87bd94ba4313..0732fbbe480a 100644 --- a/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp +++ b/iree/compiler/Codegen/LLVMCPU/LLVMCPULowerExecutableTarget.cpp @@ -4,10 +4,10 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Codegen/LLVMCPU/KernelDispatch.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -30,8 +30,9 @@ class LLVMCPULowerExecutableTargetPass LLVMCPULowerExecutableTargetPass> { public: void getDependentDialects(DialectRegistry &registry) const override { - registry.insert(); + registry.insert(); } LLVMCPULowerExecutableTargetPass(bool vectorize = true) @@ -123,16 +124,15 @@ void LLVMCPULowerExecutableTargetPass::runOnOperation() { // is fine.
llvm::StringMap entryPoints = getAllEntryPoints(moduleOp); - Optional passPipeline; + Optional passPipeline; for (auto &it : entryPoints) { auto entryPointOp = it.second; - if (IREE::HAL::TranslationInfo translationInfo = + if (IREE::Codegen::TranslationInfoAttr translationInfo = getTranslationInfo(entryPointOp)) { - Optional currPipeline = - getLoweringPassPipeline(translationInfo); - if (!currPipeline) continue; + IREE::Codegen::DispatchLoweringPassPipeline currPipeline = + translationInfo.getDispatchLoweringPassPipeline(); if (passPipeline) { - if (currPipeline.getValue() != passPipeline.getValue()) { + if (currPipeline != passPipeline.getValue()) { moduleOp.emitError( "unhandled compilation of entry point function with different " "pass pipelines within a module"); @@ -150,14 +150,14 @@ void LLVMCPULowerExecutableTargetPass::runOnOperation() { OpPassManager &nestedModulePM = executableLoweringPipeline.nest(); switch (passPipeline.getValue()) { - case IREE::HAL::DispatchLoweringPassPipeline::CPUDefault: - case IREE::HAL::DispatchLoweringPassPipeline::None: + case IREE::Codegen::DispatchLoweringPassPipeline::CPUDefault: + case IREE::Codegen::DispatchLoweringPassPipeline::None: addCPUDefaultPassPipeline(nestedModulePM); break; - case IREE::HAL::DispatchLoweringPassPipeline::CPUVectorization: + case IREE::Codegen::DispatchLoweringPassPipeline::CPUVectorization: addCPUVectorizationPassPipeline(nestedModulePM, lowerToVectors); break; - case IREE::HAL::DispatchLoweringPassPipeline::CPUTensorToVectors: + case IREE::Codegen::DispatchLoweringPassPipeline::CPUTensorToVectors: addTensorToVectorsPassPipeline(nestedModulePM, lowerToVectors); break; default: diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp index aa1a3c69d03b..5a110e773ef4 100644 --- a/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp +++ 
b/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp @@ -4,6 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/LLVMCPU/KernelDispatch.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" @@ -78,9 +79,8 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { l1patterns.insert( context, linalg::LinalgTilingOptions().setTileSizeComputationFunction( - [](OpBuilder &builder, - Operation *operation) -> SmallVector { - return getTileSizes(builder, operation, + [](OpBuilder &builder, Operation *op) -> SmallVector { + return getTileSizes(builder, op, static_cast(TilingLevel::L1Tiles)); }), linalg::LinalgTransformationFilter( @@ -112,11 +112,9 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { l2patterns.insert( context, linalg::LinalgTilingOptions().setTileSizeComputationFunction( - [](OpBuilder &builder, - Operation *operation) -> SmallVector { + [](OpBuilder &builder, Operation *op) -> SmallVector { return getTileSizes( - builder, operation, - static_cast(TilingLevel::VectorTiles)); + builder, op, static_cast(TilingLevel::VectorTiles)); }), linalg::LinalgTransformationFilter( Identifier::get(getWorkgroupL1TileMarker(), context), diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp index 2fc018541ea0..53092aec2db4 100644 --- a/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp +++ b/iree/compiler/Codegen/LLVMCPU/LLVMCPUVectorization.cpp @@ -167,9 +167,8 @@ void LLVMCPUVectorizationPass::runOnOperation() { l1patterns.insert( context, linalg::LinalgTilingOptions().setTileSizeComputationFunction( - [](OpBuilder &builder, - Operation *operation) -> SmallVector { - return getTileSizes(builder, operation, + [](OpBuilder &builder, Operation *op) -> SmallVector { + return 
getTileSizes(builder, op, static_cast(TilingLevel::L1Tiles)); }), linalg::LinalgTransformationFilter( @@ -188,11 +187,9 @@ void LLVMCPUVectorizationPass::runOnOperation() { l2patterns.insert( context, linalg::LinalgTilingOptions().setTileSizeComputationFunction( - [](OpBuilder &builder, - Operation *operation) -> SmallVector { + [](OpBuilder &builder, Operation *op) -> SmallVector { return getTileSizes( - builder, operation, - static_cast(TilingLevel::VectorTiles)); + builder, op, static_cast(TilingLevel::VectorTiles)); }), linalg::LinalgTransformationFilter( Identifier::get(getWorkgroupL1TileMarker(), context), diff --git a/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir b/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir index 91728ceed539..c307c9bc5261 100644 --- a/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir +++ b/iree/compiler/Codegen/LLVMCPU/test/materialize_launch_configuration.mlir @@ -59,10 +59,11 @@ hal.executable private @matmul_tensors { } } -// CHECK-DAG: #[[CONFIG:.+]] = {nativeVectorSize = [4, 4, 4], tileSizes = {{\[}}{{\[}}{{\]}}, [32, 32, 32], [4, 4, 4]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUTensorToVectors", workload_per_wg = [64, 64]> // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> // CHECK: hal.executable.entry_point public @matmul_tensors -// CHECK-SAME: translation.info = {passPipeline = "CPUTensorToVectors", workloadPerWorkgroup = [64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: (%[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index) @@ -118,11 +119,10 @@ hal.executable private @add_no_config { } } } - -// CHECK: #[[CONFIG:[a-zA-Z]+]] = {passPipeline = "CPUDefault"} -// CHECK: hal.executable private @add_no_config -// CHECK: hal.executable.entry_point 
public @add_no_config -// CHECK-SAME: translation.info = #[[CONFIG]] +// CHECK: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = []> +// CHECK: hal.executable private @add_no_config +// CHECK: hal.executable.entry_point public @add_no_config +// CHECK-SAME: translation.info = #[[TRANSLATION]] // ----- @@ -192,9 +192,10 @@ hal.executable private @add { } } } +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64]> // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> // CHECK: hal.executable.entry_point public @add -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: (%[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index) @@ -296,8 +297,9 @@ hal.executable private @add4D { } } // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64, 64]> // CHECK: hal.executable.entry_point public @add4D -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: (%[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index) @@ -378,8 +380,9 @@ hal.executable private @batch_matmul_tensors { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {nativeVectorSize = [1, 4, 4, 4], tileSizes = {{\[}}[], [1, 32, 32, 32], [1, 4, 4, 4]{{\]}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUTensorToVectors", workload_per_wg = [64, 64]> // CHECK: hal.executable.entry_point public 
@batch_matmul_tensors // CHECK-NEXT: (%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index @@ -393,6 +396,10 @@ hal.executable private @batch_matmul_tensors { // ----- +#compilation = #iree_codegen.compilation.info< + #iree_codegen.lowering.config, + #iree_codegen.translation.info<"CPUVectorization", workload_per_wg = [32, 32]>, + workgroup_size = []> hal.executable private @preset_config_matmul_tensors { hal.executable.variant @system_elf_x86_64, target = #hal.executable.target<"llvm", "system-elf-x86_64"> { hal.executable.entry_point @preset_config attributes {interface = @io, ordinal = 0 : index} @@ -427,7 +434,11 @@ hal.executable private @preset_config_matmul_tensors { %14 = affine.min affine_map<(d0)[s0] -> (-d0 + 512, s0)>(%arg1)[%workgroup_size_x] %15 = linalg.init_tensor [%13, %14] : tensor %16 = linalg.fill(%cst, %15) : f32, tensor -> tensor - %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = {passPipeline = "CPUVectorization", tileSizes = [[32, 32, 32]]}} ins(%8, %10 : tensor, tensor<256x?xf32>) outs(%16 : tensor) -> tensor + %17 = linalg.matmul { + __internal_linalg_transform__ = "workgroup", + compilation.info = #compilation} + ins(%8, %10 : tensor, tensor<256x?xf32>) + outs(%16 : tensor) -> tensor flow.dispatch.tensor.store %17, %2, offsets = [%arg0, %arg1], sizes = [%11, %12], strides = [1, 1] : tensor -> !flow.dispatch.tensor } } @@ -441,11 +452,12 @@ hal.executable private @preset_config_matmul_tensors { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {passPipeline = "CPUVectorization", tileSizes = {{\[}}[32, 32, 32]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 32)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUVectorization", workload_per_wg = [32, 32]> // CHECK: hal.executable.entry_point -// CHECK-SAME: translation.info = 
{passPipeline = "CPUVectorization", workloadPerWorkgroup = [32, 32]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[NWG_X:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] @@ -511,9 +523,10 @@ hal.executable @tensor_insert { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64]> // CHECK: hal.executable.entry_point public @tensor_insert_slice -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index @@ -548,12 +561,11 @@ hal.executable private @static_1d_fft_stage2 { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[64]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @static_1d_fft_stage2 -// CHECK-SAME: translation.info = { -// CHECK-SAME: passPipeline = "CPUDefault" -// CHECK-SAME: workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index): // CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index // CHECK-NEXT: %[[T0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] @@ -620,12 +632,11 @@ hal.executable private @static_3d_fft_stage3 { } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[64, 64, 64]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // 
CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64, 64]> // CHECK: hal.executable.entry_point public @static_3d_fft_stage3 -// CHECK-SAME: translation.info = { -// CHECK-SAME: passPipeline = "CPUDefault" -// CHECK-SAME: workloadPerWorkgroup = [64, 64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index): // CHECK-NEXT: %[[T0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] // CHECK-NEXT: %[[T1:.+]] = affine.apply #[[MAP0]]()[%[[ARG1]]] @@ -700,8 +711,9 @@ hal.executable private @outs_fusion { } } } +// CHECK: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64]> // CHECK: hal.executable.entry_point public @outs_fusion_fn -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // ----- @@ -768,9 +780,10 @@ hal.executable private @conv { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64, 64]> // CHECK: hal.executable.entry_point public @conv attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64, 64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index) // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]] // CHECK-DAG: %[[D1:.+]] = affine.apply #[[MAP0]]()[%[[ARG1]] @@ -844,8 +857,9 @@ hal.executable private @conv_static { } // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: 
#[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 64, 32]> // CHECK: hal.executable.entry_point public @conv_static attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 64, 32]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index) // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]] // CHECK-DAG: %[[D1:.+]] = affine.apply #[[MAP0]]()[%[[ARG1]] @@ -902,8 +916,9 @@ hal.executable private @generic_static { } // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [32, 8]> // CHECK: hal.executable.entry_point public @generic_static attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [32, 8]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index) // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]] @@ -960,11 +975,12 @@ hal.executable private @matmul_static { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {nativeVectorSize = [4, 4, 4], tileSizes = {{\[}}[], [28, 8, 24], [4, 4, 4]{{\]}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 28)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUTensorToVectors", workload_per_wg = [8, 28]> // CHECK: hal.executable.entry_point public @matmul_static attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUTensorToVectors", workloadPerWorkgroup = [8, 28]} +// CHECK-SAME: 
translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index) // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] @@ -1035,8 +1051,9 @@ hal.executable private @restrict_num_workgroups { // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> // CHECK-DAG: #[[MAP2:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64, 8, 4]> // CHECK: hal.executable.entry_point public @restrict_num_workgroups attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64, 8, 4]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, %[[ARG2:[a-zA-Z0-9]+]]: index) // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] // CHECK-DAG: %[[D1:.+]] = affine.apply #[[MAP1]]()[%[[ARG1]]] @@ -1074,9 +1091,10 @@ hal.executable private @test_exp_0 { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @test_exp_0 attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] @@ -1113,9 +1131,10 @@ hal.executable private @test_exp_1 { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// 
CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @test_exp_1 attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] @@ -1152,9 +1171,10 @@ hal.executable private @test_exp_2 { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @test_exp_2 attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] @@ -1191,9 +1211,10 @@ hal.executable private @test_exp_3 { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @test_exp_3 attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] @@ -1230,9 +1251,10 @@ hal.executable private @test_exp_4 { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] =
affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @test_exp_4 attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] @@ -1269,9 +1291,10 @@ hal.executable private @test_exp_5 { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"CPUDefault", workload_per_wg = [64]> // CHECK: hal.executable.entry_point public @test_exp_5 attributes -// CHECK-SAME: translation.info = {passPipeline = "CPUDefault", workloadPerWorkgroup = [64]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[D0:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] diff --git a/iree/compiler/Codegen/LLVMCPU/test/matmul_vectorization.mlir b/iree/compiler/Codegen/LLVMCPU/test/matmul_vectorization.mlir index e49121783ecb..21e0e9a74a0e 100644 --- a/iree/compiler/Codegen/LLVMCPU/test/matmul_vectorization.mlir +++ b/iree/compiler/Codegen/LLVMCPU/test/matmul_vectorization.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt -pass-pipeline="hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target{use-lowering-pipeline='builtin.func(iree-llvmcpu-vectorization)'}))" -split-input-file %s | IreeFileCheck %s // RUN: iree-opt -pass-pipeline="hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target{use-lowering-pipeline='builtin.func(iree-llvmcpu-vectorization{promote-workgroup-to-full-tiles}),cse'}))" -split-input-file %s | IreeFileCheck %s 
-check-prefix=CHECK-PROMOTED -#config = {nativeVectorSize = [4, 4, 4], tileSizes = [[64, 64], [32, 32, 32], [4, 4, 4]]} +#config = #iree_codegen.lowering.config hal.executable private @dynamic_matmul { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -114,7 +114,7 @@ hal.executable private @dynamic_matmul { // ----- -#config = {nativeVectorSize = [4, 4, 4], tileSizes = [[64, 64], [32, 32, 32], [4, 4, 4]]} +#config = #iree_codegen.lowering.config hal.executable private @matmul_i8_i8_i32 { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" diff --git a/iree/compiler/Codegen/LLVMCPU/test/tile_and_vectorize.mlir b/iree/compiler/Codegen/LLVMCPU/test/tile_and_vectorize.mlir index ddc4b8fb6d15..4a51e1ad099f 100644 --- a/iree/compiler/Codegen/LLVMCPU/test/tile_and_vectorize.mlir +++ b/iree/compiler/Codegen/LLVMCPU/test/tile_and_vectorize.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt %s -cse -iree-llvmcpu-tile-and-vectorize -cse -canonicalize -split-input-file | IreeFileCheck %s -#config0 = {tileSizes = [[64, 64]]} -#config1 = {nativeVectorSize = [4, 4, 4], tileSizes = [[64, 64], [32, 32, 32], [4, 4, 4]]} +#config0 = #iree_codegen.lowering.config +#config1 = #iree_codegen.lowering.config #map0 = affine_map<()[s0] -> (s0 * 64)> #map1 = affine_map<(d0) -> (64, -d0 + 383)> #map2 = affine_map<(d0) -> (64, -d0 + 513)> diff --git a/iree/compiler/Codegen/LLVMGPU/BUILD b/iree/compiler/Codegen/LLVMGPU/BUILD index e9cc75d421fc..51adc6e585a5 100644 --- a/iree/compiler/Codegen/LLVMGPU/BUILD +++ b/iree/compiler/Codegen/LLVMGPU/BUILD @@ -34,6 +34,7 @@ cc_library( deps = [ "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/Transforms", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/Flow/IR", diff --git a/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt 
b/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt index cf63b73c3033..f3c058ed86ae 100644 --- a/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt +++ b/iree/compiler/Codegen/LLVMGPU/CMakeLists.txt @@ -61,6 +61,7 @@ iree_cc_library( MLIRVectorToLLVM MLIRVectorToSCF iree::compiler::Codegen::Common + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::Transforms iree::compiler::Codegen::Utils diff --git a/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp b/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp index fa009cd74b6f..0d1ae4e7cdbe 100644 --- a/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp +++ b/iree/compiler/Codegen/LLVMGPU/KernelConfig.cpp @@ -8,7 +8,7 @@ #include -#include "iree/compiler/Codegen/Utils/Utils.h" +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "llvm/Support/Debug.h" @@ -100,7 +100,7 @@ static LogicalResult setContractConfig(FuncOp entryPoint, linalg::LinalgOp op) { } // Currently just a basic tile size to enable tiling and vectorization. // TODO: pick a more efficient tile size and tile at subgroup level. - SmallVector ts; + SmallVector ts; // Tile all the higher parallel dimension with a size of 1 and the 2 most // inner dimension with the tileX/tileY size. ts.append(op.getNumParallelLoops() - 2, 1); @@ -110,14 +110,14 @@ static LogicalResult setContractConfig(FuncOp entryPoint, linalg::LinalgOp op) { tileSizes.push_back(ts); // Workgroup level. 
return setOpConfigAndEntryPointFnTranslation( entryPoint, op, tileSizes, /*nativeVectorSizes=*/ArrayRef{}, - IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt, + IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt, workgroupSize); } static LogicalResult setFftConfig(FuncOp entryPoint, linalg_ext::FftOp op) { auto partitionedLoops = getPartitionedLoops(op); unsigned loopDepth = partitionedLoops.back() + 1; - SmallVector workgroupTileSize(loopDepth, 0); + SmallVector workgroupTileSize(loopDepth, 0); SmallVector workgroupSize = {cudaWarpSize, 1, 1}; // Tiling along partitioned loops with size 1. @@ -137,14 +137,14 @@ static LogicalResult setFftConfig(FuncOp entryPoint, linalg_ext::FftOp op) { TileSizesListType tileSizes = {workgroupTileSize}; return setOpConfigAndEntryPointFnTranslation( entryPoint, op, tileSizes, /*nativeVectorSizes=*/ArrayRef{}, - IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute, + IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute, workgroupSize); } // Basic default properties for linalg ops that haven't been tuned. 
static LogicalResult setRootDefaultConfig(FuncOp entryPoint, Operation *op) { - IREE::HAL::DispatchLoweringPassPipeline passPipeline = - IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute; + IREE::Codegen::DispatchLoweringPassPipeline passPipeline = + IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute; TileSizesListType tileSizes; SmallVector partitionedLoops = getPartitionedLoops(op); if (partitionedLoops.empty()) { @@ -210,43 +210,34 @@ static LogicalResult setRootDefaultConfig(FuncOp entryPoint, Operation *op) { tileSizes.emplace_back(std::move(workgroupTileSizes)); // Workgroup level return setOpConfigAndEntryPointFnTranslation( entryPoint, op, tileSizes, /*nativeVectorSizes=*/ArrayRef{}, - IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUVectorize, workgroupSize); + IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUVectorize, + workgroupSize); } /// Propagate the configuration annotated in the incoming IR. -static LogicalResult setUserConfig(FuncOp entryPointFn, Operation *computeOp, - IREE::HAL::LoweringConfig config) { - IREE::HAL::DispatchLoweringPassPipeline passPipeline = - IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUVectorize; - if (auto setPassPipeline = getLoweringPassPipeline(config)) { - passPipeline = setPassPipeline.getValue(); - } - SmallVector workgroupSize; - if (auto workgroupSizeAttr = config.workgroupSize()) { - workgroupSize = llvm::to_vector<4>( - llvm::map_range(workgroupSizeAttr, [](Attribute intAttr) { - return intAttr.cast().getInt(); - })); +static LogicalResult setUserConfig( + FuncOp entryPointFn, Operation *computeOp, + IREE::Codegen::CompilationInfoAttr compilationInfo) { + if (auto translationInfo = getTranslationInfo(entryPointFn)) { + return computeOp->emitOpError( + "multiple ops within dispatch trying to set the translation " + "info"); } - if (failed(setOpConfigAndEntryPointFnTranslation( - entryPointFn, computeOp, config, passPipeline, workgroupSize))) { - return failure(); - } - // Reset the op 
configuration to drop the pass-pipeline and workgroup size - // info. The op does not carry that information anymore. - auto resetConfig = IREE::HAL::LoweringConfig::get( - config.tileSizes(), config.nativeVectorSize(), - /*passPipeline =*/nullptr, - /*workgroupSize =*/nullptr, computeOp->getContext()); - setLoweringConfig(computeOp, resetConfig); + + SmallVector workgroupSize = compilationInfo.getWorkgroupSizeVals(); + setTranslationInfo(entryPointFn, compilationInfo.getTranslationInfo(), + workgroupSize); + setLoweringConfig(computeOp, compilationInfo.getLoweringConfig()); + eraseCompilationInfo(computeOp); return success(); } static LogicalResult setRootConfig(FuncOp entryPointFn, Operation *computeOp) { - if (IREE::HAL::LoweringConfig config = getLoweringConfig(computeOp)) { + if (IREE::Codegen::CompilationInfoAttr compilationInfo = + getCompilationInfo(computeOp)) { // If the op already has a lowering config coming from the IR use this and // bypass the heuristic. - return setUserConfig(entryPointFn, computeOp, config); + return setUserConfig(entryPointFn, computeOp, compilationInfo); } if (auto linalgOp = dyn_cast(computeOp)) { if (linalg::isaContractionOpInterface(linalgOp) && @@ -294,8 +285,9 @@ LogicalResult initGPULaunchConfig(ModuleOp moduleOp) { // anything. Without any compute ops, this shouldnt be using tile and // distribute. setTranslationInfo( - funcOp, IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute, - workgroupSize, workloadPerWorkgroup); + funcOp, + IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute, + workloadPerWorkgroup, workgroupSize); continue; } @@ -330,8 +322,9 @@ LogicalResult initGPULaunchConfig(ModuleOp moduleOp) { // anything. Without any compute ops, this shouldnt be using tile and // distribute. 
setTranslationInfo( - funcOp, IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute, - {1, 1, 1}, /*workloadPerWorkgroup=*/{}); + funcOp, + IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute, + /*workloadPerWorkgroup=*/{}, {1, 1, 1}); continue; } if (failed(setRootConfig(funcOp, rootOperation))) continue; @@ -342,7 +335,7 @@ LogicalResult initGPULaunchConfig(ModuleOp moduleOp) { // and distributed. The rest of the compilation must be structured to either // use `TileAndFuse` or they are independent configurations that are // determined based on the op. - IREE::HAL::LoweringConfig config = getLoweringConfig(rootOperation); + IREE::Codegen::LoweringConfigAttr config = getLoweringConfig(rootOperation); for (auto op : computeOps) { if (op == rootOperation) continue; setLoweringConfig(op, config); diff --git a/iree/compiler/Codegen/LLVMGPU/KernelConfig.h b/iree/compiler/Codegen/LLVMGPU/KernelConfig.h index 2717b90b7eb0..d085245cf93b 100644 --- a/iree/compiler/Codegen/LLVMGPU/KernelConfig.h +++ b/iree/compiler/Codegen/LLVMGPU/KernelConfig.h @@ -7,7 +7,6 @@ #ifndef IREE_COMPILER_CODEGEN_LLVMGPU_KERNELCONFIG_H_ #define IREE_COMPILER_CODEGEN_LLVMGPU_KERNELCONFIG_H_ -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "mlir/IR/BuiltinOps.h" namespace mlir { diff --git a/iree/compiler/Codegen/LLVMGPU/LLVMGPUDistributeSharedMemoryCopy.cpp b/iree/compiler/Codegen/LLVMGPU/LLVMGPUDistributeSharedMemoryCopy.cpp index a07d472f342c..5dfd252dcb19 100644 --- a/iree/compiler/Codegen/LLVMGPU/LLVMGPUDistributeSharedMemoryCopy.cpp +++ b/iree/compiler/Codegen/LLVMGPU/LLVMGPUDistributeSharedMemoryCopy.cpp @@ -7,6 +7,7 @@ #include #include +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/LLVMGPU/LLVMGPUUtils.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" diff --git a/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp 
b/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp index 371d33c3ee4d..5ef123937d43 100644 --- a/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp +++ b/iree/compiler/Codegen/LLVMGPU/LLVMGPULowerExecutableTarget.cpp @@ -4,10 +4,11 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/LLVMGPU/KernelConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtDialect.h" @@ -32,9 +33,9 @@ class LLVMGPULowerExecutableTargetPass LLVMGPULowerExecutableTargetPass> { public: void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert(); } LLVMGPULowerExecutableTargetPass() = default; @@ -72,16 +73,15 @@ void LLVMGPULowerExecutableTargetPass::runOnOperation() { // is fine. 
llvm::StringMap entryPoints = getAllEntryPoints(moduleOp); - Optional passPipeline; + Optional passPipeline; for (auto &it : entryPoints) { auto entryPointOp = it.second; - if (IREE::HAL::TranslationInfo translationInfo = + if (IREE::Codegen::TranslationInfoAttr translationInfo = getTranslationInfo(entryPointOp)) { - Optional currPipeline = - getLoweringPassPipeline(translationInfo); - if (!currPipeline) continue; + IREE::Codegen::DispatchLoweringPassPipeline currPipeline = + translationInfo.getDispatchLoweringPassPipeline(); if (passPipeline) { - if (currPipeline.getValue() != passPipeline.getValue()) { + if (currPipeline != passPipeline.getValue()) { moduleOp.emitError( "unhandled compilation of entry point function with different " "pass pipelines within a module"); @@ -98,13 +98,13 @@ void LLVMGPULowerExecutableTargetPass::runOnOperation() { if (!testLoweringConfiguration && passPipeline.hasValue()) { OpPassManager &nestedModulePM = executableLoweringPipeline.nest(); switch (*passPipeline) { - case IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUDistribute: + case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUDistribute: addGPUSimpleDistributePassPipeline(nestedModulePM); break; - case IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUVectorize: + case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUVectorize: addGPUVectorizationPassPipeline(nestedModulePM); break; - case IREE::HAL::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt: + case IREE::Codegen::DispatchLoweringPassPipeline::LLVMGPUMatmulSimt: addGPUMatmulSimtPassPipeline(nestedModulePM); break; default: diff --git a/iree/compiler/Codegen/LLVMGPU/LLVMGPURemoveTrivialLoops.cpp b/iree/compiler/Codegen/LLVMGPU/LLVMGPURemoveTrivialLoops.cpp index 383b74dd697e..df0e622bcaa1 100644 --- a/iree/compiler/Codegen/LLVMGPU/LLVMGPURemoveTrivialLoops.cpp +++ b/iree/compiler/Codegen/LLVMGPU/LLVMGPURemoveTrivialLoops.cpp @@ -4,6 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/LLVMGPU/LLVMGPUUtils.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" @@ -77,11 +78,8 @@ static SmallVector getNumWorkgroup( auto translationInfo = getTranslationInfo(entryPointOp); if (!translationInfo) return SmallVector(); - ArrayAttr workloadPerWorkgroupAttr = translationInfo.workloadPerWorkgroup(); - if (!workloadPerWorkgroupAttr) return SmallVector(); - auto workloadPerWorkgroup = llvm::to_vector<4>(llvm::map_range( - workloadPerWorkgroupAttr, - [](Attribute attr) { return attr.cast().getInt(); })); + SmallVector workloadPerWorkgroup = + translationInfo.getWorkloadPerWorkgroupVals(); if (workloadSize.size() != workloadPerWorkgroup.size()) return SmallVector(); SmallVector numWorkgroups; diff --git a/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp b/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp index 4409c334ab70..2a701f8d165c 100644 --- a/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp +++ b/iree/compiler/Codegen/LLVMGPU/LLVMGPUTileAndDistribute.cpp @@ -4,14 +4,13 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/LLVMGPU/KernelConfig.h" #include "iree/compiler/Codegen/LLVMGPU/LLVMGPUUtils.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/Transforms/Transforms.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" -#include "iree/compiler/Codegen/Utils/Utils.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "iree/compiler/Dialect/LinalgExt/Transforms/Transforms.h" #include "iree/compiler/Dialect/Util/IR/UtilOps.h" @@ -38,19 +37,14 @@ static void populateTilingReductionPatterns( auto tileSizesFn = [&](OpBuilder &builder, Operation *op) -> SmallVector { SmallVector partitionedLoops = getPartitionedLoops(op); - SmallVector tileSizes = getTileSizes(op, 0); - Location loc = op->getLoc(); - auto tileSizesVal = - llvm::to_vector<4>(llvm::map_range(tileSizes, [&](int64_t v) -> Value { - return builder.create(loc, v); - })); - auto zero = builder.create(loc, 0); + SmallVector tileSizes = getTileSizes(builder, op, 0); + auto zero = builder.create(op->getLoc(), 0); for (unsigned depth : partitionedLoops) { - if (depth < tileSizesVal.size()) { - tileSizesVal[depth] = zero; + if (depth < tileSizes.size()) { + tileSizes[depth] = zero; } } - return tileSizesVal; + return tileSizes; }; auto tilingOptions = linalg::LinalgTilingOptions() @@ -69,8 +63,8 @@ static void populateTilingReductionPatterns( /// Patterns for thread level tiling. 
static void populateTilingToInvocationPatterns( MLIRContext *context, OwningRewritePatternList &patterns, - SmallVector &workgroupSize, - SmallVector &workloadPerWorkgroup) { + SmallVectorImpl &workgroupSize, + SmallVectorImpl &workloadPerWorkgroup) { linalg::TileSizeComputationFunction getInnerTileSizeFn = [&](OpBuilder &builder, Operation *operation) { SmallVector tileSizesVal; @@ -95,7 +89,7 @@ static void populateTilingToInvocationPatterns( return tileSizesVal; }; - auto getThreadProcInfoFn = [workgroupSize]( + auto getThreadProcInfoFn = [&workgroupSize]( OpBuilder &builder, Location loc, ArrayRef parallelLoopRanges) { return getGPUThreadIdsAndCounts(builder, loc, parallelLoopRanges.size(), @@ -240,11 +234,8 @@ struct LLVMGPUTileAndDistributePass auto workgroupSize = llvm::to_vector<4>(llvm::map_range( getEntryPoint(funcOp).workgroup_size().getValue(), [&](Attribute attr) { return attr.cast().getInt(); })); - auto workloadPerWorkgroup = llvm::to_vector<4>(llvm::map_range( - getTranslationInfo(getEntryPoint(funcOp)) - .workloadPerWorkgroup() - .getValue(), - [&](Attribute attr) { return attr.cast().getInt(); })); + auto workloadPerWorkgroup = + getTranslationInfo(getEntryPoint(funcOp)).getWorkloadPerWorkgroupVals(); int64_t flatWorkgroupSize = workgroupSize[0] * workgroupSize[1] * workgroupSize[2]; diff --git a/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir b/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir index 2b7e6da2a411..e7aaf8d7a374 100644 --- a/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir +++ b/iree/compiler/Codegen/LLVMGPU/test/distribute_to_thread.mlir @@ -1,6 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-llvmgpu-tile-and-distribute))))' %s | IreeFileCheck %s -#config = {tileSizes = [[2, 256, 4]]} +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"LLVMGPUMatmulSimt", workload_per_wg = [256, 
2]> #executable_target_cuda_nvptx_fb = #hal.executable.target<"cuda", "cuda-nvptx-fb"> #map0 = affine_map<()[s0] -> (s0 * 2)> #map1 = affine_map<()[s0] -> (s0 * 256)> @@ -12,7 +13,7 @@ hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb { hal.executable.entry_point @dot_dispatch_0 attributes { interface = @legacy_io, ordinal = 0 : index, - translation.info = {passPipeline = "LLVMGPUMatmulSimt" : i32, workloadPerWorkgroup = [256, 2]}, + translation.info = #translation, workgroup_size = [64 : index, 1 : index, 1 : index]} builtin.module { builtin.func @dot_dispatch_0() { @@ -86,14 +87,15 @@ hal.executable.variant @cuda, target = #executable_target_cuda_nvptx_fb { // ----- -#config = {tileSizes = [[]]} +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"LLVMGPUVectorize", workload_per_wg = []> // Pure reducion case, skip tiling. hal.executable @reduction_dispatch { hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> { hal.executable.entry_point @predict_dispatch_153 attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = "LLVMGPUVectorize" : i32}, + translation.info = #translation, workgroup_size = [1: index, 1: index, 1: index]} builtin.module { builtin.func @predict_dispatch_153() { @@ -120,7 +122,7 @@ hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvpt } } } -// CHECK: #[[CONFIG:.+]] = {tileSizes = {{\[}}[]{{\]}}} +// CHECK: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK: hal.executable public @reduction_dispatch // CHECK: linalg.fill // CHECK-SAME: lowering.config = #[[CONFIG]] diff --git a/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir b/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir index b67847a345eb..8690542645b8 100644 --- a/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir +++ b/iree/compiler/Codegen/LLVMGPU/test/gpu_set_num_workgroups.mlir @@ -33,11 
+33,11 @@ hal.executable @add_dispatch_0 { } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[256]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 256)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUVectorize", workload_per_wg = [256]> // CHECK: hal.executable.entry_point public @add_dispatch_0 -// CHECK-SAME: passPipeline = "LLVMGPUVectorize" -// CHECK-SAME: workloadPerWorkgroup = [256] +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [64 : index, 1 : index, 1 : index] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index @@ -92,12 +92,12 @@ hal.executable private @dot_dispatch_1 { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[4, 2, 4]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> // CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUMatmulSimt", workload_per_wg = [2, 4]> // CHECK: hal.executable.entry_point public @dot_dispatch_1 -// CHECK-SAME: passPipeline = "LLVMGPUMatmulSimt" -// CHECK-SAME: workloadPerWorkgroup = [2, 4] +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [2 : index, 4 : index, 1 : index] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index, // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index @@ -143,18 +143,18 @@ hal.executable @reduction_dispatch { } } -// CHECK-DAG: #[[CONFIG0:.+]] = {passPipeline = "LLVMGPUDistribute"} -// CHECK-DAG: #[[CONFIG1:.+]] = {tileSizes = {{\[}}[]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = []> // CHECK: hal.executable.entry_point public 
@predict_dispatch_153 -// CHECK-SAME: translation.info = #[[CONFIG0]] +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [1 : index, 1 : index, 1 : index] // CHECK-NEXT: ^bb0(%[[ARG0:[a-zA-Z0-9]+]]: index, // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index // CHECK: hal.return %[[C1]], %[[C1]], %[[C1]] // CHECK: linalg.fill -// CHECK-SAME: lowering.config = #[[CONFIG1]] +// CHECK-SAME: lowering.config = #[[CONFIG]] // CHECK: linalg.generic -// CHECK-SAME: lowering.config = #[[CONFIG1]] +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -199,9 +199,10 @@ hal.executable @tensor_insert { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [128, 1]> // CHECK: hal.executable.entry_point public @tensor_insert_slice -// CHECK-SAME: translation.info = {passPipeline = "LLVMGPUDistribute", workloadPerWorkgroup = [128, 1]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index @@ -246,10 +247,11 @@ hal.executable @tensor_insert { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[1, 256]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 256)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUVectorize", workload_per_wg = [256, 1]> // CHECK: hal.executable.entry_point public @tensor_insert_slice -// CHECK-SAME: translation.info = {passPipeline = "LLVMGPUVectorize", workloadPerWorkgroup = [256, 1]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index @@ 
-286,11 +288,11 @@ hal.executable private @static_1d_fft_stage2 { } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [4]> // CHECK: hal.executable.entry_point public @static_1d_fft_stage2 -// CHECK-SAME: translation.info = {passPipeline = "LLVMGPUDistribute" -// CHECK-SAME: workloadPerWorkgroup = [4]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [32 : index, 1 : index, 1 : index] // CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %{{.+}}: index, %{{.+}}: index): // CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index @@ -351,11 +353,11 @@ hal.executable private @static_3d_fft_stage3 { } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[1, 1, 8]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [8, 1, 1]> // CHECK: hal.executable.entry_point public @static_3d_fft_stage3 -// CHECK-SAME: translation.info = {passPipeline = "LLVMGPUDistribute" -// CHECK-SAME: workloadPerWorkgroup = [8, 1, 1]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [32 : index, 1 : index, 1 : index] // CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index): // CHECK-NEXT: %[[T:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] @@ -367,6 +369,10 @@ hal.executable private @static_3d_fft_stage3 { // ----- +#compilation = #iree_codegen.compilation.info< + #iree_codegen.lowering.config, + #iree_codegen.translation.info<"LLVMGPUMatmulSimt", workload_per_wg = [256, 32]>, + workgroup_size = [16, 8, 1]> hal.executable @user_config { hal.executable.variant public @cuda_nvptx_fb, target = 
#hal.executable.target<"cuda", "cuda-nvptx-fb"> { hal.executable.entry_point public @_lowering_config_test_dispatch_1 attributes {interface = @io, ordinal = 0 : index} @@ -401,7 +407,7 @@ hal.executable.variant public @cuda_nvptx_fb, target = #hal.executable.target<"c %14 = affine.min affine_map<(d0)[s0] -> (-d0 + 1024, s0)>(%arg1)[%workgroup_size_x] %15 = linalg.init_tensor [%13, %14] : tensor %16 = linalg.fill(%cst, %15) : f32, tensor -> tensor - %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = {passPipeline = "LLVMGPUMatmulSimt", tileSizes = [[32, 256, 64]], workgroupSize = [16, 8, 1]}} ins(%8, %10 : tensor, tensor<256x?xf32>) outs(%16 : tensor) -> tensor + %17 = linalg.matmul {__internal_linalg_transform__ = "workgroup", compilation.info = #compilation} ins(%8, %10 : tensor, tensor<256x?xf32>) outs(%16 : tensor) -> tensor flow.dispatch.tensor.store %17, %2, offsets = [%arg0, %arg1], sizes = [%11, %12], strides = [1, 1] : tensor -> !flow.dispatch.tensor } } @@ -416,10 +422,10 @@ hal.executable.variant public @cuda_nvptx_fb, target = #hal.executable.target<"c } } -// CHECK-DAG: #[[CONFIG:.+]] = {{{.*}}tileSizes = {{\[}}[32, 256, 64]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK: hal.executable.entry_point public @_lowering_config_test_dispatch_1 -// CHECK-SAME: passPipeline = "LLVMGPUMatmulSimt" -// CHECK-SAME: workloadPerWorkgroup = [256, 32] +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [16 : index, 8 : index, 1 : index] // CHECK: func @_lowering_config_test_dispatch_1 // CHECK: linalg.fill diff --git a/iree/compiler/Codegen/LLVMGPU/test/remove_loops.mlir b/iree/compiler/Codegen/LLVMGPU/test/remove_loops.mlir index 46b6931e93c2..bed9ee396197 100644 --- a/iree/compiler/Codegen/LLVMGPU/test/remove_loops.mlir +++ b/iree/compiler/Codegen/LLVMGPU/test/remove_loops.mlir @@ -42,12 +42,13 @@ hal.executable private @dispatch_0 { // ----- // CHECK-LABEL: func 
@workgroup_tile_loop() +#translation = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [32]> hal.executable private @workgroup_tile_loop { hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> { hal.executable.entry_point @workgroup_tile_loop attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = "LLVMGPUDistribute", workloadPerWorkgroup = [32]} + translation.info = #translation } builtin.module { builtin.func @workgroup_tile_loop() { @@ -71,12 +72,13 @@ hal.executable private @workgroup_tile_loop { // ----- // CHECK-LABEL: func @workgroup_tile_loop_negative() +#translation = #iree_codegen.translation.info<"LLVMGPUDistribute", workload_per_wg = [16]> hal.executable private @workgroup_tile_loop_negative { hal.executable.variant @cuda, target = #hal.executable.target<"cuda", "cuda-nvptx-fb"> { hal.executable.entry_point @workgroup_tile_loop_negative attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = "LLVMGPUDistribute", workloadPerWorkgroup = [16]} + translation.info = #translation } builtin.module { builtin.func @workgroup_tile_loop_negative() { diff --git a/iree/compiler/Codegen/Passes.h b/iree/compiler/Codegen/Passes.h index b513b99e292a..84b4fb5677cc 100644 --- a/iree/compiler/Codegen/Passes.h +++ b/iree/compiler/Codegen/Passes.h @@ -10,7 +10,6 @@ #include #include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "mlir/Dialect/Linalg/Transforms/Transforms.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassOptions.h" diff --git a/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp b/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp index 38240a87e672..877cac71d713 100644 --- a/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp +++ b/iree/compiler/Codegen/SPIRV/AdrenoConfig.cpp @@ -13,7 +13,9 @@ #include #include "iree/compiler/Codegen/SPIRV/KernelConfig.h" +#include "llvm/ADT/TypeSwitch.h" #include 
"mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/IR/BuiltinOps.h" namespace mlir { namespace iree_compiler { diff --git a/iree/compiler/Codegen/SPIRV/BUILD b/iree/compiler/Codegen/SPIRV/BUILD index a63331f7d242..bc50e24df4b4 100644 --- a/iree/compiler/Codegen/SPIRV/BUILD +++ b/iree/compiler/Codegen/SPIRV/BUILD @@ -39,6 +39,7 @@ cc_library( deps = [ "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/Transforms", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/Flow/IR", diff --git a/iree/compiler/Codegen/SPIRV/CMakeLists.txt b/iree/compiler/Codegen/SPIRV/CMakeLists.txt index f6bbb282df2e..9929b6367b8b 100644 --- a/iree/compiler/Codegen/SPIRV/CMakeLists.txt +++ b/iree/compiler/Codegen/SPIRV/CMakeLists.txt @@ -71,6 +71,7 @@ iree_cc_library( MLIRVectorInterfaces MLIRVectorToSPIRV iree::compiler::Codegen::Common + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::Transforms iree::compiler::Codegen::Utils diff --git a/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp b/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp index a956c04c08c4..4f9253a2b317 100644 --- a/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp +++ b/iree/compiler/Codegen/SPIRV/ConvertToSPIRVPass.cpp @@ -15,11 +15,11 @@ #include +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/SPIRV/Utils.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "iree/compiler/Dialect/Util/IR/UtilOps.h" #include "llvm/ADT/DenseMapInfo.h" diff --git a/iree/compiler/Codegen/SPIRV/KernelConfig.cpp b/iree/compiler/Codegen/SPIRV/KernelConfig.cpp index 612947b40c09..e4439d601c29 100644 --- 
a/iree/compiler/Codegen/SPIRV/KernelConfig.cpp +++ b/iree/compiler/Codegen/SPIRV/KernelConfig.cpp @@ -6,11 +6,10 @@ #include "iree/compiler/Codegen/SPIRV/KernelConfig.h" +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/SPIRV/Utils.h" #include "iree/compiler/Codegen/Transforms/Transforms.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" -#include "iree/compiler/Codegen/Utils/Utils.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" @@ -18,6 +17,7 @@ #include "mlir/Dialect/SPIRV/IR/SPIRVAttributes.h" #include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h" #include "mlir/Dialect/SPIRV/IR/TargetAndABI.h" +#include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Matchers.h" #define DEBUG_TYPE "iree-spirv-kernel-config" @@ -34,12 +34,11 @@ namespace iree_compiler { // TODO(ravishankarm): Remove this when that pipeline is deprecated. 
static LogicalResult setTranslationUsingDistributeToGlobalId( FuncOp funcOp, ArrayRef workgroupSize) { - auto entryPointOp = getEntryPoint(funcOp); - MLIRContext *context = entryPointOp.getContext(); - auto translationInfo = buildTranslationInfo( - IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistributeToGlobalID, - /*workloadPerWorkgroup =*/{}, context); - setTranslationInfo(entryPointOp, translationInfo, workgroupSize); + setTranslationInfo( + funcOp, + IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistributeToGlobalID, + /*workloadPerWorkgroup=*/{}, workgroupSize); + MLIRContext *context = funcOp.getContext(); OpBuilder builder(context); int64_t workgroupSizeX = workgroupSize[0]; auto numWorkgroupsFn = [workgroupSizeX](OpBuilder &b, Location loc, @@ -107,9 +106,9 @@ LogicalResult setConvOpConfig(linalg::LinalgOp linalgOp, int64_t residualThreads = subgroupSize; int64_t residualTilingFactor = bestTilingFactor; - SmallVector workgroupSize(3, 1); // (X, Y, Z) - SmallVector workgroupTileSizes(4, 0); // (N, OH, OW, OC) - SmallVector invocationTileSizes(4, 0); // (N, OH, OW, OC) + SmallVector workgroupSize(3, 1); // (X, Y, Z) + SmallVector workgroupTileSizes(4, 0); // (N, OH, OW, OC) + SmallVector invocationTileSizes(4, 0); // (N, OH, OW, OC) // Deduce the configuration for the OC dimension. 
for (int64_t x = residualThreads; x >= 2; x >>= 1) { @@ -181,7 +180,7 @@ LogicalResult setConvOpConfig(linalg::LinalgOp linalgOp, } } - auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize; + auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize; TileSizesListType tileSizes; tileSizes.push_back(workgroupTileSizes); tileSizes.push_back(invocationTileSizes); @@ -244,10 +243,10 @@ LogicalResult setMatmulOpConfig(linalg::LinalgOp op, int64_t residualThreads = bestX * bestY; int64_t residualTilingFactor = (bestThreadM + bestThreadK) * bestThreadN; - SmallVector workgroupSize(3, 1); // (X, Y, Z) - SmallVector workgroupTileSizes(2 + isBM, 0); // (B, M, N) - SmallVector invocationTileSizes(2 + isBM, 0); // (B, M, N) - SmallVector reductionTileSizes(3 + isBM, 0); // (B, M, N, K) + SmallVector workgroupSize(3, 1); // (X, Y, Z) + SmallVector workgroupTileSizes(2 + isBM, 0); // (B, M, N) + SmallVector invocationTileSizes(2 + isBM, 0); // (B, M, N) + SmallVector reductionTileSizes(3 + isBM, 0); // (B, M, N, K) if (isBM) workgroupTileSizes[0] = invocationTileSizes[0] = 1; @@ -302,7 +301,7 @@ LogicalResult setMatmulOpConfig(linalg::LinalgOp op, } if (reductionTileSizes[2 + isBM] == 0) return success(); - auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize; + auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize; TileSizesListType tileSizes; tileSizes.push_back(workgroupTileSizes); tileSizes.push_back(invocationTileSizes); @@ -321,13 +320,13 @@ LogicalResult setMatmulOpConfig(linalg::LinalgOp op, static LogicalResult setOpConfig(spirv::ResourceLimitsAttr limits, linalg_ext::FftOp op) { const int64_t subgroupSize = limits.subgroup_size().getValue().getSExtValue(); - auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistribute; + auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute; std::array workgroupSize = {subgroupSize, 1, 1}; auto partitionedLoops = 
getPartitionedLoops(op); unsigned loopDepth = partitionedLoops.back() + 1; - SmallVector workgroupTileSize(loopDepth, 0); + SmallVector workgroupTileSize(loopDepth, 0); // Tiling along partitioned loops with size 1. for (int64_t loopIndex : partitionedLoops) { @@ -357,7 +356,7 @@ static LogicalResult setDefaultOpConfig(spirv::ResourceLimitsAttr limits, Operation *op) { auto partitionedLoops = getPartitionedLoops(op); if (partitionedLoops.empty()) { - auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize; + auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize; std::array workgroupSize = {1, 1, 1}; auto funcOp = op->getParentOfType(); return setOpConfigAndEntryPointFnTranslation(funcOp, op, {}, {}, pipeline, @@ -367,7 +366,7 @@ static LogicalResult setDefaultOpConfig(spirv::ResourceLimitsAttr limits, const int64_t subgroupSize = limits.subgroup_size().getValue().getSExtValue(); int64_t numElementsPerWorkgroup = subgroupSize; int64_t numElementsPerThread = 1; - auto pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistribute; + auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute; // Returns true if the given `operand` has 32-bit element type. auto has32BitElementType = [](Value operand) { @@ -415,15 +414,15 @@ static LogicalResult setDefaultOpConfig(spirv::ResourceLimitsAttr limits, if (vectorize) { numElementsPerThread = numElementsPerWorkgroup / subgroupSize; - pipeline = IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize; + pipeline = IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize; } } std::array workgroupSize = {subgroupSize, 1, 1}; unsigned loopDepth = partitionedLoops.back() + 1; - SmallVector workgroupTileSize(loopDepth, 0); - SmallVector threadTileSize(loopDepth, 0); + SmallVector workgroupTileSize(loopDepth, 0); + SmallVector threadTileSize(loopDepth, 0); // Tiling along partitioned loops with size 1. 
for (int64_t loopIndex : partitionedLoops) { @@ -597,8 +596,9 @@ LogicalResult initSPIRVLaunchConfig(ModuleOp module) { SmallVector workloadPerWorkgroup(tiledLoops.size(), 1); workloadPerWorkgroup.front() = subgroupSize * 4; setTranslationInfo( - funcOp, IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistribute, - workgroupSize, workloadPerWorkgroup); + funcOp, + IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute, + workloadPerWorkgroup, workgroupSize); return success(); } return funcOp.emitError("contains no root Linalg operation"); @@ -610,7 +610,7 @@ LogicalResult initSPIRVLaunchConfig(ModuleOp module) { // and distributed. The rest of the compilation must be structured to either // use `TileAndFuse` or they are independent configurations that are // determined based on the op. - IREE::HAL::LoweringConfig config = getLoweringConfig(rootOperation); + IREE::Codegen::LoweringConfigAttr config = getLoweringConfig(rootOperation); for (auto op : computeOps) { if (op == rootOperation) continue; setLoweringConfig(op, config); diff --git a/iree/compiler/Codegen/SPIRV/KernelConfig.h b/iree/compiler/Codegen/SPIRV/KernelConfig.h index c0d4f31c8f24..81858940884d 100644 --- a/iree/compiler/Codegen/SPIRV/KernelConfig.h +++ b/iree/compiler/Codegen/SPIRV/KernelConfig.h @@ -17,9 +17,9 @@ #include -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "mlir/Dialect/Linalg/IR/LinalgInterfaces.h" #include "mlir/Dialect/SPIRV/IR/TargetAndABI.h" +#include "mlir/IR/BuiltinOps.h" namespace mlir { namespace iree_compiler { diff --git a/iree/compiler/Codegen/SPIRV/MaliConfig.cpp b/iree/compiler/Codegen/SPIRV/MaliConfig.cpp index 1c19955f33ad..9577d43d1838 100644 --- a/iree/compiler/Codegen/SPIRV/MaliConfig.cpp +++ b/iree/compiler/Codegen/SPIRV/MaliConfig.cpp @@ -13,7 +13,9 @@ #include #include "iree/compiler/Codegen/SPIRV/KernelConfig.h" +#include "llvm/ADT/TypeSwitch.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/IR/BuiltinOps.h" namespace mlir { 
namespace iree_compiler { diff --git a/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp b/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp index 3d07b75e57a9..dc1e4470dce7 100644 --- a/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp +++ b/iree/compiler/Codegen/SPIRV/NVIDIAConfig.cpp @@ -10,10 +10,12 @@ // //===----------------------------------------------------------------------===// +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/SPIRV/KernelConfig.h" #include "iree/compiler/Codegen/Utils/Utils.h" #include "llvm/Support/Debug.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/IR/BuiltinOps.h" #define DEBUG_TYPE "iree-spirv-nvidia-config" @@ -80,8 +82,8 @@ static LogicalResult setOpConfig(const spirv::TargetEnv &targetEnv, getElementType(init), lhsShape[0], rhsShape[1], lhsShape[1]); if (!coopMatSize) return success(); - auto pipeline = - IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorizeToCooperativeOps; + auto pipeline = IREE::Codegen::DispatchLoweringPassPipeline:: + SPIRVVectorizeToCooperativeOps; // For now only support one subgroup per workgroup because in the above // configuration deduction step we only consider whether the input workload is diff --git a/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp b/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp index fbc151eb6054..f559ee488bbe 100644 --- a/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp +++ b/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp @@ -4,10 +4,11 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/SPIRV/KernelConfig.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtDialect.h" @@ -36,7 +37,8 @@ class SPIRVLowerExecutableTargetPass SPIRVLowerExecutableTargetPass(const SPIRVLowerExecutableTargetPass &pass) {} void getDependentDialects(DialectRegistry &registry) const override { - registry.insert(); @@ -71,16 +73,15 @@ void SPIRVLowerExecutableTargetPass::runOnOperation() { // is fine. llvm::StringMap entryPoints = getAllEntryPoints(moduleOp); - Optional passPipeline; + Optional passPipeline; for (auto &it : entryPoints) { auto entryPointOp = it.second; - if (IREE::HAL::TranslationInfo translationInfo = + if (IREE::Codegen::TranslationInfoAttr translationInfo = getTranslationInfo(entryPointOp)) { - Optional currPipeline = - getLoweringPassPipeline(translationInfo); - if (!currPipeline) continue; + IREE::Codegen::DispatchLoweringPassPipeline currPipeline = + translationInfo.getDispatchLoweringPassPipeline(); if (passPipeline) { - if (currPipeline.getValue() != passPipeline.getValue()) { + if (currPipeline != passPipeline.getValue()) { moduleOp.emitError( "unhandled compilation of entry point function with different " "pass pipelines within a module"); @@ -97,16 +98,17 @@ void SPIRVLowerExecutableTargetPass::runOnOperation() { if (!testLoweringConfiguration && passPipeline.hasValue()) { OpPassManager &nestedModulePM = executableLoweringPipeline.nest(); switch (*passPipeline) { - case IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistribute: + case IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute: 
addSPIRVTileAndDistributePassPipeline(nestedModulePM); break; - case IREE::HAL::DispatchLoweringPassPipeline::SPIRVDistributeToGlobalID: + case IREE::Codegen::DispatchLoweringPassPipeline:: + SPIRVDistributeToGlobalID: addSPIRVDistributeToGlobalIDPassPipeline(nestedModulePM); break; - case IREE::HAL::DispatchLoweringPassPipeline::SPIRVVectorize: + case IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize: addSPIRVTileAndVectorizePassPipeline(nestedModulePM); break; - case IREE::HAL::DispatchLoweringPassPipeline:: + case IREE::Codegen::DispatchLoweringPassPipeline:: SPIRVVectorizeToCooperativeOps: addSPIRVTileAndVectorizeToCooperativeOpsPassPipeline(nestedModulePM); break; diff --git a/iree/compiler/Codegen/SPIRV/SPIRVRemoveOneTripTiledLoops.cpp b/iree/compiler/Codegen/SPIRV/SPIRVRemoveOneTripTiledLoops.cpp index 079f8953052d..21b7e62da190 100644 --- a/iree/compiler/Codegen/SPIRV/SPIRVRemoveOneTripTiledLoops.cpp +++ b/iree/compiler/Codegen/SPIRV/SPIRVRemoveOneTripTiledLoops.cpp @@ -4,6 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/Transforms/Transforms.h" @@ -112,11 +113,7 @@ class SPIRVRemoveOneTripTiledLoopPass auto translationInfo = getTranslationInfo(entryPointOp); if (!translationInfo) return; - ArrayAttr workloadPerWorkgroupAttr = translationInfo.workloadPerWorkgroup(); - if (!workloadPerWorkgroupAttr) return; - auto workloadPerWorkgroup = llvm::to_vector<4>(llvm::map_range( - workloadPerWorkgroupAttr, - [](Attribute attr) { return attr.cast().getInt(); })); + auto workloadPerWorkgroup = translationInfo.getWorkloadPerWorkgroupVals(); MLIRContext *context = &getContext(); removeOneTripTiledLoops(context, funcOp, cast(rootOp[0]), diff --git a/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp b/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp index 07b3a62864c4..15dc4fe54ced 100644 --- a/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp +++ b/iree/compiler/Codegen/SPIRV/SPIRVTileAndDistribute.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/SPIRV/Utils.h" #include "iree/compiler/Codegen/Transforms/Transforms.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" -#include "iree/compiler/Codegen/Utils/Utils.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtOps.h" #include "iree/compiler/Dialect/LinalgExt/Transforms/Transforms.h" #include "llvm/ADT/STLExtras.h" @@ -79,11 +79,7 @@ static void populateTilingToInvocationPatterns(MLIRContext *context, RewritePatternSet &patterns) { linalg::TileSizeComputationFunction getInnerTileSizeFn = [&](OpBuilder &builder, Operation *op) { - SmallVector tileSizes = 
getTileSizes(op, 1); - return llvm::to_vector<4>( - llvm::map_range(tileSizes, [&](int64_t v) -> Value { - return builder.create(op->getLoc(), v); - })); + return getTileSizes(builder, op, 1); }; auto getThreadProcInfoFn = [](OpBuilder &builder, Location loc, @@ -161,11 +157,7 @@ static void populateTilingReductionPatterns( MLIRContext *context, RewritePatternSet &patterns, linalg::LinalgTransformationFilter marker) { auto getTileSizeFn = [&](OpBuilder &builder, Operation *op) { - SmallVector tileSizes = getTileSizes(op, 2); - return llvm::to_vector<4>( - llvm::map_range(tileSizes, [&](int64_t v) -> Value { - return builder.create(op->getLoc(), v); - })); + return getTileSizes(builder, op, 2); }; auto tilingOptions = linalg::LinalgTilingOptions() diff --git a/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp b/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp index dc611dec4cdc..3a3c10b5696c 100644 --- a/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp +++ b/iree/compiler/Codegen/SPIRV/SPIRVTileAndVectorizeToCooperativeOps.cpp @@ -13,6 +13,7 @@ #include +#include "iree/compiler/Codegen/Dialect/LoweringConfig.h" #include "iree/compiler/Codegen/PassDetail.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Codegen/SPIRV/KernelConfig.h" @@ -20,7 +21,6 @@ #include "iree/compiler/Codegen/Transforms/Transforms.h" #include "iree/compiler/Codegen/Utils/MarkerUtils.h" #include "iree/compiler/Codegen/Utils/Utils.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "llvm/Support/Debug.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" #include "mlir/Dialect/GPU/GPUDialect.h" diff --git a/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir b/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir index aadabbccff45..c1781560ce43 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_adreno_conv.mlir @@ -1,4 +1,4 @@ 
-// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s // Conv - large OC - distribute to only one workgroup dimension. @@ -74,18 +74,20 @@ hal.executable @conv_112x112x512 { } } -// CHECK-LABEL: hal.executable.entry_point public @conv_112x112x512 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [256, 8, 1]} -// CHECK-SAME: workgroup_size = [64 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C2:.+]] = arith.constant 2 : index -// CHECK-NEXT: %[[C14:.+]] = arith.constant 14 : index -// CHECK-NEXT: %[[C112:.+]] = arith.constant 112 : index -// CHECK-NEXT: hal.return %[[C2]], %[[C14]], %[[C112]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [256, 8, 1]> +// CHECK: hal.executable.entry_point public @conv_112x112x512 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [64 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C2:.+]] = arith.constant 2 : index +// CHECK-NEXT: %[[C14:.+]] = arith.constant 14 : index +// CHECK-NEXT: %[[C112:.+]] = arith.constant 112 : index +// CHECK-NEXT: hal.return %[[C2]], %[[C14]], %[[C112]] -// CHECK: func @conv_112x112x512() -// CHECK: linalg.conv_2d_nhwc_hwcf -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 1, 8, 256], [0, 1, 8, 4], [0, 0, 0, 0, 1, 1, 4]]} +// CHECK: func @conv_112x112x512() +// CHECK: linalg.conv_2d_nhwc_hwcf +// 
CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -163,18 +165,20 @@ hal.executable @conv_112x112x32 { } } -// CHECK-LABEL: hal.executable.entry_point public @conv_112x112x32 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 16, 4]} -// CHECK-SAME: workgroup_size = [8 : index, 8 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[C7:.+]] = arith.constant 7 : index -// CHECK-NEXT: %[[C28:.+]] = arith.constant 28 : index -// CHECK-NEXT: hal.return %[[C1]], %[[C7]], %[[C28]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 16, 4]> +// CHECK: hal.executable.entry_point public @conv_112x112x32 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 8 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[C7:.+]] = arith.constant 7 : index +// CHECK-NEXT: %[[C28:.+]] = arith.constant 28 : index +// CHECK-NEXT: hal.return %[[C1]], %[[C7]], %[[C28]] -// CHECK: func @conv_112x112x32() -// CHECK: linalg.conv_2d_nhwc_hwcf -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 4, 16, 32], [0, 4, 2, 4], [0, 0, 0, 0, 1, 1, 4]]} +// CHECK: func @conv_112x112x32() +// CHECK: linalg.conv_2d_nhwc_hwcf +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -251,17 +255,19 @@ hal.executable @conv_16x16x16 { } } -// CHECK-LABEL: hal.executable.entry_point public @conv_16x16x16 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 8, 8]} -// CHECK-SAME: workgroup_size = [4 : index, 4 : index, 4 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: 
%[[C1:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[C2:.+]] = arith.constant 2 : index -// CHECK-NEXT: hal.return %[[C1]], %[[C2]], %[[C2]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 8, 8]> +// CHECK: hal.executable.entry_point public @conv_16x16x16 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [4 : index, 4 : index, 4 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[C2:.+]] = arith.constant 2 : index +// CHECK-NEXT: hal.return %[[C1]], %[[C2]], %[[C2]] -// CHECK: func @conv_16x16x16() -// CHECK: linalg.conv_2d_nhwc_hwcf -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 8, 8, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4]]} +// CHECK: func @conv_16x16x16() +// CHECK: linalg.conv_2d_nhwc_hwcf +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -340,17 +346,19 @@ hal.executable @dwconv_28x28x144 { } } -// CHECK-LABEL: hal.executable.entry_point public @dwconv_28x28x144 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]} -// CHECK-SAME: workgroup_size = [4 : index, 4 : index, 4 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C9:.+]] = arith.constant 9 : index -// CHECK-NEXT: %[[C7:.+]] = arith.constant 7 : index -// CHECK-NEXT: hal.return %[[C9]], %[[C7]], %[[C7]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]> +// CHECK: hal.executable.entry_point public @dwconv_28x28x144 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [4 : index, 4 : index, 4 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: 
index, %{{.+}}: index): +// CHECK-NEXT: %[[C9:.+]] = arith.constant 9 : index +// CHECK-NEXT: %[[C7:.+]] = arith.constant 7 : index +// CHECK-NEXT: hal.return %[[C9]], %[[C7]], %[[C7]] -// CHECK: func @dwconv_28x28x144() -// CHECK: linalg.depthwise_conv2D_nhw -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 4, 4, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]]} +// CHECK: func @dwconv_28x28x144() +// CHECK: linalg.depthwise_conv2D_nhw +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -428,14 +436,15 @@ hal.executable @dwconv_4x4x8 { } } } +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 4, 4]> +// CHECK: hal.executable.entry_point public @dwconv_4x4x8 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [2 : index, 4 : index, 4 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: hal.return %[[C1]], %[[C1]], %[[C1]] -// CHECK-LABEL: hal.executable.entry_point public @dwconv_4x4x8 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 4, 4]} -// CHECK-SAME: workgroup_size = [2 : index, 4 : index, 4 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index -// CHECK-NEXT: hal.return %[[C1]], %[[C1]], %[[C1]] - -// CHECK: func @dwconv_4x4x8() -// CHECK: linalg.depthwise_conv2D_nhw -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 4, 4, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]]} +// CHECK: func @dwconv_4x4x8() +// CHECK: linalg.depthwise_conv2D_nhw +// CHECK-SAME: lowering.config = #[[CONFIG]] diff --git a/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir index 8412a2808af6..251f21fcf408 100644 --- 
a/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_adreno_matmul.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s // Large matmul that can match the best tiling scheme. @@ -62,18 +62,22 @@ hal.executable @matmul_1024x2048x512 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_1024x2048x512 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [128, 32]} -// CHECK-SAME: workgroup_size = [32 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 128)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_1024x2048x512() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[32, 128], [16, 4], [0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [128, 32]> +// CHECK: hal.executable.entry_point public @matmul_1024x2048x512 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [32 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, 
%[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_1024x2048x512() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -139,18 +143,22 @@ hal.executable @matmul_3136x24x96 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_3136x24x96 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 448]} -// CHECK-SAME: workgroup_size = [2 : index, 32 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 448)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_3136x24x96() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[448, 8], [14, 4], [0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 448)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 448]> +// CHECK: hal.executable.entry_point public @matmul_3136x24x96 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [2 : index, 32 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply 
#[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_3136x24x96() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -216,18 +224,22 @@ hal.executable @matmul_196x64x192 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_196x64x192 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 28]} -// CHECK-SAME: workgroup_size = [16 : index, 4 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 64)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 28)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_196x64x192() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[28, 64], [7, 4], [0, 0, 8]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 28)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 28]> +// CHECK: hal.executable.entry_point public @matmul_196x64x192 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [16 : index, 4 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_196x64x192() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -288,18 
+300,22 @@ hal.executable @matmul_12544x96x16 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_12544x96x16 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 128]} -// CHECK-SAME: workgroup_size = [8 : index, 8 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 128)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_12544x96x16() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[128, 32], [16, 4], [0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 128]> +// CHECK: hal.executable.entry_point public @matmul_12544x96x16 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 8 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_12544x96x16() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -365,18 +381,22 @@ hal.executable @matmul_49x160x576 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_49x160x576 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", 
workloadPerWorkgroup = [32, 7]} -// CHECK-SAME: workgroup_size = [8 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 7)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_49x160x576() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[7, 32], [7, 4], [0, 0, 8]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 7)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 7]> +// CHECK: hal.executable.entry_point public @matmul_49x160x576 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_49x160x576() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -453,17 +473,21 @@ hal.executable @batch_matmul_4x384x384 { } } -// CHECK-LABEL: hal.executable.entry_point public @batch_matmul_4x384x384 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [128, 32, 1]} -// CHECK-SAME: workgroup_size = [32 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): -// CHECK-NEXT: 
%[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 128)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] - -// CHECK: func @batch_matmul_4x384x384() -// CHECK: linalg.batch_matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 32, 128], [1, 16, 4], [0, 0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 128)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [128, 32, 1]> +// CHECK: hal.executable.entry_point public @batch_matmul_4x384x384 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [32 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] + +// CHECK: func @batch_matmul_4x384x384() +// CHECK: linalg.batch_matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -540,14 +564,17 @@ hal.executable @batch_matmul_4x8x8 { } } -// CHECK-LABEL: hal.executable.entry_point public @batch_matmul_4x8x8 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 8, 1]} -// CHECK-SAME: workgroup_size = [2 : index, 8 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] - -// CHECK: func @batch_matmul_4x8x8() -// 
CHECK: linalg.batch_matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 8, 8], [1, 1, 4], [0, 0, 0, 16]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 8, 1]> +// CHECK: hal.executable.entry_point public @batch_matmul_4x8x8 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [2 : index, 8 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] + +// CHECK: func @batch_matmul_4x8x8() +// CHECK: linalg.batch_matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] \ No newline at end of file diff --git a/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir index 0b584bfc1da5..10dc64f3c81d 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_default_matmul.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s // Odd K that forbids vectorization. 
@@ -74,16 +74,19 @@ hal.executable @batch_matmul_1x3x32 { } } -// CHECK-LABEL: hal.executable.entry_point public @batch_matmul_1x3x32 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [4, 1, 1]} -// CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%[[X]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y]], %[[Z]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [4, 1, 1]> +// CHECK: hal.executable.entry_point public @batch_matmul_1x3x32 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y]], %[[Z]] -// CHECK: func @batch_matmul_1x3x32() -// CHECK: linalg.batch_matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 1, 4], [1, 1, 1]]} +// CHECK: func @batch_matmul_1x3x32() +// CHECK: linalg.batch_matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -149,14 +152,17 @@ hal.executable private @matmul_64x16 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_64x16 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [4, 1]} -// CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%[[X]]] -// CHECK-NEXT: hal.return 
%[[X_COUNT]], %[[Y]], %[[ONE]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [4, 1]> +// CHECK: hal.executable.entry_point public @matmul_64x16 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [4 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y]], %[[ONE]] -// CHECK: func @matmul_64x16() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 4], [1, 1]]} +// CHECK: func @matmul_64x16() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] diff --git a/iree/compiler/Codegen/SPIRV/test/config_linalg_ext_ops.mlir b/iree/compiler/Codegen/SPIRV/test/config_linalg_ext_ops.mlir index ae68820d73f0..57ea3d876000 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_linalg_ext_ops.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_linalg_ext_ops.mlir @@ -1,5 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s - +// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s hal.executable private @static_1d_sort { hal.interface @io { hal.interface.binding @s0b0_rw_external, set=0, binding=0, type="StorageBuffer", access="Read|Write" @@ -34,8 +33,10 @@ hal.executable private @static_1d_sort { // Check that the workgroup count and size are (1, 1, 1) for serializing the computation. 
-// CHECK-LABEL: hal.executable.entry_point public @static_1d_sort -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize"} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = []> +// CHECK: hal.executable.entry_point public @static_1d_sort +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [1 : index, 1 : index, 1 : index] // CHECK-NEXT: ^{{.+}}(%{{.+}}: index, %{{.+}}: index, %{{.+}}: index): // CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index @@ -43,7 +44,7 @@ hal.executable private @static_1d_sort { // CHECK: func @static_1d_sort() // CHECK: linalg_ext.sort -// CHECK-SAME: lowering.config = {} +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -98,17 +99,20 @@ hal.executable private @static_3d_sort { } } -// CHECK-LABEL: hal.executable.entry_point public @static_3d_sort -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [16, 1]} -// CHECK-SAME: workgroup_size = [16 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[DIV:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%[[X]]] -// CHECK-NEXT: hal.return %[[DIV]], %[[Y]], %[[ONE]] - -// CHECK: func @static_3d_sort() -// CHECK: linalg_ext.sort -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 0, 16], [1, 0, 1]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [16, 1]> +// CHECK: hal.executable.entry_point public @static_3d_sort +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [16 : index, 1 : index, 1 : index] +// CHECK-NEXT: 
^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[DIV:.+]] = affine.apply #[[MAP]]()[%[[X]]] +// CHECK-NEXT: hal.return %[[DIV]], %[[Y]], %[[ONE]] + +// CHECK: func @static_3d_sort() +// CHECK: linalg_ext.sort +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -144,18 +148,20 @@ hal.executable private @static_1d_fft_stage2 { } } -// CHECK-LABEL: hal.executable.entry_point public @static_1d_fft_stage2 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute" -// CHECK-SAME: workloadPerWorkgroup = [4]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [4]> +// CHECK: hal.executable.entry_point public @static_1d_fft_stage2 +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [16 : index, 1 : index, 1 : index] // CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %{{.+}}: index, %{{.+}}: index): // CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[T:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%[[ARG0]]] +// CHECK-NEXT: %[[T:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] // CHECK-NEXT: hal.return %[[T]], %[[ONE]], %[[ONE]] // CHECK: func @static_1d_fft_stage2() // CHECK: linalg_ext.fft -// CHECK-SAME: lowering.config = {tileSizes = {{\[}}[4]]} +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -214,14 +220,16 @@ hal.executable private @static_3d_fft_stage3 { } -// CHECK-LABEL: hal.executable.entry_point public @static_3d_fft_stage3 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute" -// CHECK-SAME: workloadPerWorkgroup = [8, 1, 1]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[TRANSLATION:.+]] = 
#iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [8, 1, 1]> +// CHECK: hal.executable.entry_point public @static_3d_fft_stage3 +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-SAME: workgroup_size = [16 : index, 1 : index, 1 : index] // CHECK-NEXT: ^{{.+}}(%[[ARG0:.+]]: index, %[[ARG1:.+]]: index, %[[ARG2:.+]]: index): -// CHECK-NEXT: %[[T:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[ARG0]]] +// CHECK-NEXT: %[[T:.+]] = affine.apply #[[MAP]]()[%[[ARG0]]] // CHECK-NEXT: hal.return %[[T]], %[[ARG1]], %[[ARG2]] // CHECK: func @static_3d_fft_stage3() // CHECK: linalg_ext.fft -// CHECK-SAME: lowering.config = {tileSizes = {{\[}}[1, 1, 8]]} +// CHECK-SAME: lowering.config = #[[CONFIG]] diff --git a/iree/compiler/Codegen/SPIRV/test/config_linalg_ops.mlir b/iree/compiler/Codegen/SPIRV/test/config_linalg_ops.mlir index 0cb6a62c3827..4ed74419e78d 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_linalg_ops.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_linalg_ops.mlir @@ -47,9 +47,10 @@ hal.executable @tensor_insert { } } } -// CHECK: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 64)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [64, 1]> // CHECK: hal.executable.entry_point public @tensor_insert_slice -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [64, 1]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index @@ -100,10 +101,11 @@ hal.executable @tensor_insert { } } } -// CHECK-DAG: #[[CONFIG:.+]] = {tileSizes = {{\[}}[1, 16], [1, 1]{{\]}}} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config // CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 16)> +// CHECK-DAG: #[[TRANSLATION:.+]] = 
#iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [16, 1]> // CHECK: hal.executable.entry_point public @tensor_insert_slice -// CHECK-SAME: translation.info = {passPipeline = "SPIRVDistribute", workloadPerWorkgroup = [16, 1]} +// CHECK-SAME: translation.info = #[[TRANSLATION]] // CHECK-NEXT: %[[ARG0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index diff --git a/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir b/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir index 02000f274de1..644ab49b27f7 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_mali_conv.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s // Conv - large OC - distribute to only one workgroup dimension. 
@@ -74,18 +74,20 @@ hal.executable @conv_112x112x512 { } } -// CHECK-LABEL: hal.executable.entry_point public @conv_112x112x512 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 4, 1]} -// CHECK-SAME: workgroup_size = [16 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C8:.+]] = arith.constant 8 : index -// CHECK-NEXT: %[[C28:.+]] = arith.constant 28 : index -// CHECK-NEXT: %[[C112:.+]] = arith.constant 112 : index -// CHECK-NEXT: hal.return %[[C8]], %[[C28]], %[[C112]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 4, 1]> +// CHECK: hal.executable.entry_point public @conv_112x112x512 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [16 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C8:.+]] = arith.constant 8 : index +// CHECK-NEXT: %[[C28:.+]] = arith.constant 28 : index +// CHECK-NEXT: %[[C112:.+]] = arith.constant 112 : index +// CHECK-NEXT: hal.return %[[C8]], %[[C28]], %[[C112]] -// CHECK: func @conv_112x112x512() -// CHECK: linalg.conv_2d_nhwc_hwcf -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 1, 4, 64], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4]]} +// CHECK: func @conv_112x112x512() +// CHECK: linalg.conv_2d_nhwc_hwcf +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -163,18 +165,20 @@ hal.executable @conv_112x112x32 { } } -// CHECK-LABEL: hal.executable.entry_point public @conv_112x112x32 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 8, 1]} -// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : 
index -// CHECK-NEXT: %[[C14:.+]] = arith.constant 14 : index -// CHECK-NEXT: %[[C112:.+]] = arith.constant 112 : index -// CHECK-NEXT: hal.return %[[C1]], %[[C14]], %[[C112]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 8, 1]> +// CHECK: hal.executable.entry_point public @conv_112x112x32 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[C14:.+]] = arith.constant 14 : index +// CHECK-NEXT: %[[C112:.+]] = arith.constant 112 : index +// CHECK-NEXT: hal.return %[[C1]], %[[C14]], %[[C112]] -// CHECK: func @conv_112x112x32() -// CHECK: linalg.conv_2d_nhwc_hwcf -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 1, 8, 32], [0, 1, 4, 4], [0, 0, 0, 0, 1, 1, 4]]} +// CHECK: func @conv_112x112x32() +// CHECK: linalg.conv_2d_nhwc_hwcf +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -251,17 +255,19 @@ hal.executable @conv_16x16x16 { } } -// CHECK-LABEL: hal.executable.entry_point public @conv_16x16x16 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]} -// CHECK-SAME: workgroup_size = [4 : index, 2 : index, 2 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[C4:.+]] = arith.constant 4 : index -// CHECK-NEXT: hal.return %[[C1]], %[[C4]], %[[C4]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]> +// CHECK: hal.executable.entry_point public @conv_16x16x16 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = 
[4 : index, 2 : index, 2 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[C4:.+]] = arith.constant 4 : index +// CHECK-NEXT: hal.return %[[C1]], %[[C4]], %[[C4]] -// CHECK: func @conv_16x16x16() -// CHECK: linalg.conv_2d_nhwc_hwcf -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 4, 4, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1, 4]]} +// CHECK: func @conv_16x16x16() +// CHECK: linalg.conv_2d_nhwc_hwcf +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -340,17 +346,19 @@ hal.executable @dwconv_28x28x144 { } } -// CHECK-LABEL: hal.executable.entry_point public @dwconv_28x28x144 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]} -// CHECK-SAME: workgroup_size = [4 : index, 2 : index, 2 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C9:.+]] = arith.constant 9 : index -// CHECK-NEXT: %[[C7:.+]] = arith.constant 7 : index -// CHECK-NEXT: hal.return %[[C9]], %[[C7]], %[[C7]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]> +// CHECK: hal.executable.entry_point public @dwconv_28x28x144 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [4 : index, 2 : index, 2 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C9:.+]] = arith.constant 9 : index +// CHECK-NEXT: %[[C7:.+]] = arith.constant 7 : index +// CHECK-NEXT: hal.return %[[C9]], %[[C7]], %[[C7]] -// CHECK: func @dwconv_28x28x144() -// CHECK: linalg.depthwise_conv2D_nhw -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 4, 4, 16], [0, 2, 2, 4], [0, 0, 0, 0, 1, 1]]} +// CHECK: func @dwconv_28x28x144() +// CHECK: linalg.depthwise_conv2D_nhw +// CHECK-SAME: 
lowering.config = #[[CONFIG]] // ----- @@ -430,14 +438,16 @@ hal.executable @dwconv_1x2x8 { } } -// CHECK-LABEL: hal.executable.entry_point public @dwconv_1x2x8 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 2, 1]} -// CHECK-SAME: workgroup_size = [2 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index -// CHECK-NEXT: hal.return %[[C1]], %[[C1]], %[[C1]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 2, 1]> +// CHECK: hal.executable.entry_point public @dwconv_1x2x8 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [2 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: hal.return %[[C1]], %[[C1]], %[[C1]] -// CHECK: func @dwconv_1x2x8() -// CHECK: linalg.depthwise_conv2D_nhw -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[0, 1, 2, 8], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]]} +// CHECK: func @dwconv_1x2x8() +// CHECK: linalg.depthwise_conv2D_nhw +// CHECK-SAME: lowering.config = #[[CONFIG]] diff --git a/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir index 4d72dc471823..fe03afa14a9f 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_mali_matmul.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file 
-pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s // Large matmul that can match the best tiling scheme. @@ -62,18 +62,22 @@ hal.executable @matmul_1024x2048x512 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_1024x2048x512 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 8]} -// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_1024x2048x512() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[8, 32], [4, 4], [0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 8]> +// CHECK: hal.executable.entry_point public @matmul_1024x2048x512 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_1024x2048x512() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- 
@@ -139,18 +143,22 @@ hal.executable @matmul_3136x24x96 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_3136x24x96 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 32]} -// CHECK-SAME: workgroup_size = [2 : index, 8 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_3136x24x96() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[32, 8], [4, 4], [0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 32]> +// CHECK: hal.executable.entry_point public @matmul_3136x24x96 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [2 : index, 8 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_3136x24x96() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -216,18 +224,22 @@ hal.executable @matmul_196x64x192 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_196x64x192 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup 
= [32, 4]} -// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 4)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_196x64x192() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[4, 32], [2, 4], [0, 0, 8]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 4)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4]> +// CHECK: hal.executable.entry_point public @matmul_196x64x192 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_196x64x192() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -288,18 +300,22 @@ hal.executable @matmul_12544x96x16 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_12544x96x16 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 8]} -// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// 
CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] - -// CHECK: func @matmul_12544x96x16() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[8, 32], [4, 4], [0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 8]> +// CHECK: hal.executable.entry_point public @matmul_12544x96x16 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[ONE]] + +// CHECK: func @matmul_12544x96x16() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -365,17 +381,20 @@ hal.executable @matmul_49x160x576 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_49x160x576 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 1]} -// CHECK-SAME: workgroup_size = [8 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y]], %[[ONE]] - -// CHECK: func @matmul_49x160x576() -// CHECK: linalg.matmul -// 
CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 32], [1, 4], [0, 0, 8]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 1]> +// CHECK: hal.executable.entry_point public @matmul_49x160x576 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[ONE:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y]], %[[ONE]] + +// CHECK: func @matmul_49x160x576() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -452,17 +471,21 @@ hal.executable @batch_matmul_4x384x384 { } } -// CHECK-LABEL: hal.executable.entry_point public @batch_matmul_4x384x384 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [32, 12, 1]} -// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 32)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 12)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] - -// CHECK: func @batch_matmul_4x384x384() -// CHECK: linalg.batch_matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 12, 32], [1, 6, 4], [0, 0, 0, 4]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 32)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 12)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = 
[32, 12, 1]> +// CHECK: hal.executable.entry_point public @batch_matmul_4x384x384 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [8 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] + +// CHECK: func @batch_matmul_4x384x384() +// CHECK: linalg.batch_matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -540,14 +563,18 @@ hal.executable @batch_matmul_4x2x8 { } } -// CHECK-LABEL: hal.executable.entry_point public @batch_matmul_4x2x8 -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [8, 2, 1]} -// CHECK-SAME: workgroup_size = [2 : index, 2 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 8)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 2)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] - -// CHECK: func @batch_matmul_4x2x8() -// CHECK: linalg.batch_matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[1, 2, 8], [1, 1, 4], [0, 0, 0, 8]]} +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 ceildiv 8)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 ceildiv 2)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 2, 1]> +// CHECK: hal.executable.entry_point public @batch_matmul_4x2x8 +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [2 : index, 2 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %[[Z:.+]]: index): +// CHECK-NEXT: 
%[[X_COUNT:.+]] = affine.apply #[[MAP0]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP1]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[Z]] + +// CHECK: func @batch_matmul_4x2x8() +// CHECK: linalg.batch_matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] diff --git a/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir b/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir index 3968d729923b..1097c3caaae3 100644 --- a/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir +++ b/iree/compiler/Codegen/SPIRV/test/config_nvidia_matmul_cooperative_ops.mlir @@ -1,4 +1,4 @@ -// RUN: iree-opt -split-input-file -mlir-print-local-scope -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-spirv-lower-executable-target-pass{test-lowering-configuration=true}))' %s | IreeFileCheck %s #map0 = affine_map<()[s0, s1] -> (s0 * s1)> #map1 = affine_map<(d0)[s0] -> (s0, -d0 + 256)> @@ -100,18 +100,21 @@ hal.executable public @matmul_256x1024x128_div_sub { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_256x1024x128_div_sub -// CHECK-SAME: translation.info = {passPipeline = "SPIRVVectorizeToCooperativeOps", workloadPerWorkgroup = [16, 16]} -// CHECK-SAME: workgroup_size = [32 : index, 1 : index, 1 : index] -// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): -// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index -// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%[[X]]] -// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply affine_map<()[s0] -> (s0 ceildiv 16)>()[%[[Y]]] -// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[C1]] +// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering.config +// CHECK-DAG: #[[MAP:.+]] = 
affine_map<()[s0] -> (s0 ceildiv 16)> +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorizeToCooperativeOps", workload_per_wg = [16, 16]> +// CHECK: hal.executable.entry_point public @matmul_256x1024x128_div_sub +// CHECK-SAME: translation.info = #[[TRANSLATION]] +// CHECK-SAME: workgroup_size = [32 : index, 1 : index, 1 : index] +// CHECK-NEXT: ^{{.+}}(%[[X:.+]]: index, %[[Y:.+]]: index, %{{.+}}: index): +// CHECK-NEXT: %[[C1:.+]] = arith.constant 1 : index +// CHECK-NEXT: %[[X_COUNT:.+]] = affine.apply #[[MAP]]()[%[[X]]] +// CHECK-NEXT: %[[Y_COUNT:.+]] = affine.apply #[[MAP]]()[%[[Y]]] +// CHECK-NEXT: hal.return %[[X_COUNT]], %[[Y_COUNT]], %[[C1]] -// CHECK: func @matmul_256x1024x128_div_sub() -// CHECK: linalg.matmul -// CHECK-SAME{LITERAL}: lowering.config = {tileSizes = [[16, 16, 16], [16, 16, 16]]} +// CHECK: func @matmul_256x1024x128_div_sub() +// CHECK: linalg.matmul +// CHECK-SAME: lowering.config = #[[CONFIG]] // ----- @@ -194,5 +197,6 @@ hal.executable public @matmul_256x1024x8 { } } -// CHECK-LABEL: hal.executable.entry_point public @matmul_256x1024x8 -// CHECK-SAME: passPipeline = "SPIRVVectorize" +// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation.info<"SPIRVVectorize" +// CHECK: hal.executable.entry_point public @matmul_256x1024x8 +// CHECK-SAME: translation.info = #[[TRANSLATION]] diff --git a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir index fc1058e1469f..3315774f2237 100644 --- a/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir +++ b/iree/compiler/Codegen/SPIRV/test/pipeline_matmul_vectorization.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-codegen-linalg-to-spirv-pipeline))' %s | IreeFileCheck %s -#config = {tileSizes = [[8, 64], [8, 4], [0, 0, 4]]} - +#config = #iree_codegen.lowering.config +#translation = 
#iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8]> hal.executable private @fuse_and_vectorize_fill_matmul { hal.interface @io { hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read" @@ -13,7 +13,7 @@ hal.executable private @fuse_and_vectorize_fill_matmul { hal.executable.entry_point @fuse_and_vectorize_fill_matmul attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 1: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8]} + translation.info = #translation } builtin.module { func @fuse_and_vectorize_fill_matmul() { @@ -70,8 +70,8 @@ hal.executable private @fuse_and_vectorize_fill_matmul { // ----- -#config = {tileSizes = [[8, 64], [8, 4], [0, 0, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8]> hal.executable private @fuse_and_vectorize_matmul_add { hal.interface @io { hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read" @@ -83,7 +83,7 @@ hal.executable private @fuse_and_vectorize_matmul_add { hal.executable.entry_point @fuse_and_vectorize_matmul_add attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 1: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8]} + translation.info = #translation } builtin.module { func @fuse_and_vectorize_matmul_add() { diff --git a/iree/compiler/Codegen/SPIRV/test/remove_one_trip_tiled_loop.mlir b/iree/compiler/Codegen/SPIRV/test/remove_one_trip_tiled_loop.mlir index 2dd847e3e27a..6b8b7bcc91b7 100644 --- a/iree/compiler/Codegen/SPIRV/test/remove_one_trip_tiled_loop.mlir +++ b/iree/compiler/Codegen/SPIRV/test/remove_one_trip_tiled_loop.mlir @@ -1,5 +1,7 @@ // RUN: iree-opt -split-input-file 
-pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-remove-one-trip-tiled-loop))))' %s | IreeFileCheck %s +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]> hal.executable private @static_shaped_conv { hal.interface @io { hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read" @@ -9,7 +11,7 @@ hal.executable private @static_shaped_conv { hal.executable.variant @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb"> { hal.executable.entry_point @static_shaped_conv attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]}, + translation.info = #translation, workgroup_size = [4 : index, 4 : index, 1 : index] } builtin.module { @@ -46,8 +48,8 @@ hal.executable private @static_shaped_conv { %16 = affine.min affine_map<(d0) -> (4, -d0 + 112)>(%arg0) %17 = affine.min affine_map<(d0) -> (4, -d0 + 112)>(%arg1) %18 = memref.subview %2[0, %arg0, %arg1, %arg2] [1, %16, %17, %14] [1, 1, 1, 1] : memref<1x112x112x32xf32> to memref<1x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 401408 + s0 + d1 * 3584 + d2 * 32 + d3)>> - linalg.fill(%cst, %18) {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[0, 4, 4, 16], [], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4]]}} : f32, memref<1x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 401408 + s0 + d1 * 3584 + d2 * 32 + d3)>> - linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, lowering.config = {tileSizes = [[0, 4, 4, 16], [], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4]]}, strides = dense<2> : tensor<2xi64>} + linalg.fill(%cst, %18) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f32, memref<1x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 401408 + s0 + d1 * 3584 + d2 
* 32 + d3)>> + linalg.conv_2d_nhwc_hwcf {__internal_linalg_transform__ = "workgroup", dilations = dense<1> : tensor<2xi64>, lowering.config = #config, strides = dense<2> : tensor<2xi64>} ins(%13, %15 : memref<1x?x?x3xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 151875 + s0 + d1 * 675 + d2 * 3 + d3)>>, memref<3x3x3x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 288 + s0 + d1 * 96 + d2 * 32 + d3)>>) outs(%18 : memref<1x?x?x?xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 401408 + s0 + d1 * 3584 + d2 * 32 + d3)>>) } diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir index bdd9652f5844..b12a0993fe99 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_scatter.mlir @@ -1,5 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-tile-and-distribute))))' %s | IreeFileCheck %s +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [16, 1]> hal.executable private @static_scatter_update_slice { hal.interface @io { hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read" @@ -10,7 +12,7 @@ hal.executable private @static_scatter_update_slice { hal.executable.variant @vulkan_spirv_fb, target = #hal.executable.target<"vulkan", "vulkan-spirv-fb"> { hal.executable.entry_point @static_scatter_update_slice attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = 5 : i32, workloadPerWorkgroup = [16, 1]}, + translation.info = #translation, workgroup_size = [16 : index, 1 : index, 1 : index] } @@ -36,7 +38,7 @@ hal.executable private @static_scatter_update_slice { %8 = memref.subview %1[%arg0, 0] [1, 1] [1, 1] : memref<40x1xi32> to memref<1x1xi32, affine_map<(d0, d1)[s0] -> (d0 + s0 + d1)>> %9 
= memref.cast %8 : memref<1x1xi32, affine_map<(d0, d1)[s0] -> (d0 + s0 + d1)>> to memref (d0 + s0 + d1)>> %10 = memref.subview %2[0, %arg1] [100, %5] [1, 1] : memref<100x500xi32> to memref<100x?xi32, affine_map<(d0, d1)[s0] -> (d0 * 500 + s0 + d1)>> - linalg_ext.scatter {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[1, 16], [1, 1]]}} ins(%7, %9 : memref (d0 * 500 + s0 + d1)>>, memref (d0 + s0 + d1)>>) outs(%10 : memref<100x?xi32, affine_map<(d0, d1)[s0] -> (d0 * 500 + s0 + d1)>>) { + linalg_ext.scatter {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%7, %9 : memref (d0 * 500 + s0 + d1)>>, memref (d0 + s0 + d1)>>) outs(%10 : memref<100x?xi32, affine_map<(d0, d1)[s0] -> (d0 * 500 + s0 + d1)>>) { ^bb0(%arg2: i32, %arg3: i32): // no predecessors linalg_ext.yield %arg2 : i32 } diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir index 201f344c7665..345c7317a644 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_distribute_sort.mlir @@ -1,5 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-tile-and-distribute, cse))))' %s | IreeFileCheck %s +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVDistribute", workload_per_wg = [16, 1]> hal.executable private @static_3d_sort { hal.interface @io { hal.interface.binding @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read" @@ -8,7 +10,7 @@ hal.executable private @static_3d_sort { hal.executable.variant @vulkan_spirv_fb, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> { hal.executable.entry_point @static_3d_sort attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = 5 : i32, workloadPerWorkgroup = [16, 1]}, + 
translation.info = #translation, workgroup_size = [16 : index, 1 : index, 1 : index] } builtin.module { @@ -30,8 +32,8 @@ hal.executable private @static_3d_sort { %5 = memref.cast %4 : memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>> to memref %6 = memref.subview %1[%arg0, 0, %arg1] [1, 32, 16] [1, 1, 1] : memref<64x32x128xi32> to memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>> %7 = memref.cast %6 : memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>> to memref (d0 * 4096 + s0 + d1 * 128 + d2)>> - linalg.copy(%5, %6) {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[1, 0, 16], [1, 0, 1]]}} : memref, memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>> - linalg_ext.sort dimension(1) {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[1, 0, 16], [1, 0, 1]]}} outs(%7 : memref (d0 * 4096 + s0 + d1 * 128 + d2)>>) { + linalg.copy(%5, %6) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : memref, memref<1x32x16xi32, affine_map<(d0, d1, d2)[s0] -> (d0 * 4096 + s0 + d1 * 128 + d2)>> + linalg_ext.sort dimension(1) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} outs(%7 : memref (d0 * 4096 + s0 + d1 * 128 + d2)>>) { ^bb0(%arg2: i32, %arg3: i32): // no predecessors %8 = arith.cmpi slt, %arg2, %arg3 : i32 linalg_ext.yield %8 : i1 diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir index 8bf2d36bced6..958207c71242 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize.mlir @@ -8,7 +8,8 @@ #map5 = affine_map<(d0, d1, d2) -> (d2, d1)> #map6 = affine_map<(d0, d1, d2) -> (d0, d1)> -#config = {tileSizes = [[8, 16], [1, 1], [0, 0, 1]]} +#config = #iree_codegen.lowering.config +#translation = 
#iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [8, 16]> hal.executable private @matmul { hal.interface @io { @@ -20,7 +21,7 @@ hal.executable private @matmul { hal.executable.entry_point @matmul attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 8: index, 1: index], - translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [8, 16]} + translation.info = #translation } builtin.module { func @matmul() { @@ -82,8 +83,8 @@ hal.executable private @matmul { // ----- -#config = {tileSizes = [[1, 4, 32], [1, 1, 1]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4, 1]> hal.executable private @conv_1d { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -94,7 +95,7 @@ hal.executable private @conv_1d { hal.executable.entry_point @conv_1d attributes { interface = @io, ordinal = 0 : index, workgroup_size = [32: index, 4: index, 1: index], - translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]} + translation.info = #translation } builtin.module { func @conv_1d() { @@ -165,8 +166,8 @@ hal.executable private @conv_1d { #map6 = affine_map<(d0)[s0] -> (4, -d0 + s0)> #map7 = affine_map<(d0)[s0] -> (32, -d0 + s0)> -#config = {tileSizes = [[0, 1, 4, 32], [0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4, 1]> hal.executable private @conv_no_padding { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -177,7 +178,7 @@ hal.executable private @conv_no_padding { hal.executable.entry_point @conv_no_padding attributes { interface = @io, ordinal = 0 : index, workgroup_size = [32: index, 4: index, 1: index], - translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]} + 
translation.info = #translation } builtin.module { func @conv_no_padding() { @@ -292,8 +293,8 @@ hal.executable private @conv_no_padding { // ----- -#config = {tileSizes = [[0, 0, 1, 4, 32], [0, 0, 1, 1, 1]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4, 1]> hal.executable private @conv_3d { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -304,7 +305,7 @@ hal.executable private @conv_3d { hal.executable.entry_point @conv_3d attributes { interface = @io, ordinal = 0 : index, workgroup_size = [32: index, 4: index, 1: index], - translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]} + translation.info = #translation } builtin.module { func @conv_3d() { @@ -365,8 +366,8 @@ hal.executable private @conv_3d { #map6 = affine_map<()[s0] -> (32, s0 * -32 + 13)> #map7 = affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 1092 + s0 + d1 * 78 + d2 * 6 + d3)> -#config = {tileSizes = [[1, 4, 32], [1, 1, 1]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [32, 4, 1]> module { hal.executable private @pooling_nhwc_max { hal.interface @io { @@ -378,7 +379,7 @@ module { hal.executable.entry_point @pooling_nhwc_max attributes { interface = @io, ordinal = 0 : index, workgroup_size = [32: index, 4: index, 1: index], - translation.info = {passPipeline = 6 : i32, workloadPerWorkgroup = [32, 4, 1]} + translation.info = #translation } builtin.module { func @pooling_nhwc_max() { diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir index 2cd1b62aeaf9..6018c6b94794 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_batch_matmul.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt 
-split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(iree-spirv-tile-and-distribute,iree-spirv-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s -#config = {tileSizes = [[1, 8, 64], [1, 8, 4], [0, 0, 0, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8, 1]> hal.executable private @batch_matmul_static_shape { hal.interface private @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -12,7 +12,7 @@ hal.executable private @batch_matmul_static_shape { hal.executable.entry_point @batch_matmul_static_shape attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 1: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8, 1]} + translation.info = #translation } builtin.module { func @batch_matmul_static_shape() { @@ -370,8 +370,8 @@ hal.executable private @batch_matmul_static_shape { // ----- -#config = {tileSizes = [[1, 8, 64], [1, 8, 4], [0, 0, 0, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8, 1]> hal.executable private @fused_fill_batch_matmul { hal.interface private @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -382,7 +382,7 @@ hal.executable private @fused_fill_batch_matmul { hal.executable.entry_point @fused_fill_batch_matmul attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 1: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8, 1]} + translation.info = #translation } builtin.module { func @fused_fill_batch_matmul() { diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir index 
ffd3b8f83b13..757dd467d2d2 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_conv.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(canonicalize,iree-spirv-remove-one-trip-tiled-loop,iree-spirv-tile-and-distribute,iree-spirv-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s -#config = {tileSizes = [[0, 4, 4, 16], [0, 4, 1, 4], [0, 0, 0, 0, 1, 1, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]> hal.executable private @conv_static_shape_f32 { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -13,7 +13,7 @@ hal.executable private @conv_static_shape_f32 { interface = @io, ordinal = 0 : index, workgroup_size = [4: index, 4: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]} + translation.info = #translation } { ^bb0(%arg0 : index, %arg1 : index, %arg2 : index): %x = arith.constant 2: index @@ -99,8 +99,8 @@ hal.executable private @conv_static_shape_f32 { // ----- -#config = {tileSizes = [[0, 4, 4, 16], [0, 1, 1, 4], [0, 0, 0, 0, 1, 1]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [16, 4, 4]> hal.executable private @depthwise_conv_static_shape_f32 { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -112,7 +112,7 @@ hal.executable private @depthwise_conv_static_shape_f32 { interface = @io, ordinal = 0 : index, workgroup_size = [4: index, 4: index, 4: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [16, 4, 4]} + translation.info = #translation } { ^bb0(%arg0 : index, %arg1 : index, %arg2 : index): %x 
= arith.constant 6: index diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir index 0906c264c185..a3c5db1f5a0b 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_matmul.mlir @@ -1,7 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(iree-set-num-workgroups,builtin.module(builtin.func(iree-spirv-tile-and-distribute,iree-spirv-vectorize))))' -canonicalize -cse %s | IreeFileCheck %s -#config = {tileSizes = [[8, 64], [8, 4], [0, 0, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8]> hal.executable private @matmul_static_shape_f16 { hal.interface private @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -12,7 +12,7 @@ hal.executable private @matmul_static_shape_f16 { hal.executable.entry_point @matmul_static_shape_f16 attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 1: index, 1: index], - translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8]} + translation.info = #translation } builtin.module { func @matmul_static_shape_f16() { @@ -66,8 +66,8 @@ hal.executable private @matmul_static_shape_f16 { // ----- -#config = {tileSizes = [[8, 64], [8, 4], [0, 0, 4]]} - +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorize", workload_per_wg = [64, 8]> hal.executable private @matmul_static_shape_f32 { hal.interface private @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -78,7 +78,7 @@ hal.executable private @matmul_static_shape_f32 { hal.executable.entry_point @matmul_static_shape_f32 attributes { interface = @io, ordinal = 0 : index, workgroup_size = [16: index, 1: index, 1: index], - 
translation.info = {passPipeline = "SPIRVVectorize", workloadPerWorkgroup = [64, 8]} + translation.info = #translation } builtin.module { func @matmul_static_shape_f32() { diff --git a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir index e63bd03118d0..42e13de893df 100644 --- a/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir +++ b/iree/compiler/Codegen/SPIRV/test/tile_and_vectorize_to_cooperative_ops.mlir @@ -1,5 +1,7 @@ // RUN: iree-opt -split-input-file -pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-tile-and-vectorize-to-cooperative-ops))))' %s | IreeFileCheck %s +#config = #iree_codegen.lowering.config +#translation = #iree_codegen.translation.info<"SPIRVVectorizeToCooperativeOps", workload_per_wg = [16, 16]> hal.executable public @matmul_256x1024x128_div_sub { hal.interface public @io { hal.interface.binding public @s0b0_ro_external, set=0, binding=0, type="StorageBuffer", access="Read" @@ -28,7 +30,7 @@ hal.executable public @matmul_256x1024x128_div_sub { subgroup_size = 32 : i32}>}> { hal.executable.entry_point public @matmul_256x1024x128_div_sub attributes { interface = @io, ordinal = 0 : index, - translation.info = {passPipeline = "SPIRVVectorizeToCooperativeOps", workloadPerWorkgroup = [16, 16]}, + translation.info = #translation, workgroup_size = [32 : index, 1 : index, 1 : index] } { ^bb0(%arg0: index, %arg1: index, %arg2: index): // no predecessors @@ -63,14 +65,14 @@ hal.executable public @matmul_256x1024x128_div_sub { %11 = memref.subview %2[%arg0, 0] [16, 128] [1, 1] : memref<256x128xf16> to memref<16x128xf16, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> %12 = memref.subview %3[0, %arg1] [128, 16] [1, 1] : memref<128x1024xf16> to memref<128x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> %13 = memref.subview %4[%arg0, %arg1] [16, 16] [1, 1] : 
memref<256x1024xf16> to memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> - linalg.fill(%cst, %13) {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[16, 16, 16], [16, 16, 16]]}} : f16, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> - linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[16, 16, 16], [16, 16, 16]]}} + linalg.fill(%cst, %13) {__internal_linalg_transform__ = "workgroup", lowering.config = #config} : f16, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>> + linalg.matmul {__internal_linalg_transform__ = "workgroup", lowering.config = #config} ins(%11, %12 : memref<16x128xf16, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>, memref<128x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>) outs(%13 : memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>) linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%13, %9, %10 : memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>, memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>) outs(%13 : memref<16x16xf16, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>) - attrs = {__internal_linalg_transform__ = "workgroup", lowering.config = {tileSizes = [[16, 16, 16], [16, 16, 16]]}} { + attrs = {__internal_linalg_transform__ = "workgroup", lowering.config = #config} { ^bb0(%arg2: f16, %arg3: f16, %arg4: f16, %arg5: f16): // no predecessors %14 = arith.divf %arg2, %arg3 : f16 %15 = arith.subf %14, %arg4 : f16 diff --git a/iree/compiler/Codegen/SPIRV/test/vectorize_elementwise_ops.mlir b/iree/compiler/Codegen/SPIRV/test/vectorize_elementwise_ops.mlir index 667dab3a02c3..3785956a2c20 100644 --- 
a/iree/compiler/Codegen/SPIRV/test/vectorize_elementwise_ops.mlir +++ b/iree/compiler/Codegen/SPIRV/test/vectorize_elementwise_ops.mlir @@ -5,6 +5,7 @@ // CHECK: vector.transfer_read %{{.+}}[%c0], {{.+}} memref<4xf32, #{{.+}}>, vector<4xf32> // CHECK: addf %{{.*}}, %{{.*}} : vector<4xf32> // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, memref<4xf32 +#config = #iree_codegen.lowering.config hal.executable private @elementwise_static_shape { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -24,7 +25,7 @@ hal.executable private @elementwise_static_shape { %ret0 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<128xf32> linalg.generic { __internal_linalg_transform__ = "workgroup", - lowering.config = {tileSizes = [[128], [4]]}, + lowering.config = #config, indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], @@ -54,6 +55,7 @@ hal.executable private @elementwise_static_shape { // CHECK-NOT: vector.transfer_read // CHECK: scf.for // CHECK: scf.for +#config = #iree_codegen.lowering.config hal.executable private @elementwise_transpose { hal.interface @io { hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" @@ -73,7 +75,7 @@ hal.executable private @elementwise_transpose { %ret0 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<128x8xf32> linalg.generic { __internal_linalg_transform__ = "workgroup", - lowering.config = {tileSizes = [[1, 32], [1, 1]]}, + lowering.config = #config, indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>], diff --git a/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir b/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir index bd16b5d9cacb..618ec1f128e0 100644 --- a/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir +++ b/iree/compiler/Codegen/SPIRV/test/vectorize_matmul.mlir @@ -1,5 +1,6 @@ // RUN: iree-opt -split-input-file 
-iree-spirv-vectorize %s | IreeFileCheck %s +#config = #iree_codegen.lowering.config func @matmul_2x128x4() { %c0 = arith.constant 0 : index %c128 = arith.constant 128 : index @@ -25,10 +26,10 @@ func @matmul_2x128x4() { %11 = "gpu.thread_id"() {dimension = "y"} : () -> index %12 = affine.apply affine_map<()[s0] -> (s0 * 4)>()[%10] %13 = memref.subview %9[%11, %12] [1, 4] [1, 1] : memref<2x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> to memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> - linalg.fill(%cst, %13) {__internal_linalg_transform__ = "vectorize", lowering.config = {tileSizes = [[2, 128], [], [1, 4], [0, 0, 4]]}} : f32, memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> + linalg.fill(%cst, %13) {__internal_linalg_transform__ = "vectorize", lowering.config = #config} : f32, memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> %17 = memref.subview %7[%11, 0] [1, 4] [1, 1] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 4 + s0 + d1)>> to memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 4 + s0 + d1)>> %18 = memref.subview %8[0, %12] [4, 4] [1, 1] : memref<4x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> to memref<4x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>> - linalg.matmul {__internal_linalg_transform__ = "vectorize", lowering.config = {tileSizes = [[2, 128], [], [1, 4], [0, 0, 4]]}} + linalg.matmul {__internal_linalg_transform__ = "vectorize", lowering.config = #config} ins(%17, %18 : memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 4 + s0 + d1)>>, memref<4x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>) outs(%13 : memref<1x4xf32, affine_map<(d0, d1)[s0] -> (d0 * 128 + s0 + d1)>>) } diff --git a/iree/compiler/Codegen/Utils/Utils.cpp b/iree/compiler/Codegen/Utils/Utils.cpp index 4f55f594f7f6..58a1e0494c35 100644 --- a/iree/compiler/Codegen/Utils/Utils.cpp +++ b/iree/compiler/Codegen/Utils/Utils.cpp @@ -19,6 +19,10 @@ namespace mlir { namespace iree_compiler { 
+//===----------------------------------------------------------------------===// +// Utility functions to get entry point(s) +//===----------------------------------------------------------------------===// + bool isEntryPoint(FuncOp func) { return func.isPublic(); } IREE::HAL::ExecutableEntryPointOp getEntryPoint(FuncOp funcOp) { @@ -41,21 +45,9 @@ llvm::StringMap getAllEntryPoints( return entryPointOps; } -IREE::HAL::TranslationInfo getTranslationInfo(FuncOp funcOp) { - auto entryPointOp = getEntryPoint(funcOp); - if (!entryPointOp) return nullptr; - return getTranslationInfo(entryPointOp); -} - -void setTranslationInfo(FuncOp entryPointFn, - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workgroupSize, - ArrayRef workloadPerWorkgroup) { - auto entryPointOp = getEntryPoint(entryPointFn); - auto translationInfo = buildTranslationInfo( - passPipeline, workloadPerWorkgroup, entryPointFn.getContext()); - setTranslationInfo(entryPointOp, translationInfo, workgroupSize); -} +//===----------------------------------------------------------------------===// +// Utility functions used in setting default configurations. 
+//===----------------------------------------------------------------------===// SmallVector getPartitionedLoops(Operation *op) { if (auto mmt4dOp = dyn_cast(op)) { @@ -80,45 +72,6 @@ SmallVector getPartitionedLoops(Operation *op) { return {}; } -LogicalResult setOpConfigAndEntryPointFnTranslation( - FuncOp entryPointFn, Operation *op, IREE::HAL::LoweringConfig config, - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workgroupSize) { - auto partitionedLoops = getPartitionedLoops(op); - SmallVector workloadPerWorkgroup; - auto tileSizes = getTileSizes(config, 0); - if (!tileSizes.empty() && !partitionedLoops.empty()) { - for (unsigned depth : partitionedLoops) { - if (depth >= tileSizes.size()) { - return op->emitOpError( - "illegal configuration for lowering op, expect first level " - "tile size to contain at least ") - << partitionedLoops.back() << " elements"; - } - if (tileSizes[depth] == 0) { - return op->emitOpError("illegal to set tilesize of loop ") - << depth - << " to zero since it is set to be partitioned at the flow " - "level"; - } - workloadPerWorkgroup.push_back(tileSizes[depth]); - } - if (!workloadPerWorkgroup.empty()) { - workloadPerWorkgroup = - llvm::to_vector<3>(llvm::reverse(workloadPerWorkgroup)); - } - } - auto entryPointOp = getEntryPoint(entryPointFn); - if (!entryPointOp) { - return entryPointFn.emitOpError( - "unable to find entry point op for entry point function"); - } - IREE::HAL::TranslationInfo translationInfo = buildTranslationInfo( - passPipeline, workloadPerWorkgroup, entryPointOp->getContext()); - setTranslationInfo(entryPointOp, translationInfo, workgroupSize); - return success(); -} - /// Walk up the defs of the view, to get the untiled value. Either walks up /// `ViewOpInterface` op-chains or the `subtensor` op-chains. 
static Value getViewSource(Value view) { diff --git a/iree/compiler/Codegen/Utils/Utils.h b/iree/compiler/Codegen/Utils/Utils.h index 120e2c4e96f3..f0c563a1c00d 100644 --- a/iree/compiler/Codegen/Utils/Utils.h +++ b/iree/compiler/Codegen/Utils/Utils.h @@ -8,7 +8,6 @@ #define IREE_COMPILER_CODEGEN_UTILS_UTILS_H_ #include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "llvm/ADT/StringMap.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/IR/BuiltinOps.h" @@ -18,6 +17,10 @@ namespace iree_compiler { static constexpr unsigned kNumMaxParallelDims = 3; +//===----------------------------------------------------------------------===// +// Utility functions to get entry point(s) +//===----------------------------------------------------------------------===// + /// Returns true if the given `func` is a kernel dispatch entry point. bool isEntryPoint(FuncOp func); @@ -28,18 +31,9 @@ llvm::StringMap getAllEntryPoints( /// Returns the entry point op for the `funcOp`. Returns `nullptr` on failure. IREE::HAL::ExecutableEntryPointOp getEntryPoint(FuncOp funcOp); -/// Returns the translation info for the `funcOp` (by looking at the entry -/// point). Returns `nullptr` on failure. -IREE::HAL::TranslationInfo getTranslationInfo(FuncOp funcOp); - -/// Sets the translation info on the `hal.executable.entry_point` op -/// corresponding to the `entryPointFn`. Returns failure if a translation info -/// is already set on the entry point op and is incompatible with what is being -/// set. -void setTranslationInfo(FuncOp entryPointFn, - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workgroupSize, - ArrayRef workloadPerWorkgroup); +//===----------------------------------------------------------------------===// +// Utility functions used in setting default configurations. 
+//===----------------------------------------------------------------------===// /// Returns the loops that are partitioned during dispatch region formations, in /// order, i.e. starting from the outer-most to innermost. @@ -47,23 +41,6 @@ void setTranslationInfo(FuncOp entryPointFn, /// formation to tile and distribute the ops. SmallVector getPartitionedLoops(Operation *op); -/// Sets translation for the entry-point function based on op configuration. -LogicalResult setOpConfigAndEntryPointFnTranslation( - FuncOp entryPointFn, Operation *op, IREE::HAL::LoweringConfig config, - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workgroupSize = {}); -inline LogicalResult setOpConfigAndEntryPointFnTranslation( - FuncOp entryPointFn, Operation *op, TileSizesListTypeRef tileSizes, - ArrayRef nativeVectorSize, - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workgroupSize = {}) { - IREE::HAL::LoweringConfig config = - buildConfigAttr(tileSizes, nativeVectorSize, op->getContext()); - setLoweringConfig(op, config); - return setOpConfigAndEntryPointFnTranslation(entryPointFn, op, config, - passPipeline, workgroupSize); -} - /// Returns the untiled type of a tiled view for both tensor and memref /// types. Either walks the `ViewOpInterface` chain (for memrefs) or the /// `subtensor` op chain (for tensors). 
diff --git a/iree/compiler/Dialect/HAL/IR/BUILD b/iree/compiler/Dialect/HAL/IR/BUILD index 9586bfc10959..c0cdf752ae22 100644 --- a/iree/compiler/Dialect/HAL/IR/BUILD +++ b/iree/compiler/Dialect/HAL/IR/BUILD @@ -27,7 +27,6 @@ td_library( "HALDialect.td", "HALInterfaces.td", "HALOps.td", - "LoweringConfig.td", ], include = ["*.td"], ), @@ -46,14 +45,12 @@ cc_library( "HALOpFolders.cpp", "HALOps.cpp", "HALTypes.cpp", - "LoweringConfig.cpp", ], hdrs = [ "HALDialect.h", "HALOps.h", "HALTraits.h", "HALTypes.h", - "LoweringConfig.h", ], textual_hdrs = [ "HALAttrs.cpp.inc", @@ -70,18 +67,12 @@ cc_library( "HALStructs.h.inc", "HALTypeInterfaces.cpp.inc", "HALTypeInterfaces.h.inc", - "LoweringConfig.h.inc", - "LoweringConfig.cpp.inc", - "LoweringConfigEnums.h.inc", - "LoweringConfigEnums.cpp.inc", ], deps = [ ":HALInterfacesGen", ":HALOpsGen", ":HALStructsGen", ":HALTypesGen", - ":LoweringConfigEnumGen", - ":LoweringConfigGen", "//iree/compiler/Dialect/Shape/IR", "//iree/compiler/Dialect/Util/IR", "@llvm-project//llvm:Support", @@ -221,37 +212,3 @@ iree_tablegen_doc( td_file = "HALOps.td", deps = [":td_files"], ) - -gentbl_cc_library( - name = "LoweringConfigGen", - tbl_outs = [ - ( - ["-gen-struct-attr-decls"], - "LoweringConfig.h.inc", - ), - ( - ["-gen-struct-attr-defs"], - "LoweringConfig.cpp.inc", - ), - ], - tblgen = "@llvm-project//mlir:mlir-tblgen", - td_file = "LoweringConfig.td", - deps = [":td_files"], -) - -gentbl_cc_library( - name = "LoweringConfigEnumGen", - tbl_outs = [ - ( - ["-gen-enum-decls"], - "LoweringConfigEnums.h.inc", - ), - ( - ["-gen-enum-defs"], - "LoweringConfigEnums.cpp.inc", - ), - ], - tblgen = "@llvm-project//mlir:mlir-tblgen", - td_file = "LoweringConfig.td", - deps = [":td_files"], -) diff --git a/iree/compiler/Dialect/HAL/IR/CMakeLists.txt b/iree/compiler/Dialect/HAL/IR/CMakeLists.txt index 341d7ea45396..8d2316a5c360 100644 --- a/iree/compiler/Dialect/HAL/IR/CMakeLists.txt +++ b/iree/compiler/Dialect/HAL/IR/CMakeLists.txt @@ -18,7 +18,6 @@ 
iree_cc_library( "HALOps.h" "HALTraits.h" "HALTypes.h" - "LoweringConfig.h" TEXTUAL_HDRS "HALAttrInterfaces.cpp.inc" "HALAttrInterfaces.h.inc" @@ -34,22 +33,15 @@ iree_cc_library( "HALStructs.h.inc" "HALTypeInterfaces.cpp.inc" "HALTypeInterfaces.h.inc" - "LoweringConfig.cpp.inc" - "LoweringConfig.h.inc" - "LoweringConfigEnums.cpp.inc" - "LoweringConfigEnums.h.inc" SRCS "HALOpFolders.cpp" "HALOps.cpp" "HALTypes.cpp" - "LoweringConfig.cpp" DEPS ::HALInterfacesGen ::HALOpsGen ::HALStructsGen ::HALTypesGen - ::LoweringConfigEnumGen - ::LoweringConfigGen LLVMSupport MLIRIR MLIRMemRef @@ -145,24 +137,4 @@ iree_tablegen_doc( -gen-dialect-doc HALDialect.md ) -iree_tablegen_library( - NAME - LoweringConfigGen - TD_FILE - "LoweringConfig.td" - OUTS - -gen-struct-attr-decls LoweringConfig.h.inc - -gen-struct-attr-defs LoweringConfig.cpp.inc -) - -iree_tablegen_library( - NAME - LoweringConfigEnumGen - TD_FILE - "LoweringConfig.td" - OUTS - -gen-enum-decls LoweringConfigEnums.h.inc - -gen-enum-defs LoweringConfigEnums.cpp.inc -) - ### BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### diff --git a/iree/compiler/Dialect/HAL/IR/HALDialect.cpp b/iree/compiler/Dialect/HAL/IR/HALDialect.cpp index 7b38e4a34883..a3e30eba8d37 100644 --- a/iree/compiler/Dialect/HAL/IR/HALDialect.cpp +++ b/iree/compiler/Dialect/HAL/IR/HALDialect.cpp @@ -10,7 +10,6 @@ #include "iree/compiler/Dialect/HAL/Conversion/HALToVM/ConvertHALToVM.h" #include "iree/compiler/Dialect/HAL/IR/HALOps.h" #include "iree/compiler/Dialect/HAL/IR/HALTypes.h" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" #include "iree/compiler/Dialect/HAL/hal.imports.h" #include "iree/compiler/Dialect/Util/IR/UtilDialect.h" #include "iree/compiler/Dialect/VM/Conversion/ConversionDialectInterface.h" @@ -44,9 +43,6 @@ struct HALOpAsmInterface : public OpAsmDialectInterface { } else if (auto targetAttr = attr.dyn_cast()) { os << "executable_target_" << targetAttr.getSymbolNameFragment(); return AliasResult::OverridableAlias; - 
} else if (attr.isa()) { - os << "config"; - return AliasResult::OverridableAlias; } return AliasResult::NoAlias; } diff --git a/iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp b/iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp deleted file mode 100644 index 4bc7e4e32087..000000000000 --- a/iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h" - -#include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" - -static const char kConfigAttrName[] = "lowering.config"; -static const char kTranslationInfoAttrName[] = "translation.info"; - -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.cpp.inc" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfigEnums.cpp.inc" - -namespace mlir { -namespace iree_compiler { - -//===----------------------------------------------------------------------===// -// Helpers for getting/setting information needed to lower an executable. 
These -// are information that are stored as attributes on the -// `hal.executable.entry_point` -//===----------------------------------------------------------------------===// - -IREE::HAL::TranslationInfo buildTranslationInfo( - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workloadPerWorkgroup, MLIRContext *context) { - OpBuilder builder(context); - auto pipelineAttr = StringAttr::get(context, stringifyEnum(passPipeline)); - ArrayAttr workloadPerWorkgroupAttr = nullptr; - if (!workloadPerWorkgroup.empty()) { - workloadPerWorkgroupAttr = builder.getI64ArrayAttr(workloadPerWorkgroup); - } - return IREE::HAL::TranslationInfo::get(pipelineAttr, workloadPerWorkgroupAttr, - context); -} - -IREE::HAL::TranslationInfo getTranslationInfo( - IREE::HAL::ExecutableEntryPointOp entryPointOp) { - return entryPointOp->getAttrOfType( - kTranslationInfoAttrName); -} - -SmallVector getWorkgroupSize( - IREE::HAL::ExecutableEntryPointOp entryPointOp) { - SmallVector workgroupSize; - if (Optional workgroupSizeAttrList = - entryPointOp.workgroup_size()) { - workgroupSize.resize(workgroupSizeAttrList->size()); - for (auto attr : llvm::enumerate(workgroupSizeAttrList.getValue())) { - workgroupSize[attr.index()] = attr.value().cast().getInt(); - } - } - return workgroupSize; -} - -void setTranslationInfo(IREE::HAL::ExecutableEntryPointOp entryPointOp, - IREE::HAL::TranslationInfo translationInfo, - ArrayRef workgroupSize) { - entryPointOp->setAttr(kTranslationInfoAttrName, translationInfo); - // The workgroup size is set on the entry point op directly. 
- if (!workgroupSize.empty()) { - MLIRContext *context = entryPointOp->getContext(); - auto indexType = IndexType::get(context); - auto attrs = llvm::to_vector<4>( - llvm::map_range(workgroupSize, [&](int64_t v) -> Attribute { - return IntegerAttr::get(indexType, v); - })); - entryPointOp.workgroup_sizeAttr(ArrayAttr::get(context, attrs)); - } -} - -//===----------------------------------------------------------------------===// -// Helpers for getting/setting the `hal.lowering.*` attributes that drive the -// linalg-based lowering. -// ===----------------------------------------------------------------------===// - -IREE::HAL::LoweringConfig getLoweringConfig(Operation *op) { - return op->getAttrOfType(kConfigAttrName); -} - -void setLoweringConfig(Operation *op, IREE::HAL::LoweringConfig config) { - op->setAttr(kConfigAttrName, config); -} - -void eraseLoweringConfig(Operation *op) { op->removeAttr(kConfigAttrName); } - -//===----------------------------------------------------------------------===// -// Helpers for accessing values from the LoweringConfig attribute. 
-//===----------------------------------------------------------------------===// - -IREE::HAL::LoweringConfig buildConfigAttr(TileSizesListTypeRef tileSizes, - ArrayRef nativeVectorSize, - MLIRContext *context) { - OpBuilder builder(context); - ArrayAttr tileSizesAttr = nullptr; - if (!tileSizes.empty()) { - auto attrList = llvm::to_vector<4>( - llvm::map_range(tileSizes, [&](ArrayRef sizes) -> Attribute { - return builder.getI64ArrayAttr(sizes); - })); - tileSizesAttr = builder.getArrayAttr(attrList); - } - ArrayAttr nativeVectorSizeAttr = nullptr; - if (!nativeVectorSize.empty()) { - nativeVectorSizeAttr = builder.getI64ArrayAttr(nativeVectorSize); - } - return IREE::HAL::LoweringConfig::get(tileSizesAttr, nativeVectorSizeAttr, - /*passPipeline = */ nullptr, - /*workgroupSize = */ nullptr, context); -} - -TileSizesListType getTileSizes(IREE::HAL::LoweringConfig config) { - auto tileSizesAttr = config.tileSizes(); - if (!tileSizesAttr) return {}; - return llvm::to_vector<1>(llvm::map_range( - tileSizesAttr, [&](Attribute attr) -> SmallVector { - return llvm::to_vector<4>( - llvm::map_range(attr.cast(), [&](Attribute intAttr) { - return intAttr.cast().getInt(); - })); - })); -} - -SmallVector getTileSizes(IREE::HAL::LoweringConfig config, - unsigned level) { - ArrayAttr tileSizesAttr = config.tileSizes(); - if (!tileSizesAttr || tileSizesAttr.size() <= level) return {}; - return llvm::to_vector<4>(llvm::map_range( - tileSizesAttr.getValue()[level].cast(), - [&](Attribute intAttr) { return intAttr.cast().getInt(); })); -} - -SmallVector getTileSizes(OpBuilder &b, Operation *op, - unsigned level) { - return llvm::to_vector<4>( - llvm::map_range(getTileSizes(op, level), [&](int64_t t) -> Value { - return b.create(op->getLoc(), t); - })); -} - -SmallVector getNativeVectorSize(IREE::HAL::LoweringConfig config) { - ArrayAttr nativeVectorSizeAttr = config.nativeVectorSize(); - if (!nativeVectorSizeAttr) return {}; - return llvm::to_vector<4>(llvm::map_range( - 
nativeVectorSizeAttr, - [&](Attribute intAttr) { return intAttr.cast().getInt(); })); -} - -} // namespace iree_compiler -} // namespace mlir diff --git a/iree/compiler/Dialect/HAL/IR/LoweringConfig.h b/iree/compiler/Dialect/HAL/IR/LoweringConfig.h deleted file mode 100644 index a48d60ae45fb..000000000000 --- a/iree/compiler/Dialect/HAL/IR/LoweringConfig.h +++ /dev/null @@ -1,163 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -//===- LoweringConfig.h - Declares configuration for lowering Linalg ops --===// -// -// This file declares an attribute that drives how a dispatch region containing -// a set of operations are lowered. The attribute itself is attached to Linalg -// operations, and help converting a Linalg operation into "scalar code". -// -//===----------------------------------------------------------------------===// - -#ifndef IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_ -#define IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_ - -#include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "mlir/IR/Builders.h" -#include "mlir/IR/BuiltinAttributes.h" -#include "mlir/IR/BuiltinTypes.h" - -// clang-format off -#include "iree/compiler/Dialect/HAL/IR/LoweringConfigEnums.h.inc" -#include "iree/compiler/Dialect/HAL/IR/LoweringConfig.h.inc" -// clang-format on - -namespace mlir { -namespace iree_compiler { - -namespace IREE { -namespace HAL { - -inline bool operator==(const TranslationInfo &lhs, const TranslationInfo &rhs) { - return lhs.passPipeline() == rhs.passPipeline() && - lhs.workloadPerWorkgroup() == rhs.workloadPerWorkgroup(); -} - -inline bool operator!=(const TranslationInfo &lhs, const TranslationInfo &rhs) { - return !(lhs == rhs); -} - -} // namespace HAL -} // namespace IREE - -//===----------------------------------------------------------------------===// 
-// Helpers for getting/setting information needed to lower an executable. These -// are information that are stored as attributes on the -// `hal.executable.entry_point` -//===----------------------------------------------------------------------===// - -/// Builder method for IREE::HAL::TranslationInfoAttr. -IREE::HAL::TranslationInfo buildTranslationInfo( - IREE::HAL::DispatchLoweringPassPipeline passPipeline, - ArrayRef workloadPerWorkgroup, MLIRContext *context); - -/// Gets the translate executable info attribute value associated with -/// `entryPointOp`. -IREE::HAL::TranslationInfo getTranslationInfo( - IREE::HAL::ExecutableEntryPointOp entryPointOp); - -/// Get the pass pipeline specified in the `translationInfo` -inline Optional -getLoweringPassPipeline(IREE::HAL::TranslationInfo translationInfo) { - return IREE::HAL::symbolizeDispatchLoweringPassPipeline( - translationInfo.passPipeline().getValue()); -} - -/// Returns the workgroup size specified on the `entryPointOp`. -SmallVector getWorkgroupSize( - IREE::HAL::ExecutableEntryPointOp entryPointOp); - -/// Set the translate executable info with the entry point op. Overwrites the -/// existing attributes. -// TODO(ravishankarm, benvanik): Eventually all the information needed for the -// lowering will be consolidated into a single attribute with richer -// information. -void setTranslationInfo(IREE::HAL::ExecutableEntryPointOp entryPointOp, - IREE::HAL::TranslationInfo translationInfo, - ArrayRef workgroupSize = {}); - -//===----------------------------------------------------------------------===// -// Helpers for getting/setting the `hal.lowering.*` attributes that drive the -// linalg-based lowering. -// ===----------------------------------------------------------------------===// - -/// Returns the lowering configuration set for an operation. -IREE::HAL::LoweringConfig getLoweringConfig(Operation *op); - -/// Sets the lowering configuration, overwriting existing attribute values. 
-void setLoweringConfig(Operation *op, IREE::HAL::LoweringConfig config); - -/// Removes the lowering configuration on the operation if it exists. -void eraseLoweringConfig(Operation *op); - -//===----------------------------------------------------------------------===// -// Helpers for accessing values from the LoweringConfig attribute. -//===----------------------------------------------------------------------===// - -// TODO(ravishankarm): Struct attributes dont have a way of defining extra class -// methods. When they do, these could all be moved into the attribute definition -// itself. - -/// Stores the tile sizes to use at different levels of tiling as a vector of -/// vectors. -/// - First level tiling maps to workgroups. -/// - Second level tiling maps to subgroups. -/// - Third level tiling maps to invocations. -using TileSizesListType = SmallVector, 1>; -using TileSizesListTypeRef = ArrayRef>; - -/// Construct a lowering configuration. -IREE::HAL::LoweringConfig buildConfigAttr(TileSizesListTypeRef tileSizes, - ArrayRef nativeVectorSize, - MLIRContext *context); - -/// Get the tile sizes for all levels. -TileSizesListType getTileSizes(IREE::HAL::LoweringConfig config); - -/// Get the tile sizes for all levels for an operation if the lowering -/// configuration is set. -inline TileSizesListType getTileSizes(Operation *op) { - auto configAttr = getLoweringConfig(op); - if (!configAttr) return {}; - return getTileSizes(configAttr); -} - -/// Get the tile sizes for level `level`, if it is defined. Returns {} if tile -/// sizes are not set for that level. -SmallVector getTileSizes(IREE::HAL::LoweringConfig config, - unsigned level); - -/// Get the tile sizes for level `level` for an operation if the lowering -/// configuration for the operation is set, and tile sizes are defined for that -/// level. 
-inline SmallVector getTileSizes(Operation *op, unsigned level) { - auto configAttr = getLoweringConfig(op); - if (!configAttr) return {}; - return getTileSizes(configAttr, level); -} -SmallVector getTileSizes(OpBuilder &b, Operation *op, unsigned level); - -/// Gets the native vector size defined in the lowering configuration. -SmallVector getNativeVectorSize(IREE::HAL::LoweringConfig config); - -/// Gets the native vector size defined for lowering an operation, if the -/// lowering configuration is defined. If not returns empty vector. -inline SmallVector getNativeVectorSize(Operation *op) { - auto configAttr = getLoweringConfig(op); - if (!configAttr) return {}; - return getNativeVectorSize(configAttr); -} - -/// Get the pass pipeline specified in the `loweringConfig` -inline Optional -getLoweringPassPipeline(IREE::HAL::LoweringConfig config) { - return IREE::HAL::symbolizeDispatchLoweringPassPipeline( - config.passPipeline().getValue()); -} - -} // namespace iree_compiler -} // namespace mlir -#endif // IREE_COMPILER_CONVERSION_COMMON_LOWERINGCONFIG_H_ diff --git a/iree/compiler/Dialect/HAL/IR/LoweringConfig.td b/iree/compiler/Dialect/HAL/IR/LoweringConfig.td deleted file mode 100644 index 9e520ceb907e..000000000000 --- a/iree/compiler/Dialect/HAL/IR/LoweringConfig.td +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright 2021 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#ifndef IREE_COMPILER_DIALECT_HAL_IR_LOWERINGCONFIG -#define IREE_COMPILER_DIALECT_HAL_IR_LOWERINGCONFIG - -// Putting this in HAL dialect for now. -include "iree/compiler/Dialect/HAL/IR/HALDialect.td" - -// List of pre-existing pipelines for translating executables. 
-def CPU_Default - : StrEnumAttrCase<"CPUDefault">; -def CPU_Vectorization - : StrEnumAttrCase<"CPUVectorization">; -def CPU_TensorToVectors - : StrEnumAttrCase<"CPUTensorToVectors">; - -def LLVMGPU_SimpleDistribute - : StrEnumAttrCase<"LLVMGPUDistribute">; -def LLVMGPU_Vectorize - : StrEnumAttrCase<"LLVMGPUVectorize">; -def LLVMGPU_MatmulSimt - : StrEnumAttrCase<"LLVMGPUMatmulSimt">; - -def SPIRV_SimpleDistribute - : StrEnumAttrCase<"SPIRVDistribute">; -def SPIRV_DistributeToGlobalID - : StrEnumAttrCase<"SPIRVDistributeToGlobalID">; -def SPIRV_Vectorize - : StrEnumAttrCase<"SPIRVVectorize">; -def SPIRV_VectorizeToCooperativeOps - : StrEnumAttrCase<"SPIRVVectorizeToCooperativeOps">; -def None - : StrEnumAttrCase<"None">; - -// EnumAttrCase for all known lowerings for ops within dispatch region -// to scalar/native-vector code. -def DispatchLoweringPassPipelineEnum : StrEnumAttr< - "DispatchLoweringPassPipeline", - "identifier for pass pipeline use to lower dispatch region", - [CPU_Default, CPU_TensorToVectors, CPU_Vectorization, - LLVMGPU_SimpleDistribute, LLVMGPU_Vectorize, LLVMGPU_MatmulSimt, - SPIRV_SimpleDistribute, SPIRV_DistributeToGlobalID, - SPIRV_Vectorize, SPIRV_VectorizeToCooperativeOps, - None]> { - let cppNamespace = "::mlir::iree_compiler::IREE::HAL"; -} - -def TileSizesListAttr : - TypedArrayAttrBase { } - -// Attribute that captures information needed for translating the executables. -def TranslationInfoAttr : - StructAttr<"TranslationInfo", HAL_Dialect, [ - StructFieldAttr<"passPipeline", DispatchLoweringPassPipelineEnum>, - StructFieldAttr<"workloadPerWorkgroup", - DefaultValuedAttr>, - ]>; - -// Attribute that carries information needed to perform -// tiling/vectorization, etc. 
-def HAL_LoweringConfigAttr : - StructAttr<"LoweringConfig", HAL_Dialect, [ - StructFieldAttr<"tileSizes", - DefaultValuedAttr>, - StructFieldAttr<"nativeVectorSize", - DefaultValuedAttr>, - StructFieldAttr<"passPipeline", - DefaultValuedAttr< - DispatchLoweringPassPipelineEnum, - "\"IREE::HAL::DispatchLoweringPassPipeline::None\"">>, - StructFieldAttr<"workgroupSize", - DefaultValuedAttr> - ]>; - -#endif // IREE_COMPILER_DIALECT_HAL_IR_LOWERINGCONFIG diff --git a/iree/compiler/Dialect/HAL/Target/CUDA/BUILD b/iree/compiler/Dialect/HAL/Target/CUDA/BUILD index 0be0ecd2ddca..bf8438203934 100644 --- a/iree/compiler/Dialect/HAL/Target/CUDA/BUILD +++ b/iree/compiler/Dialect/HAL/Target/CUDA/BUILD @@ -42,6 +42,7 @@ cc_library( deps = [ ":cuda_libdevice", "//iree/compiler/Codegen:PassHeaders", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/LLVMGPU", "//iree/compiler/Dialect/HAL/Target", "//iree/compiler/Utils", diff --git a/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp b/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp index 51210f0fc772..4974fa9dc437 100644 --- a/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.cpp @@ -6,6 +6,7 @@ #include "iree/compiler/Dialect/HAL/Target/CUDA/CUDATarget.h" +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Dialect/HAL/Target/CUDA/LLVMPasses.h" #include "iree/compiler/Dialect/HAL/Target/CUDA/libdevice.h" @@ -150,7 +151,7 @@ class CUDATargetBackend final : public TargetBackend { std::string name() const override { return "cuda"; } void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert(); mlir::registerLLVMDialectTranslation(registry); mlir::registerNVVMDialectTranslation(registry); } diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/BUILD b/iree/compiler/Dialect/HAL/Target/LLVM/BUILD index 
398b138f93be..710eb0fb2f48 100644 --- a/iree/compiler/Dialect/HAL/Target/LLVM/BUILD +++ b/iree/compiler/Dialect/HAL/Target/LLVM/BUILD @@ -37,6 +37,7 @@ cc_library( ":StaticLibraryGenerator", "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/LLVMCPU", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/HAL/Target", diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt index 7c24113f9522..f6ea0e49115d 100644 --- a/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt +++ b/iree/compiler/Dialect/HAL/Target/LLVM/CMakeLists.txt @@ -46,6 +46,7 @@ iree_cc_library( MLIRLLVMToLLVMIRTranslation MLIRTargetLLVMIRExport iree::compiler::Codegen::Common + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::LLVMCPU iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::Utils diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp index 4cf5cd7887d4..5275258ce1eb 100644 --- a/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/LLVM/LLVMAOTTarget.cpp @@ -8,6 +8,7 @@ #include +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Dialect/HAL/Target/LLVM/LLVMIRPasses.h" #include "iree/compiler/Dialect/HAL/Target/LLVM/LibraryBuilder.h" @@ -110,6 +111,7 @@ class LLVMAOTTargetBackend final : public TargetBackend { void getDependentDialects(DialectRegistry ®istry) const override { mlir::registerLLVMDialectTranslation(registry); + registry.insert(); } IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget( diff --git a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/BUILD b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/BUILD index 01f3323acdd7..6fd21ab87791 100644 --- 
a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/BUILD +++ b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/BUILD @@ -28,6 +28,7 @@ cc_library( ":SPIRVToMSL", "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/SPIRV", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/HAL/Target", diff --git a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/CMakeLists.txt index d5fecf0b9e2e..d01431a31b30 100644 --- a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/CMakeLists.txt +++ b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/CMakeLists.txt @@ -30,6 +30,7 @@ iree_cc_library( MLIRSPIRVSerialization MLIRVector iree::compiler::Codegen::Common + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::SPIRV iree::compiler::Codegen::Utils diff --git a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp index ed74c7079c0f..6d8ec736a34e 100644 --- a/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.cpp @@ -6,6 +6,7 @@ #include "iree/compiler/Dialect/HAL/Target/MetalSPIRV/MetalSPIRVTarget.h" +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Dialect/HAL/Target/MetalSPIRV/SPIRVToMSL.h" #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h" @@ -44,7 +45,8 @@ class MetalSPIRVTargetBackend : public TargetBackend { std::string name() const override { return "metal"; } void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert(); } IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget( diff --git a/iree/compiler/Dialect/HAL/Target/ROCM/BUILD b/iree/compiler/Dialect/HAL/Target/ROCM/BUILD index 
67ffe56895b1..6c1f412886bf 100644 --- a/iree/compiler/Dialect/HAL/Target/ROCM/BUILD +++ b/iree/compiler/Dialect/HAL/Target/ROCM/BUILD @@ -31,6 +31,7 @@ cc_library( ], deps = [ "//iree/compiler/Codegen:PassHeaders", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/LLVMGPU", "//iree/compiler/Dialect/HAL/Target", "//iree/compiler/Utils", diff --git a/iree/compiler/Dialect/HAL/Target/ROCM/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/ROCM/CMakeLists.txt index a57a8b868a93..041ca473dbbc 100644 --- a/iree/compiler/Dialect/HAL/Target/ROCM/CMakeLists.txt +++ b/iree/compiler/Dialect/HAL/Target/ROCM/CMakeLists.txt @@ -38,6 +38,7 @@ iree_cc_library( MLIRROCDLToLLVMIRTranslation MLIRSupport MLIRTargetLLVMIRExport + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::LLVMGPU iree::compiler::Codegen::PassHeaders iree::compiler::Dialect::HAL::Target diff --git a/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp b/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp index 08f26434316d..90aaeb32f4c2 100644 --- a/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/ROCM/ROCMTarget.cpp @@ -8,6 +8,7 @@ #include +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h" #include "iree/compiler/Utils/FlatbufferUtils.h" @@ -77,13 +78,13 @@ class ROCMTargetBackend final : public TargetBackend { void getDependentDialects(DialectRegistry ®istry) const override { mlir::registerLLVMDialectTranslation(registry); mlir::registerROCDLDialectTranslation(registry); + registry.insert(); } IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget( MLIRContext *context) const override { Builder b(context); SmallVector configItems; - ; configItems.emplace_back(b.getIdentifier("executable_targets"), getExecutableTargets(context)); diff --git a/iree/compiler/Dialect/HAL/Target/VMVX/BUILD 
b/iree/compiler/Dialect/HAL/Target/VMVX/BUILD index fdf19ce140c6..4582466745b0 100644 --- a/iree/compiler/Dialect/HAL/Target/VMVX/BUILD +++ b/iree/compiler/Dialect/HAL/Target/VMVX/BUILD @@ -30,6 +30,7 @@ cc_library( ], deps = [ "//iree/compiler/Codegen:PassHeaders", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Dialect/Flow/IR", "//iree/compiler/Dialect/HAL/Target", "//iree/compiler/Dialect/Modules/VMVX/IR:VMVXDialect", diff --git a/iree/compiler/Dialect/HAL/Target/VMVX/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/VMVX/CMakeLists.txt index f21e2c09ae4e..757b3fd2bad6 100644 --- a/iree/compiler/Dialect/HAL/Target/VMVX/CMakeLists.txt +++ b/iree/compiler/Dialect/HAL/Target/VMVX/CMakeLists.txt @@ -26,6 +26,7 @@ iree_cc_library( MLIRIR MLIRPass MLIRSupport + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Dialect::Flow::IR iree::compiler::Dialect::HAL::Target diff --git a/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp b/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp index 9206b7541607..8b3fa5274e0d 100644 --- a/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.cpp @@ -6,6 +6,7 @@ #include "iree/compiler/Dialect/HAL/Target/VMVX/VMVXTarget.h" +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h" #include "iree/compiler/Dialect/Modules/VMVX/IR/VMVXDialect.h" @@ -35,7 +36,8 @@ class VMVXTargetBackend final : public TargetBackend { std::string name() const override { return "vmvx"; } void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); + registry.insert(); } IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget( diff --git a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/BUILD b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/BUILD index 0345228d8572..2d398cc93b0b 100644 
--- a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/BUILD +++ b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/BUILD @@ -31,6 +31,7 @@ cc_library( deps = [ "//iree/compiler/Codegen:PassHeaders", "//iree/compiler/Codegen/Common", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Codegen/SPIRV", "//iree/compiler/Codegen/Utils", "//iree/compiler/Dialect/Flow/IR", diff --git a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/CMakeLists.txt b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/CMakeLists.txt index d7f45902413d..d653bfe7c1a9 100644 --- a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/CMakeLists.txt +++ b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/CMakeLists.txt @@ -34,6 +34,7 @@ iree_cc_library( MLIRSupport MLIRVector iree::compiler::Codegen::Common + iree::compiler::Codegen::Dialect::IREECodegenDialect iree::compiler::Codegen::PassHeaders iree::compiler::Codegen::SPIRV iree::compiler::Codegen::Utils diff --git a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp index 3afdcba1a76a..9af89bc04ae4 100644 --- a/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp +++ b/iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.cpp @@ -6,6 +6,7 @@ #include "iree/compiler/Dialect/HAL/Target/VulkanSPIRV/VulkanSPIRVTarget.h" +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Codegen/Passes.h" #include "iree/compiler/Dialect/Flow/IR/FlowOps.h" #include "iree/compiler/Dialect/HAL/Target/TargetRegistry.h" @@ -119,8 +120,8 @@ class VulkanSPIRVTargetBackend : public TargetBackend { std::string name() const override { return "vulkan"; } void getDependentDialects(DialectRegistry ®istry) const override { - registry - .insert(); + registry.insert(); } IREE::HAL::DeviceTargetAttr getDefaultDeviceTarget( diff --git a/iree/test/e2e/regression/lowering_config.mlir b/iree/test/e2e/regression/lowering_config.mlir index 
a70c4910f6e5..17b401fec304 100644 --- a/iree/test/e2e/regression/lowering_config.mlir +++ b/iree/test/e2e/regression/lowering_config.mlir @@ -1,11 +1,17 @@ -#config1 = {tileSizes = [[32, 32, 32]], passPipeline = 1 : i32} -#config2 = {tileSizes = [[64, 64, 64]], passPipeline = 1 : i32} +#compilation0 = #iree_codegen.compilation.info< + #iree_codegen.lowering.config, + #iree_codegen.translation.info<"CPUVectorization", workload_per_wg = [32, 32]>, + workgroup_size = []> +#compilation1 = #iree_codegen.compilation.info< + #iree_codegen.lowering.config, + #iree_codegen.translation.info<"CPUVectorization", workload_per_wg = [64, 64]>, + workgroup_size = []> func @lowering_config_test() { %a = util.unfoldable_constant dense<1.0> : tensor<128x256xf32> %b = util.unfoldable_constant dense<2.0> : tensor<256x512xf32> %c = util.unfoldable_constant dense<2.0> : tensor<256x1024xf32> - %d = "mhlo.dot"(%a, %b) {lowering.config = #config1} : (tensor<128x256xf32>, tensor<256x512xf32>) -> tensor<128x512xf32> - %e = "mhlo.dot"(%a, %c) {lowering.config = #config2} : (tensor<128x256xf32>, tensor<256x1024xf32>) -> tensor<128x1024xf32> + %d = "mhlo.dot"(%a, %b) {compilation.info = #compilation0} : (tensor<128x256xf32>, tensor<256x512xf32>) -> tensor<128x512xf32> + %e = "mhlo.dot"(%a, %c) {compilation.info = #compilation1} : (tensor<128x256xf32>, tensor<256x1024xf32>) -> tensor<128x1024xf32> check.expect_almost_eq_const(%d, dense<512.0> : tensor<128x512xf32>) : tensor<128x512xf32> check.expect_almost_eq_const(%e, dense<512.0> : tensor<128x1024xf32>) : tensor<128x1024xf32> return diff --git a/iree/tools/BUILD b/iree/tools/BUILD index 2723b386a396..9a68ab2a6b1a 100644 --- a/iree/tools/BUILD +++ b/iree/tools/BUILD @@ -100,6 +100,7 @@ cc_library( deps = [ "//iree/compiler/Bindings/Native/Transforms", "//iree/compiler/Bindings/TFLite/Transforms", + "//iree/compiler/Codegen/Dialect:IREECodegenDialect", "//iree/compiler/Dialect/Flow/IR", "//iree/compiler/Dialect/Flow/Transforms", 
"//iree/compiler/Dialect/HAL/IR:HALDialect", diff --git a/iree/tools/init_iree_dialects.h b/iree/tools/init_iree_dialects.h index b37493471664..184af7755894 100644 --- a/iree/tools/init_iree_dialects.h +++ b/iree/tools/init_iree_dialects.h @@ -14,6 +14,7 @@ #include "iree-dialects/Dialect/IREE/IREEDialect.h" #include "iree-dialects/Dialect/IREEPyDM/IR/Dialect.h" +#include "iree/compiler/Codegen/Dialect/IREECodegenDialect.h" #include "iree/compiler/Dialect/Flow/IR/FlowDialect.h" #include "iree/compiler/Dialect/HAL/IR/HALDialect.h" #include "iree/compiler/Dialect/LinalgExt/IR/LinalgExtDialect.h" @@ -32,7 +33,8 @@ namespace iree_compiler { // Add all the IREE dialects to the provided registry. inline void registerIreeDialects(DialectRegistry ®istry) { // clang-format off - registry.insert Date: Mon, 25 Oct 2021 14:04:25 -0700 Subject: [PATCH 03/22] Dump more IRs for each stage of LLVMCPUTileAndVectorizePass in debug mode. (#7364) --- ...LLVMCPUTileAndVectorizeLinalgTensorOps.cpp | 55 ++++++++++++++++++- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp b/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp index 5a110e773ef4..0c77693bb043 100644 --- a/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp +++ b/iree/compiler/Codegen/LLVMCPU/LLVMCPUTileAndVectorizeLinalgTensorOps.cpp @@ -21,7 +21,7 @@ #include "mlir/Pass/Pass.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#define DEBUG_TYPE "iree-linalg-to-llvm-tile-and-pad-workgroups" +#define DEBUG_TYPE "iree-llvmcpu-tile-and-vectorize" namespace mlir { namespace iree_compiler { @@ -73,7 +73,13 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { MLIRContext *context = &getContext(); auto funcOp = getOperation(); - // First level of tiling patterns { + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- Before LLVMCPUTileAndVectorizePass ---\n"; + funcOp.print(llvm::dbgs(), 
OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); + + // First level of tiling patterns { OwningRewritePatternList l1patterns(&getContext()); l1patterns.insert( @@ -90,6 +96,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(l1patterns)))) { return signalPassFailure(); } + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- After first level of tiling patterns ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } // Apply canoncalization @@ -104,6 +116,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { funcOp, std::move(canonicalizationPatterns)))) { return signalPassFailure(); } + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- After canonicalization ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } // Second level of tiling patterns{ @@ -123,7 +141,14 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(l2patterns)))) { return signalPassFailure(); } + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- After second level of tiling patterns ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } + // Apply canoncalization { OwningRewritePatternList canonicalizationPatterns(&getContext()); @@ -136,6 +161,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { funcOp, std::move(canonicalizationPatterns)))) { return signalPassFailure(); } + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- After canonicalization ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } if (!lowerToVectors) { @@ -154,6 +185,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { funcOp, std::move(vectorizationPatterns)))) { return signalPassFailure(); } + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + 
llvm::dbgs() << "\n--- After vectorization ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } { @@ -163,6 +200,14 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { context); (void)applyPatternsAndFoldGreedily(funcOp, std::move(canonicalizationPatterns)); + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() + << "\n--- After folding consumer add ops into contraction op " + "iteself ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } // Apply vector specific operation lowering. @@ -181,6 +226,12 @@ void LLVMCPUTileAndVectorizePass::runOnOperation() { funcOp, std::move(vectorContractLoweringPatterns)))) { return signalPassFailure(); } + + DEBUG_WITH_TYPE(DEBUG_TYPE, { + llvm::dbgs() << "\n--- After vector specific operatrion lowering ---\n"; + funcOp.print(llvm::dbgs(), OpPrintingFlags().useLocalScope()); + llvm::dbgs() << "\n\n"; + }); } } From 2ad8bd7635b99f72fcc11329785ab42566780c86 Mon Sep 17 00:00:00 2001 From: MaheshRavishankar <1663364+MaheshRavishankar@users.noreply.github.com> Date: Mon, 25 Oct 2021 16:04:12 -0700 Subject: [PATCH 04/22] Fix assertion being hit by trying to dereference Block::iterator::end. 
(#7455) --- .../Dialect/Flow/IR/FlowOpFolders.cpp | 3 +- iree/compiler/Dialect/Shape/IR/Builders.cpp | 4 +- .../Dialect/Stream/IR/StreamOpFolders.cpp | 4 +- .../Dialect/Util/IR/UtilInterfaces.td | 5 ++- iree/compiler/Dialect/Util/IR/UtilTypes.cpp | 44 +++++++++++-------- iree/compiler/Dialect/Util/IR/UtilTypes.h | 5 ++- 6 files changed, 38 insertions(+), 27 deletions(-) diff --git a/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp b/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp index 1ceb06673c9a..ef12029b718f 100644 --- a/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp +++ b/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp @@ -697,7 +697,8 @@ struct ResolveShapedDim : public OpRewritePattern { return success(); } - auto dynamicDims = IREE::Util::findDynamicDims(op.source(), op); + auto dynamicDims = IREE::Util::findDynamicDims( + op.source(), op->getBlock(), Block::iterator(op.getOperation())); if (!dynamicDims.hasValue()) { return rewriter.notifyMatchFailure(op, "no dynamic dims found/usable"); } diff --git a/iree/compiler/Dialect/Shape/IR/Builders.cpp b/iree/compiler/Dialect/Shape/IR/Builders.cpp index c4557ee26aa8..8a56f16664b8 100644 --- a/iree/compiler/Dialect/Shape/IR/Builders.cpp +++ b/iree/compiler/Dialect/Shape/IR/Builders.cpp @@ -141,8 +141,8 @@ SmallVector buildOrFindDynamicDimsForValue(Location loc, Value value, // This is the first step on the path: we are going to gradually start // removing the implementation of the ShapeCarryingInterface on ops and use // the new ShapeAwareOpInterface. 
- auto dynamicDims = - IREE::Util::findDynamicDims(value, &*builder.getInsertionPoint()); + auto dynamicDims = IREE::Util::findDynamicDims(value, builder.getBlock(), + builder.getInsertionPoint()); if (dynamicDims.hasValue()) { return llvm::to_vector<4>(dynamicDims.getValue()); } diff --git a/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp b/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp index b77c2ef2029e..84a19314f86d 100644 --- a/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp +++ b/iree/compiler/Dialect/Stream/IR/StreamOpFolders.cpp @@ -306,7 +306,9 @@ void ResourceDeallocaOp::getCanonicalizationPatterns( OpFoldResult ResourceSizeOp::fold(ArrayRef operands) { auto sizeAwareType = operand().getType().cast(); - return sizeAwareType.findSizeValue(operand(), *this); + Operation *op = this->getOperation(); + return sizeAwareType.findSizeValue(operand(), op->getBlock(), + Block::iterator(op)); } //===----------------------------------------------------------------------===// diff --git a/iree/compiler/Dialect/Util/IR/UtilInterfaces.td b/iree/compiler/Dialect/Util/IR/UtilInterfaces.td index e092f535b99c..fec6cd44418a 100644 --- a/iree/compiler/Dialect/Util/IR/UtilInterfaces.td +++ b/iree/compiler/Dialect/Util/IR/UtilInterfaces.td @@ -367,8 +367,9 @@ def Util_SizeAwareType : TypeInterface<"SizeAwareTypeInterface"> { let extraClassDeclaration = [{ // Walks the SSA use-def chain to find the size of the type. // Returns nullptr if the size cannot be found or if it is defined after - // |forOp|. - static Value findSizeValue(Value resourceValue, Operation *forOp); + // {|block|, |insertionPoint|}. + static Value findSizeValue(Value resourceValue, Block *block, + Block::iterator insertionPoint); // Returns an SSA value representing the byte size of |value| or nullptr // if not a sized value. 
diff --git a/iree/compiler/Dialect/Util/IR/UtilTypes.cpp b/iree/compiler/Dialect/Util/IR/UtilTypes.cpp index f1edb9240944..20d934649aab 100644 --- a/iree/compiler/Dialect/Util/IR/UtilTypes.cpp +++ b/iree/compiler/Dialect/Util/IR/UtilTypes.cpp @@ -294,34 +294,38 @@ void excludeTiedOperandAndResultIndices( // IREE::Util::SizeAwareTypeInterface //===----------------------------------------------------------------------===// -static bool isValueUsableForOp(Value value, Operation *forOp) { - if (forOp->getBlock() == nullptr) { +static bool isValueUsableForOp(Value value, Block *block, + Block::iterator insertionPoint) { + if (block == nullptr) { // Op is not in a block; can't analyze (maybe?). return false; } auto *definingBlock = value.getParentBlock(); - if (definingBlock == forOp->getBlock()) { + if (definingBlock == block) { // Defined in the same block; ensure block order. if (value.isa()) return true; - if (value.getDefiningOp()->isBeforeInBlock(forOp)) return true; + if (insertionPoint == block->end()) return true; + if (value.getDefiningOp()->isBeforeInBlock(&*insertionPoint)) { + return true; + } } else if (definingBlock->isEntryBlock()) { // Entry block always dominates - fast path for constants. return true; } else { // See if block the value is defined in dominates the forOp block. // TODO(benvanik): optimize this, it's terribly expensive to recompute. - DominanceInfo dominanceInfo(forOp->getParentOp()); - return dominanceInfo.dominates(definingBlock, forOp->getBlock()); + DominanceInfo dominanceInfo(block->getParentOp()); + return dominanceInfo.dominates(definingBlock, block); } return false; } // static -Value SizeAwareTypeInterface::findSizeValue(Value resourceValue, - Operation *forOp) { +Value SizeAwareTypeInterface::findSizeValue(Value resourceValue, Block *block, + Block::iterator insertionPoint) { // See if the value is produced by a size-aware op; we can just ask for the // size it has tied. 
Walking upward is always good as we know any size we find - // dominates |forOp|. + // dominates {|block|, |insertionPoint|}. SmallVector worklist; worklist.push_back(resourceValue); while (!worklist.empty()) { @@ -347,7 +351,8 @@ Value SizeAwareTypeInterface::findSizeValue(Value resourceValue, use.getOwner())) { auto sizeValue = sizeAwareOp.getOperandSize(use.getOperandNumber()); if (sizeValue) { - if (isValueUsableForOp(sizeValue, forOp)) return sizeValue; + if (isValueUsableForOp(sizeValue, block, insertionPoint)) + return sizeValue; } } if (auto tiedOp = @@ -369,8 +374,8 @@ Value SizeAwareTypeInterface::queryValueSize(Location loc, Value resourceValue, return {}; // Not a sized type. } if (!builder.getInsertionPoint().getNodePtr()->isKnownSentinel()) { - Operation &insertionPt = *builder.getInsertionPoint(); - auto sizeValue = sizeAwareType.findSizeValue(resourceValue, &insertionPt); + auto sizeValue = sizeAwareType.findSizeValue( + resourceValue, builder.getBlock(), builder.getInsertionPoint()); if (sizeValue) { return sizeValue; // Found in IR. } @@ -414,9 +419,10 @@ ValueRange findVariadicDynamicDims(unsigned idx, ValueRange values, return dynamicDims.slice(offset, shapedType.getNumDynamicDims()); } -Optional findDynamicDims(Value shapedValue, Operation *forOp) { +Optional findDynamicDims(Value shapedValue, Block *block, + Block::iterator insertionPoint) { // Look up the use-def chain: always safe, as any value we reach dominates - // |forOp| implicitly. + // {|block|, |insertionPoint|} implicitly. SmallVector worklist; worklist.push_back(shapedValue); while (!worklist.empty()) { @@ -432,16 +438,16 @@ Optional findDynamicDims(Value shapedValue, Operation *forOp) { } } - // Look down the use-def chain: not safe at some point because we'll move - // past where |forOp| is dominated. This is often fine for a bit, though, as - // |forOp| may be a user of |shapedValue| and be able to provide the shape - // itself. 
+ // Look down the use-def chain: not safe at some point because we'll move past + // where {|block|, |insertionPoint|} is dominated. This is often fine for a + // bit, though, as {|block|, |insertionPoint|} may be a user of |shapedValue| + // and be able to provide the shape itself. for (auto &use : shapedValue.getUses()) { if (auto shapeAwareOp = dyn_cast(use.getOwner())) { auto dynamicDims = shapeAwareOp.getOperandDynamicDims(use.getOperandNumber()); if (llvm::all_of(dynamicDims, [&](Value dim) { - return isValueUsableForOp(dim, forOp); + return isValueUsableForOp(dim, block, insertionPoint); })) { return dynamicDims; } diff --git a/iree/compiler/Dialect/Util/IR/UtilTypes.h b/iree/compiler/Dialect/Util/IR/UtilTypes.h index aec114aacded..03faf9c03690 100644 --- a/iree/compiler/Dialect/Util/IR/UtilTypes.h +++ b/iree/compiler/Dialect/Util/IR/UtilTypes.h @@ -171,8 +171,9 @@ void excludeTiedOperandAndResultIndices( // Walks the SSA use-def chain to find the dynamic dimensions of the value. // Returns None if the shape cannot be found or if it is defined after -// |forOp|. -Optional findDynamicDims(Value shapedValue, Operation *forOp); +// {|block|, |insertionPoint|}. +Optional findDynamicDims(Value shapedValue, Block *block, + Block::iterator insertionPoint); // Returns the dynamic dimensions for the value at |idx|. 
ValueRange findVariadicDynamicDims(unsigned idx, ValueRange values, From 1c95d1ad8f2fd43ac084d4e3f1c658bae4d67798 Mon Sep 17 00:00:00 2001 From: bjacob Date: Tue, 26 Oct 2021 11:23:09 -0400 Subject: [PATCH 05/22] enable mixed-shapes testcases (#7440) --- iree/test/e2e/regression/generate_e2e_matmul_tests.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/iree/test/e2e/regression/generate_e2e_matmul_tests.py b/iree/test/e2e/regression/generate_e2e_matmul_tests.py index 26aa509ed6ef..9fec3e6f63c0 100644 --- a/iree/test/e2e/regression/generate_e2e_matmul_tests.py +++ b/iree/test/e2e/regression/generate_e2e_matmul_tests.py @@ -99,9 +99,7 @@ def get_test_generators(): # Generators using general random matrices ("random", "random", "random", "dynamic"), ("random", "random", "random", "static"), - # TODO: enable 'mixed' testcases. For now they cause iree-opt - # errors. - #("random", "random", "random", "mixed"), + ("random", "random", "random", "mixed"), ], "large": [ # Fewer generators are used for large shapes, to limit the From 109fcfddae01d82e8a58ce7ea1149f39b68f3624 Mon Sep 17 00:00:00 2001 From: bjacob Date: Tue, 26 Oct 2021 16:28:47 -0400 Subject: [PATCH 06/22] Use dataclasses and enums in generate_e2e_matmul_tests.py (#7441) Just some improvement to how we use python in this file. Should be a no-op change. Fixes #7431 . --- .../regression/generate_e2e_matmul_tests.py | 382 +++++++++++------- 1 file changed, 245 insertions(+), 137 deletions(-) diff --git a/iree/test/e2e/regression/generate_e2e_matmul_tests.py b/iree/test/e2e/regression/generate_e2e_matmul_tests.py index 9fec3e6f63c0..bc983c352da6 100644 --- a/iree/test/e2e/regression/generate_e2e_matmul_tests.py +++ b/iree/test/e2e/regression/generate_e2e_matmul_tests.py @@ -4,116 +4,206 @@ # Licensed under the Apache License v2.0 with LLVM Exceptions. # See https://llvm.org/LICENSE.txt for license information. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -"""iree_generated_check_test generator for end-to-end matrix multiplication. +"""iree_generated_trace_runner_test generator for e2e matmul tests. """ import argparse -import random import os import yaml import re - -# Returns lists of shapes as (M, K, N) tuples. -# For example (M, K, 1) is a matrix*vector product, and (M, 1, N) is an outer -# product. -def get_test_shapes(): - return { - "small": [ # Small sizes, square matrices - (x, x, x) for x in range(1, 40) - ] + [ - # Small sizes, slightly rectangular matrices - (2, 3, 4), - (8, 7, 6), - (15, 16, 17), - (14, 19, 23), - (31, 33, 32), - (25, 41, 35), - # Small sizes, involving vectors (i.e. most rectangular cases) - (10, 1, 1), - (1, 10, 1), - (1, 1, 10), - (1, 10, 10), - (10, 1, 10), - (10, 10, 1), - # Small sizes, involving other very small dimensions just above 1 - (13, 14, 2), - (3, 17, 12), - (21, 4, 18), - # Medium sizes, square matrices - (100, 100, 100), - # Medium sizes, slightly rectangular matrices - (101, 102, 103), - # Medium sizes, involving vectors (i.e. most rectangular cases) - (10000, 1, 1), - (1, 10000, 1), - (1, 1, 10000), - (1, 1000, 1000), - (1000, 1, 1000), - (1000, 1000, 1), - # Medium sizes, involving other very small dimensions just above 1 - (1300, 1300, 2), - (1300, 1300, 3), - (1300, 1300, 4), - ], - "large": [ - # Large sizes, powers of two - (256, 256, 512), - (512, 512, 128), - (1024, 512, 512), - (512, 1024, 512), - # Large sizes, powers of two minus one - (127, 63, 511), - # Large sizes, powers of two plus one - (129, 65, 513), - # Large sizes, misc. - (200, 300, 400), - (123, 456, 789), - (500, 500, 50), - # Be conservative in adding larger shapes. They can result in - # high latency tests. If you have to, consider splitting them - # out in a way that constrains the latency impact, e.g. by - # running on fewer backends/drivers or with fewer generators - # (see get_test_generators). 
- ] - } - - -# Returns lists of 'generators', which are tuples of the form -# (lhs_generator, rhs_generator, acc_generator, dynamicity) -# The first 3 entries specify how to generate test input data. -# The dynamicity entry chooses between static, dynamic or mixed shapes. -# -# TODO (Issue #7431): turn into enum and dataclass. -def get_test_generators(): - return { - "small": [ - # Generators using simple matrices for ease of numerical debugging. - # They don't add significant test coverage (all bugs are hit by - # tests using random matrices anyway). They are only here to make - # the bulk of our debugging easier. - ("identity", "identity", "zero", "dynamic"), - ("random", "identity", "zero", "dynamic"), - ("identity", "random", "zero", "dynamic"), - ("identity", "identity", "random", "dynamic"), - # Generators using general random matrices - ("random", "random", "random", "dynamic"), - ("random", "random", "random", "static"), - ("random", "random", "random", "mixed"), - ], - "large": [ - # Fewer generators are used for large shapes, to limit the - # latency impact. Most bugs are going to be caught on small - # shapes anyway. - ("random", "random", "random", "dynamic"), - ("random", "random", "random", "static"), - ] - } +import enum +import dataclasses + + +# Data type of matrix entries. The string values must match MLIR data types. +# This is a superset of the values accepted for the --lhs_rhs_types= flag, +# as this also includes accumulator-specific types like i32. +@enum.unique +class MatrixElemTypeId(enum.Enum): + I8 = "i8" + I32 = "i32" + F32 = "f32" + + +# Enumerates of the collections of shapes that we can generate tests for. +# The values are the accepted values for the --shapes= flag. +@enum.unique +class ShapesId(enum.Enum): + SMALL = "small" + LARGE = "large" + + +# Enumerates ways to construct MLIR tensor types. +@enum.unique +class Dynamicity(enum.Enum): + DYNAMIC = "dynamic" # Use '?' everywhere. Example: tensor. 
+ STATIC = "static" # Use fixed values everywhere. Example: tensor<4x6xf32>. + MIXED = "mixed" # Randomly mix '?' and values. Example: tensor. + + +# Enumerates ways to initialize matrix buffer contents. +@enum.unique +class MatrixGenerator(enum.Enum): + ZERO = "zero" # Fill with zeros + IDENTITY = "identity" # Make an identity matrix (generalized to any shape). + RANDOM = "random" # Fill with (deterministic) pseudorandom values. + + +# Describes the shape of a matrix multiplication in the usual convention: +# the LHS is {m}x{k}, the RHS is {k}x{n}, the accumulator/result is {m}x{n}. +@dataclasses.dataclass +class TestShape: + m: int + k: int + n: int + + +# Describes how to construct MLIR tensor types and how to initialize buffer +# contents for a test case (for an already given TestShape, and already given +# matrix element data types). +@dataclasses.dataclass +class TestGenerator: + lhs: MatrixGenerator + rhs: MatrixGenerator + acc: MatrixGenerator + dynamicity: Dynamicity + + +# Returns the list of TestShape's to use for the collection of shapes +# identified by shapes_id. +def get_test_shapes(shapes_id: ShapesId): + if shapes_id == ShapesId.SMALL: + return [ # Small sizes, square matrices + TestShape(m=x, k=x, n=x) for x in range(1, 40) + ] + [ + # Small sizes, slightly rectangular matrices + TestShape(m=2, k=3, n=4), + TestShape(m=8, k=7, n=6), + TestShape(m=15, k=16, n=17), + TestShape(m=14, k=19, n=23), + TestShape(m=31, k=33, n=32), + TestShape(m=25, k=41, n=35), + # Small sizes, involving vectors (i.e. 
most rectangular cases) + TestShape(m=10, k=1, n=1), + TestShape(m=1, k=10, n=1), + TestShape(m=1, k=1, n=10), + TestShape(m=1, k=10, n=10), + TestShape(m=10, k=1, n=10), + TestShape(m=10, k=10, n=1), + # Small sizes, involving other very small dimensions just above 1 + TestShape(m=13, k=14, n=2), + TestShape(m=3, k=17, n=12), + TestShape(m=21, k=4, n=18), + # Medium sizes, square matrices + TestShape(m=100, k=100, n=100), + # Medium sizes, slightly rectangular matrices + TestShape(m=101, k=102, n=103), + # Medium sizes, involving vectors (i.e. most rectangular cases) + TestShape(m=10000, k=1, n=1), + TestShape(m=1, k=10000, n=1), + TestShape(m=1, k=1, n=10000), + TestShape(m=1, k=1000, n=1000), + TestShape(m=1000, k=1, n=1000), + TestShape(m=1000, k=1000, n=1), + # Medium sizes, involving other very small dimensions just above 1 + TestShape(m=1300, k=1300, n=2), + TestShape(m=1300, k=1300, n=3), + TestShape(m=1300, k=1300, n=4), + ] + if shapes_id == ShapesId.LARGE: + return [ + # Large sizes, powers of two + TestShape(m=256, k=256, n=512), + TestShape(m=512, k=512, n=128), + TestShape(m=1024, k=512, n=512), + TestShape(m=512, k=1024, n=512), + # Large sizes, powers of two minus one + TestShape(m=127, k=63, n=511), + # Large sizes, powers of two plus one + TestShape(m=129, k=65, n=513), + # Large sizes, misc. + TestShape(m=200, k=300, n=400), + TestShape(m=123, k=456, n=789), + TestShape(m=500, k=500, n=50), + # Be conservative in adding larger shapes. They can result in + # high latency tests. If you have to, consider splitting them + # out in a way that constrains the latency impact, e.g. by + # running on fewer backends/drivers or with fewer generators + # (see get_test_generators). + ] + raise ValueError(shapes_id) + + +# Returns the list of TestGenerator's to use for the collection of shapes +# identified by shapes_id. 
+def get_test_generators(shapes_id: ShapesId): + if shapes_id == ShapesId.SMALL: + return [ + # Generators using simple matrices for ease of numerical debugging. + # They don't add significant test coverage (all bugs are hit by + # tests using random matrices anyway). They are only here to make + # the bulk of our debugging easier. + TestGenerator(lhs=MatrixGenerator.IDENTITY, + rhs=MatrixGenerator.IDENTITY, + acc=MatrixGenerator.ZERO, + dynamicity=Dynamicity.DYNAMIC), + TestGenerator(lhs=MatrixGenerator.RANDOM, + rhs=MatrixGenerator.IDENTITY, + acc=MatrixGenerator.ZERO, + dynamicity=Dynamicity.DYNAMIC), + TestGenerator(lhs=MatrixGenerator.IDENTITY, + rhs=MatrixGenerator.RANDOM, + acc=MatrixGenerator.ZERO, + dynamicity=Dynamicity.DYNAMIC), + TestGenerator(lhs=MatrixGenerator.IDENTITY, + rhs=MatrixGenerator.IDENTITY, + acc=MatrixGenerator.RANDOM, + dynamicity=Dynamicity.DYNAMIC), + # Generators using general random matrices + TestGenerator(lhs=MatrixGenerator.RANDOM, + rhs=MatrixGenerator.RANDOM, + acc=MatrixGenerator.RANDOM, + dynamicity=Dynamicity.DYNAMIC), + TestGenerator(lhs=MatrixGenerator.RANDOM, + rhs=MatrixGenerator.RANDOM, + acc=MatrixGenerator.RANDOM, + dynamicity=Dynamicity.STATIC), + TestGenerator(lhs=MatrixGenerator.RANDOM, + rhs=MatrixGenerator.RANDOM, + acc=MatrixGenerator.RANDOM, + dynamicity=Dynamicity.MIXED), + ] + if shapes_id == ShapesId.LARGE: + return [ + # Fewer generators are used for large shapes, to limit the + # latency impact. Most bugs are going to be caught on small + # shapes anyway. + TestGenerator(lhs=MatrixGenerator.RANDOM, + rhs=MatrixGenerator.RANDOM, + acc=MatrixGenerator.RANDOM, + dynamicity=Dynamicity.DYNAMIC), + TestGenerator(lhs=MatrixGenerator.RANDOM, + rhs=MatrixGenerator.RANDOM, + acc=MatrixGenerator.RANDOM, + dynamicity=Dynamicity.STATIC), + ] + raise ValueError(shapes_id) # Generates a name for a test function in the generated MLIR code. 
-def function_name(lhs_rhs_type, accum_type, shape, gen): - return f"{lhs_rhs_type}_{gen[3]}_{gen[0]}_{shape[0]}x{shape[1]}_times_{gen[1]}_{shape[1]}x{shape[2]}_plus_{gen[2]}_{accum_type}" +def function_name(lhs_rhs_type: MatrixElemTypeId, acc_type: MatrixElemTypeId, + shape: TestShape, gen: TestGenerator): + dyn = gen.dynamicity.value + lhs_g = gen.lhs.value + rhs_g = gen.rhs.value + acc_g = gen.acc.value + input_t = lhs_rhs_type.value + acc_t = acc_type.value + m = shape.m + k = shape.k + n = shape.n + return f"{input_t}_{dyn}_{lhs_g}_{m}x{k}_times_{rhs_g}_{k}x{n}_plus_{acc_g}_{acc_t}" # Intentionally fixed seed! We want full reproducibility here, both across runs @@ -125,12 +215,12 @@ def function_name(lhs_rhs_type, accum_type, shape, gen): # Generates a compile-time MLIR size value, i.e. either a fixed positive integer # or a '?' depending on dynamicity. -def static_size(x, dynamicity): - if dynamicity == "dynamic": +def static_size(x: int, dynamicity: Dynamicity): + if dynamicity == Dynamicity.DYNAMIC: return "?" - elif dynamicity == "static": + elif dynamicity == Dynamicity.STATIC: return x - elif dynamicity == "mixed": + elif dynamicity == Dynamicity.MIXED: global local_pseudorandom_state # Same as C++ std::minstd_rand. # Using a local pseudorandom generator implementation ensures that it's @@ -144,17 +234,18 @@ def static_size(x, dynamicity): # Generates a test function in the generated MLIR code. # The generated function will take the same arguments as linalg.matmul and # will just call linalg.matmul with them, returning its result. 
-def generate_function(func_name, lhs_rhs_type, accum_type, shape, gen): - (m, k, n) = shape - lhs_m = static_size(m, gen[3]) - lhs_k = static_size(k, gen[3]) - rhs_k = static_size(k, gen[3]) - rhs_n = static_size(n, gen[3]) - acc_m = static_size(m, gen[3]) - acc_n = static_size(n, gen[3]) - lhs_tensor_type = f"tensor<{lhs_m}x{lhs_k}x{lhs_rhs_type}>" - rhs_tensor_type = f"tensor<{rhs_k}x{rhs_n}x{lhs_rhs_type}>" - acc_tensor_type = f"tensor<{acc_m}x{acc_n}x{accum_type}>" +def generate_function(func_name: str, lhs_rhs_type: MatrixElemTypeId, + acc_type: MatrixElemTypeId, shape: TestShape, + gen: TestGenerator): + lhs_m = static_size(shape.m, gen.dynamicity) + lhs_k = static_size(shape.k, gen.dynamicity) + rhs_k = static_size(shape.k, gen.dynamicity) + rhs_n = static_size(shape.n, gen.dynamicity) + acc_m = static_size(shape.m, gen.dynamicity) + acc_n = static_size(shape.n, gen.dynamicity) + lhs_tensor_type = f"tensor<{lhs_m}x{lhs_k}x{lhs_rhs_type.value}>" + rhs_tensor_type = f"tensor<{rhs_k}x{rhs_n}x{lhs_rhs_type.value}>" + acc_tensor_type = f"tensor<{acc_m}x{acc_n}x{acc_type.value}>" return ( f"func @{func_name}(%lhs: {lhs_tensor_type}, %rhs: {rhs_tensor_type}, %acc: {acc_tensor_type}) -> {acc_tensor_type} {{\n" f" %result = linalg.matmul ins(%lhs, %rhs: {lhs_tensor_type}, {rhs_tensor_type}) outs(%acc: {acc_tensor_type}) -> {acc_tensor_type}\n" @@ -170,12 +261,12 @@ def generate_function(func_name, lhs_rhs_type, accum_type, shape, gen): # Generates a contents_generator tag to use in the output trace. 
-def contents_generator_tag(generator): - if generator == "zero": +def contents_generator_tag(generator: MatrixGenerator): + if generator == MatrixGenerator.ZERO: return "" - elif generator == "identity": + elif generator == MatrixGenerator.IDENTITY: return "!tag:iree:identity_matrix" - elif generator == "random": + elif generator == MatrixGenerator.RANDOM: global pseudorandom_generator_seed pseudorandom_generator_seed = pseudorandom_generator_seed + 1 return f"!tag:iree:fully_specified_pseudorandom {pseudorandom_generator_seed}" @@ -185,11 +276,13 @@ def contents_generator_tag(generator): # Generate a matrix function argument in the output trace, as a dictionary # to be passed to yaml.dump. -def generate_trace_matrix_arg(matrix_shape, element_type, generator): +def generate_trace_matrix_arg(matrix_shape: list, + element_type: MatrixElemTypeId, + generator: MatrixGenerator): result = { "type": "hal.buffer_view", "shape": matrix_shape, - "element_type": element_type, + "element_type": element_type.value, } generator_tag = contents_generator_tag(generator) if generator_tag: @@ -199,12 +292,14 @@ def generate_trace_matrix_arg(matrix_shape, element_type, generator): # Generates the output trace for a testcase i.e. a single test function call, # as a dictionary to be passed to yaml.dump. 
-def generate_trace(func_name, lhs_rhs_type, acc_type, shape, gen): - (m, k, n) = shape - lhs_arg = generate_trace_matrix_arg([m, k], lhs_rhs_type, gen[0]) - rhs_arg = generate_trace_matrix_arg([k, n], lhs_rhs_type, gen[1]) - acc_arg = generate_trace_matrix_arg([m, n], acc_type, gen[2]) - result_arg = generate_trace_matrix_arg([m, n], acc_type, "zero") +def generate_trace(func_name: str, lhs_rhs_type: MatrixElemTypeId, + acc_type: MatrixElemTypeId, shape: TestShape, + gen: TestGenerator): + lhs_arg = generate_trace_matrix_arg([shape.m, shape.k], lhs_rhs_type, gen.lhs) + rhs_arg = generate_trace_matrix_arg([shape.k, shape.n], lhs_rhs_type, gen.rhs) + acc_arg = generate_trace_matrix_arg([shape.m, shape.n], acc_type, gen.acc) + result_arg = generate_trace_matrix_arg([shape.m, shape.n], acc_type, + MatrixGenerator.ZERO) return { "type": "call", "function": "module." + func_name, @@ -218,14 +313,13 @@ def generate_trace(func_name, lhs_rhs_type, acc_type, shape, gen): # Generates all output files' contents as strings. -def generate(args): +def generate(lhs_rhs_type: MatrixElemTypeId, acc_type: MatrixElemTypeId, + shapes_id: ShapesId): functions = {} traces = [] - lhs_rhs_type = args.lhs_rhs_type - accum_type = 'i32' if lhs_rhs_type == 'i8' else lhs_rhs_type - for shape in get_test_shapes()[args.shapes]: - for gen in get_test_generators()[args.shapes]: - func_name = function_name(lhs_rhs_type, accum_type, shape, gen) + for shape in get_test_shapes(shapes_id): + for gen in get_test_generators(shapes_id): + func_name = function_name(lhs_rhs_type, acc_type, shape, gen) # Different testcases may differ only by runtime parameters but # share the same code. For example, dynamic-shapes testcases # share the same code involing tensor even though the runtime @@ -233,9 +327,9 @@ def generate(args): # generate_function conditionally, and generate_trace unconditionally. 
if func_name not in functions: functions[func_name] = generate_function(func_name, lhs_rhs_type, - accum_type, shape, gen) + acc_type, shape, gen) traces.append( - generate_trace(func_name, lhs_rhs_type, accum_type, shape, gen)) + generate_trace(func_name, lhs_rhs_type, acc_type, shape, gen)) return (functions, traces) @@ -256,7 +350,7 @@ def parse_arguments(): required=True) parser.add_argument("--shapes", type=str, - choices=["small", "large"], + choices=[s.value for s in ShapesId], help="Collection of matrix shapes to test", required=True) parser.add_argument( @@ -308,8 +402,22 @@ def write_trace_file(traces, filename, module_path): file.write(processed_yaml) +# For now, the accumulator type can always be inferred from the input LHS/RHS +# type, so we do that. That is temporary: eventually there will be cases +# where the same input types are used with different accumulator types, e.g. +# f16 inputs with both f16 and f32 accumulator. +def infer_acc_type(lhs_rhs_type: MatrixElemTypeId): + if lhs_rhs_type == MatrixElemTypeId.I8: + return MatrixElemTypeId.I32 + else: + return lhs_rhs_type + + def main(args): - (functions, traces) = generate(args) + lhs_rhs_type = MatrixElemTypeId(args.lhs_rhs_type) + acc_type = infer_acc_type(lhs_rhs_type) + shapes_id = ShapesId(args.shapes) + (functions, traces) = generate(lhs_rhs_type, acc_type, shapes_id) write_code_file(functions, args.output_code) write_trace_file(traces, args.output_trace, args.module_path) From e1d4d53cc5c8d60f0a0758263147c308b92b1f7e Mon Sep 17 00:00:00 2001 From: powderluv Date: Tue, 26 Oct 2021 13:30:29 -0700 Subject: [PATCH 07/22] Fix unused var in stricter compilers (#7461) TEST=builds --- iree/compiler/Utils/FlatbufferUtils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/iree/compiler/Utils/FlatbufferUtils.cpp b/iree/compiler/Utils/FlatbufferUtils.cpp index febbde9bdda3..3a00885c0b13 100644 --- a/iree/compiler/Utils/FlatbufferUtils.cpp +++ b/iree/compiler/Utils/FlatbufferUtils.cpp @@ -32,6 +32,7 
@@ static SmallVector cloneBufferIntoContiguousBytes( void *result = flatcc_builder_copy_buffer(fbb, packedData.data(), packedData.size()); assert(result && "flatcc_emitter_t impl failed (non-default?)"); + (void)result; return packedData; } From 84b5afca2ce4d678c3602e552dba2be4c6cdf6bb Mon Sep 17 00:00:00 2001 From: powderluv Date: Tue, 26 Oct 2021 13:30:59 -0700 Subject: [PATCH 08/22] Fix Bytecode disasm to export type_def (#7460) TEST=builds --- iree/vm/bytecode_disasm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/iree/vm/bytecode_disasm.c b/iree/vm/bytecode_disasm.c index 1bdff07a906c..2dbeed0fb134 100644 --- a/iree/vm/bytecode_disasm.c +++ b/iree/vm/bytecode_disasm.c @@ -1001,6 +1001,7 @@ iree_status_t iree_vm_bytecode_disasm_op( IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, ", ")); EMIT_I32_REG_NAME(index_reg); EMIT_OPTIONAL_VALUE_I32(regs->i32[index_reg]); + EMIT_TYPE_NAME(type_def); break; } @@ -1068,6 +1069,7 @@ iree_status_t iree_vm_bytecode_disasm_op( IREE_RETURN_IF_ERROR(iree_string_builder_append_cstring(b, " : ")); EMIT_REF_REG_NAME(false_value_reg); EMIT_OPTIONAL_VALUE_REF(&regs->ref[false_value_reg]); + EMIT_TYPE_NAME(type_def); break; } From c4e0f3b01a874e1bb0b3d3b71d54d5b70f3129e6 Mon Sep 17 00:00:00 2001 From: CindyLiu Date: Tue, 26 Oct 2021 20:44:19 +0000 Subject: [PATCH 09/22] Revert "Don't outline splat constants. (#6816)" (#7462) This reverts commit cc4f1734db7aab52873b3a8c1fbd5945419813d9.
Reason to revert: the change creates unaligned buffer fill Change-Id: Iff9ba0d9f4b4993f8388ac9af8a2ba6aba577d77 --- .../Dialect/Flow/IR/FlowOpFolders.cpp | 9 +++++++ iree/compiler/Dialect/Flow/IR/FlowOps.td | 1 + .../Dialect/Flow/IR/test/tensor_folding.mlir | 22 +++++++++++++++++ .../Transforms/ConvertLinalgTensorOps.cpp | 24 ++----------------- .../Flow/Transforms/OutlineLargeConstants.cpp | 19 ++++++++------- .../compiler/Dialect/Flow/Transforms/Passes.h | 8 +++++-- .../Dialect/Flow/Transforms/Passes.td | 8 ++----- .../test/outline_large_constants.mlir | 12 ++++------ .../Conversion/FlowToHAL/ConvertStreamOps.cpp | 3 --- 9 files changed, 57 insertions(+), 49 deletions(-) diff --git a/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp b/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp index ef12029b718f..c009f37ab99f 100644 --- a/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp +++ b/iree/compiler/Dialect/Flow/IR/FlowOpFolders.cpp @@ -769,6 +769,15 @@ void TensorSplatOp::getCanonicalizationPatterns( results.insert(context); } +OpFoldResult TensorSplatOp::fold(ArrayRef operands) { + if (operands.size() == 1 && operands.front()) { + // Splat value is constant and we can fold the operation. + return SplatElementsAttr::get(result().getType().cast(), + operands[0]); + } + return {}; +} + OpFoldResult TensorCloneOp::fold(ArrayRef operands) { if (operands[0]) { // Constants always fold. 
diff --git a/iree/compiler/Dialect/Flow/IR/FlowOps.td b/iree/compiler/Dialect/Flow/IR/FlowOps.td index 102d86190eab..be3caede69f2 100644 --- a/iree/compiler/Dialect/Flow/IR/FlowOps.td +++ b/iree/compiler/Dialect/Flow/IR/FlowOps.td @@ -839,6 +839,7 @@ def FLOW_TensorSplatOp : FLOW_PureOp<"tensor.splat", [ }]; let hasCanonicalizer = 1; + let hasFolder = 1; } def FLOW_TensorCloneOp : FLOW_PureOp<"tensor.clone", [ diff --git a/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir b/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir index bd843fb1a2b7..792b88dfe57c 100644 --- a/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir +++ b/iree/compiler/Dialect/Flow/IR/test/tensor_folding.mlir @@ -154,6 +154,28 @@ func @storeConstScalar() -> tensor { // ----- +// CHECK-LABEL: @splatConst +func @splatConst() -> tensor<4xi32> { + %0 = arith.constant 4 : i32 + // CHECK-NEXT: %[[C:.+]] = arith.constant dense<4> : tensor<4xi32> + %1 = flow.tensor.splat %0 : tensor<4xi32> + // CHECK-NEXT: return %[[C]] + return %1 : tensor<4xi32> +} + +// ----- + +// CHECK-LABEL: @splatConstScalar +func @splatConstScalar() -> tensor { + %0 = arith.constant 4 : i32 + // CHECK-NEXT: %[[C:.+]] = arith.constant dense<4> : tensor + %1 = flow.tensor.splat %0 : tensor + // CHECK-NEXT: return %[[C]] + return %1 : tensor +} + +// ----- + // CHECK-LABEL: @splatDynamicShape // CHECK-SAME: (%[[DIM0:.+]]: index, %[[DIM1:.+]]: index) func @splatDynamicShape(%dim0: index, %dim1: index) -> tensor { diff --git a/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp b/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp index 992878b07445..869143aca268 100644 --- a/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp +++ b/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgTensorOps.cpp @@ -81,6 +81,7 @@ struct LinalgFillToFlowTensorSplat final // Don't convert linalg.fill ops that were fused together with other ops. 
return failure(); } + SmallVector dynamicDims = getDynamicDimValues(rewriter, fillOp.getLoc(), fillOp.output()); rewriter.replaceOpWithNewOp( @@ -89,26 +90,6 @@ struct LinalgFillToFlowTensorSplat final } }; -struct ConvertSplatConstantOp : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - LogicalResult matchAndRewrite(mlir::ConstantOp op, - PatternRewriter &rewriter) const override { - if (op->getParentOfType()) { - return rewriter.notifyMatchFailure(op, "ignoring dispatch ops"); - } - auto splatAttr = op.getValue().dyn_cast(); - if (!splatAttr) { - return rewriter.notifyMatchFailure(op, "only looking for splats"); - } - auto tensorType = op.getType().cast(); - auto elementValue = rewriter.createOrFold( - op.getLoc(), tensorType.getElementType(), splatAttr.getSplatValue()); - rewriter.replaceOpWithNewOp( - op, tensorType, elementValue, ValueRange{}); - return success(); - } -}; - /// Converts linalg operations that can map to flow.tensor.* operations. struct ConvertLinalgTensorOpsPass : public ConvertLinalgTensorOpsBase { @@ -135,8 +116,7 @@ struct ConvertLinalgTensorOpsPass LinalgTensorReshapeToFlowTensorReshape>( context); } else { - patterns.insert( - context); + patterns.insert(context); } IREE::Flow::TensorReshapeOp::getCanonicalizationPatterns(patterns, context); if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns)))) { diff --git a/iree/compiler/Dialect/Flow/Transforms/OutlineLargeConstants.cpp b/iree/compiler/Dialect/Flow/Transforms/OutlineLargeConstants.cpp index 24de6786fcd1..27c3e80b565c 100644 --- a/iree/compiler/Dialect/Flow/Transforms/OutlineLargeConstants.cpp +++ b/iree/compiler/Dialect/Flow/Transforms/OutlineLargeConstants.cpp @@ -29,15 +29,11 @@ namespace Flow { // more efficient and fewer bindings. static bool isConstantLarge(arith::ConstantOp constantOp, size_t minLargeConstantSize) { - if (constantOp.value().isa()) { - // Never outline splats; we want those transient within streams. 
- return false; - } auto type = constantOp.getType(); if (auto shapedType = type.dyn_cast()) { size_t unpackedByteLength = (shapedType.getNumElements() * shapedType.getElementTypeBitWidth()) / 8; - if (unpackedByteLength > minLargeConstantSize) { + if (unpackedByteLength >= minLargeConstantSize) { return true; } } @@ -67,6 +63,8 @@ class OutlineLargeConstantsPass : public OutlineLargeConstantsBase { public: OutlineLargeConstantsPass() = default; + OutlineLargeConstantsPass(size_t minLargeConstantSize) + : minLargeConstantSize(minLargeConstantSize){}; void getDependentDialects(DialectRegistry &registry) const override { registry.insert(); @@ -86,7 +84,7 @@ class OutlineLargeConstantsPass std::vector> replacements; for (auto &largeConstantOp : - findLargeConstantsInModule(moduleOp, minStorageSize.getValue())) { + findLargeConstantsInModule(moduleOp, minLargeConstantSize)) { std::string name; do { name = baseName + std::to_string(uniqueId++); @@ -116,11 +114,14 @@ class OutlineLargeConstantsPass constantOp.erase(); } } + + private: + size_t minLargeConstantSize; }; -std::unique_ptr> -createOutlineLargeConstantsPass() { - return std::make_unique(); +std::unique_ptr> createOutlineLargeConstantsPass( + size_t minLargeConstantSize) { + return std::make_unique(minLargeConstantSize); } } // namespace Flow diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.h b/iree/compiler/Dialect/Flow/Transforms/Passes.h index dafb8d635262..123cfb05956e 100644 --- a/iree/compiler/Dialect/Flow/Transforms/Passes.h +++ b/iree/compiler/Dialect/Flow/Transforms/Passes.h @@ -131,8 +131,12 @@ createPadLinalgOpsToIntegerMultiplePass(int paddingSize = 4); //===----------------------------------------------------------------------===// // Outlines large tensor constants into util.globals at the module level.
-std::unique_ptr> -createOutlineLargeConstantsPass(); +// +// TODO(#5493): implement the support for inlining constants into the command +// buffer and raise this value to one that is measured to be good. +static constexpr size_t kMinLargeConstantSize = 1; +std::unique_ptr> createOutlineLargeConstantsPass( + size_t minLargeConstantSize = kMinLargeConstantSize); // Deduplicates equivalent executables. std::unique_ptr> diff --git a/iree/compiler/Dialect/Flow/Transforms/Passes.td b/iree/compiler/Dialect/Flow/Transforms/Passes.td index 4fee8c0ab20d..64941b2d5093 100644 --- a/iree/compiler/Dialect/Flow/Transforms/Passes.td +++ b/iree/compiler/Dialect/Flow/Transforms/Passes.td @@ -102,12 +102,8 @@ def OutlineDispatchRegions : def OutlineLargeConstants : Pass<"iree-flow-outline-large-constants", "mlir::ModuleOp"> { let summary = "Outlines large tensor constants into util.globals at the module level."; - let constructor = "mlir::iree_compiler::IREE::Flow::createOutlineLargeConstantsPass()"; - let options = [ - Option<"minStorageSize", "min-storage-size", - "int64_t", /*default=*/"64", - "Outlines constants with storage sizes > than this byte size."> - ]; + // TODO(#5493): add a flag for this. 
+ let constructor = "mlir::iree_compiler::IREE::Flow::createOutlineLargeConstantsPass(25)"; } def PadLinalgOps : diff --git a/iree/compiler/Dialect/Flow/Transforms/test/outline_large_constants.mlir b/iree/compiler/Dialect/Flow/Transforms/test/outline_large_constants.mlir index 739411928a2c..cdc299f036fd 100644 --- a/iree/compiler/Dialect/Flow/Transforms/test/outline_large_constants.mlir +++ b/iree/compiler/Dialect/Flow/Transforms/test/outline_large_constants.mlir @@ -1,12 +1,10 @@ -// RUN: iree-opt -split-input-file -iree-flow-outline-large-constants='min-storage-size=9' %s | IreeFileCheck %s +// RUN: iree-opt -split-input-file -iree-flow-outline-large-constants %s | IreeFileCheck %s -// CHECK: util.global private @[[LARGE_VARIABLE:.+]] {noinline} = dense<{{.+}}> : tensor<8xf32> -func @fn1() -> (tensor<2xf32>, tensor<512x128xf32>, tensor<8xf32>) { +// CHECK: util.global private @[[LARGE_VARIABLE:.+]] {noinline} = dense<1.200000e+00> : tensor<512x128xf32> +func @fn1() -> (tensor<2xf32>, tensor<512x128xf32>) { // CHECK-DAG: %[[SMALL_VALUE:.+]] = arith.constant dense<{{.+}}> : tensor<2xf32> %cst_0 = arith.constant dense<[0.0287729427, 0.0297581609]> : tensor<2xf32> - // CHECK-DAG: %[[SPLATG_VALUE:.+]] = arith.constant dense<{{.+}}> : tensor<512x128xf32> + // CHECK-DAG: %[[LARGE_VALUE:.+]] = util.global.load @[[LARGE_VARIABLE]] : tensor<512x128xf32> %cst_1 = arith.constant dense<1.2> : tensor<512x128xf32> - // CHECK-DAG: %[[LARGE_VALUE:.+]] = util.global.load @[[LARGE_VARIABLE]] : tensor<8xf32> - %cst_2 = arith.constant dense<[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]> : tensor<8xf32> - return %cst_0, %cst_1, %cst_2 : tensor<2xf32>, tensor<512x128xf32>, tensor<8xf32> + return %cst_0, %cst_1 : tensor<2xf32>, tensor<512x128xf32> } diff --git a/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertStreamOps.cpp b/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertStreamOps.cpp index 8fe6df3203de..19a03980141f 100644 --- 
a/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertStreamOps.cpp +++ b/iree/compiler/Dialect/HAL/Conversion/FlowToHAL/ConvertStreamOps.cpp @@ -949,9 +949,6 @@ static Value splatFillPattern(Location loc, Value baseValue, baseValue = builder.createOrFold( loc, builder.getIntegerType(baseBitWidth), baseValue); - // Treat i1 as i8. - if (baseBitWidth == 1) baseBitWidth = 8; - switch (baseBitWidth) { case 8: { // (v << 24) | (v << 16) | (v << 8) | v From 7d53fb144f97af5db53e47a8119a904cd5135b4b Mon Sep 17 00:00:00 2001 From: MaheshRavishankar <1663364+MaheshRavishankar@users.noreply.github.com> Date: Tue, 26 Oct 2021 14:19:12 -0700 Subject: [PATCH 10/22] Remove direct distribution pass pipeline. (#7457) Since flow dispatch region formation now creates dispatches through tile + fuse + distribute, the direct distribution path is no more used. Remove this code. --- .../Codegen/Dialect/LoweringConfig.td | 6 +- iree/compiler/Codegen/Passes.h | 7 - iree/compiler/Codegen/Passes.td | 9 - iree/compiler/Codegen/SPIRV/BUILD | 1 - iree/compiler/Codegen/SPIRV/CMakeLists.txt | 1 - iree/compiler/Codegen/SPIRV/KernelConfig.cpp | 47 ---- iree/compiler/Codegen/SPIRV/Passes.cpp | 4 - .../SPIRV/SPIRVDistributeToGlobalID.cpp | 225 ----------------- .../SPIRV/SPIRVLowerExecutableTargetPass.cpp | 4 - iree/compiler/Codegen/SPIRV/test/BUILD | 1 - .../Codegen/SPIRV/test/CMakeLists.txt | 1 - .../SPIRV/test/distribute_to_global_id.mlir | 236 ------------------ 12 files changed, 2 insertions(+), 540 deletions(-) delete mode 100644 iree/compiler/Codegen/SPIRV/SPIRVDistributeToGlobalID.cpp delete mode 100644 iree/compiler/Codegen/SPIRV/test/distribute_to_global_id.mlir diff --git a/iree/compiler/Codegen/Dialect/LoweringConfig.td b/iree/compiler/Codegen/Dialect/LoweringConfig.td index 13489da1e152..2e7b5470de7e 100644 --- a/iree/compiler/Codegen/Dialect/LoweringConfig.td +++ b/iree/compiler/Codegen/Dialect/LoweringConfig.td @@ -28,8 +28,6 @@ def SPIRV_SimpleDistribute : 
StrEnumAttrCase<"SPIRVDistribute">; def SPIRV_Vectorize : StrEnumAttrCase<"SPIRVVectorize">; -def SPIRV_DistributeToGlobalID - : StrEnumAttrCase<"SPIRVDistributeToGlobalID">; def SPIRV_VectorizeToCooperativeOps : StrEnumAttrCase<"SPIRVVectorizeToCooperativeOps">; @@ -43,8 +41,8 @@ def DispatchLoweringPassPipelineEnum : StrEnumAttr< "identifier for pass pipeline use to lower dispatch region", [CPU_Default, CPU_TensorToVectors, CPU_Vectorization, LLVMGPU_SimpleDistribute, LLVMGPU_Vectorize, LLVMGPU_MatmulSimt, - SPIRV_SimpleDistribute, SPIRV_DistributeToGlobalID, - SPIRV_Vectorize, SPIRV_VectorizeToCooperativeOps, None]> { + SPIRV_SimpleDistribute, SPIRV_Vectorize, + SPIRV_VectorizeToCooperativeOps, None]> { let cppNamespace = "::mlir::iree_compiler::IREE::Codegen"; } diff --git a/iree/compiler/Codegen/Passes.h b/iree/compiler/Codegen/Passes.h index 84b4fb5677cc..d550517c3a32 100644 --- a/iree/compiler/Codegen/Passes.h +++ b/iree/compiler/Codegen/Passes.h @@ -234,10 +234,6 @@ std::unique_ptr> createLLVMGPUPipeliningPass(); /// distribution to threads without vectorization. void addSPIRVTileAndDistributePassPipeline(OpPassManager &pm); -/// Pass pipeline to lower IREE HAL executables that contain Linalg ops that are -/// not tiled/distributed. Performs distribution to global invocations. -void addSPIRVDistributeToGlobalIDPassPipeline(OpPassManager &pm); - /// Pass pipeline to lower IREE HAL executables with workgroup tiled and /// distributed Linalg ops to SPIR-V scalar and vector code. Additionally /// performs distribution to threads with vectorization. @@ -255,9 +251,6 @@ void addSPIRVTileAndVectorizeToCooperativeOpsPassPipeline(OpPassManager &pm); /// corresponding SPIR-V ops. std::unique_ptr> createConvertToSPIRVPass(); -/// Pass to distribute Linalg ops with buffer semantics to global invocations. -std::unique_ptr> createSPIRVDistributeToGlobalIDPass(); - /// Creates a pass to fold processor ID uses where possible. 
std::unique_ptr> createSPIRVFoldProcessorIDUsesPass(); diff --git a/iree/compiler/Codegen/Passes.td b/iree/compiler/Codegen/Passes.td index eba35a657f71..83e376fb5898 100644 --- a/iree/compiler/Codegen/Passes.td +++ b/iree/compiler/Codegen/Passes.td @@ -206,15 +206,6 @@ def ConvertToSPIRV : Pass<"iree-convert-to-spirv", "ModuleOp"> { let constructor = "mlir::iree_compiler::createConvertToSPIRVPass()"; } -// TODO: Rename argument to be fully qualified. -def SPIRVDistributeToGlobalID : - Pass<"iree-spirv-distribute-to-global-id", "FuncOp"> { - let summary = "Distribute Linalg ops with buffer semantics to global " - "invocations"; - let constructor = - "mlir::iree_compiler::createSPIRVDistributeToGlobalIDPass()"; -} - // TODO: Rename argument to be fully qualified. def SPIRVFoldProcessorIDUses : Pass<"iree-spirv-fold-gpu-procid-uses", "FuncOp"> { diff --git a/iree/compiler/Codegen/SPIRV/BUILD b/iree/compiler/Codegen/SPIRV/BUILD index bc50e24df4b4..c4dea5d497e2 100644 --- a/iree/compiler/Codegen/SPIRV/BUILD +++ b/iree/compiler/Codegen/SPIRV/BUILD @@ -20,7 +20,6 @@ cc_library( "NVIDIAConfig.cpp", "Passes.cpp", "SPIRVCopyToWorkgroupMemory.cpp", - "SPIRVDistributeToGlobalID.cpp", "SPIRVFoldGPUProcessorIDUses.cpp", "SPIRVLowerExecutableTargetPass.cpp", "SPIRVRemoveOneTripTiledLoops.cpp", diff --git a/iree/compiler/Codegen/SPIRV/CMakeLists.txt b/iree/compiler/Codegen/SPIRV/CMakeLists.txt index 9929b6367b8b..0c0bd96a15a3 100644 --- a/iree/compiler/Codegen/SPIRV/CMakeLists.txt +++ b/iree/compiler/Codegen/SPIRV/CMakeLists.txt @@ -25,7 +25,6 @@ iree_cc_library( "NVIDIAConfig.cpp" "Passes.cpp" "SPIRVCopyToWorkgroupMemory.cpp" - "SPIRVDistributeToGlobalID.cpp" "SPIRVFoldGPUProcessorIDUses.cpp" "SPIRVLowerExecutableTargetPass.cpp" "SPIRVRemoveOneTripTiledLoops.cpp" diff --git a/iree/compiler/Codegen/SPIRV/KernelConfig.cpp b/iree/compiler/Codegen/SPIRV/KernelConfig.cpp index e4439d601c29..d2bb4e14d20a 100644 --- a/iree/compiler/Codegen/SPIRV/KernelConfig.cpp +++ 
b/iree/compiler/Codegen/SPIRV/KernelConfig.cpp @@ -25,36 +25,6 @@ namespace mlir { namespace iree_compiler { -//===----------------------------------------------------------------------===// -// Utilities -//===----------------------------------------------------------------------===// - -/// Defines the workgroup count region on entry point ops for the -/// `SPIRVDistributeToGlobalID` pipeline. -// TODO(ravishankarm): Remove this when that pipeline is deprecated. -static LogicalResult setTranslationUsingDistributeToGlobalId( - FuncOp funcOp, ArrayRef workgroupSize) { - setTranslationInfo( - funcOp, - IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistributeToGlobalID, - /*workloadPerWorkgroup=*/{}, workgroupSize); - MLIRContext *context = funcOp.getContext(); - OpBuilder builder(context); - int64_t workgroupSizeX = workgroupSize[0]; - auto numWorkgroupsFn = [workgroupSizeX](OpBuilder &b, Location loc, - std::array workload) { - AffineExpr e1, e2, e3; - bindSymbols(b.getContext(), e1, e2, e3); - AffineExpr expr = e1 * e2 * e3; - expr = expr.ceilDiv(workgroupSizeX); - Value numWorkgroupsX = linalg::applyMapToValues( - b, loc, AffineMap::get(0, 3, expr), workload)[0]; - Value one = b.create(loc, 1); - return std::array{numWorkgroupsX, one, one}; - }; - return defineWorkgroupCountRegion(builder, funcOp, numWorkgroupsFn); -} - //===----------------------------------------------------------------------===// // Convolution Default Configuration //===----------------------------------------------------------------------===// @@ -533,24 +503,7 @@ LogicalResult initSPIRVLaunchConfig(ModuleOp module) { return funcOp.emitOpError("failed to get compute ops"); } - int64_t subgroupSize = limits.subgroup_size().getValue().getSExtValue(); - - // If the dispatch region does not contain tiled and distributed Linalg ops, - // invoke the pipeline to distribute to global invocations. 
- if (tiledLoops.empty() && llvm::none_of(computeOps, [](Operation *op) { - return hasMarker(op, getWorkgroupMarker()); - })) { - std::array workgroupSize = {subgroupSize, 1, 1}; - if (failed( - setTranslationUsingDistributeToGlobalId(funcOp, workgroupSize))) { - return computeOps[0]->emitOpError( - "failed to set translation info for distributing to global IDs"); - } - continue; - } - Operation *rootOperation = nullptr; - // Try to find a configuration according to a matmul/convolution op and use // it as the root op. for (Operation *computeOp : computeOps) { diff --git a/iree/compiler/Codegen/SPIRV/Passes.cpp b/iree/compiler/Codegen/SPIRV/Passes.cpp index 02964f7e3d76..bf98d6796eaa 100644 --- a/iree/compiler/Codegen/SPIRV/Passes.cpp +++ b/iree/compiler/Codegen/SPIRV/Passes.cpp @@ -150,10 +150,6 @@ void addSPIRVTileAndDistributePassPipeline(OpPassManager &pm) { addLoopMaterializationPasses(pm); } -void addSPIRVDistributeToGlobalIDPassPipeline(OpPassManager &pm) { - pm.addNestedPass(createSPIRVDistributeToGlobalIDPass()); -} - void buildSPIRVCodegenPassPipeline(OpPassManager &pm) { addLinalgBufferizePasses(pm.nest(), gpuAllocationFunction); pm.addPass(createSPIRVLowerExecutableTargetPass()); diff --git a/iree/compiler/Codegen/SPIRV/SPIRVDistributeToGlobalID.cpp b/iree/compiler/Codegen/SPIRV/SPIRVDistributeToGlobalID.cpp deleted file mode 100644 index b8df3b46d159..000000000000 --- a/iree/compiler/Codegen/SPIRV/SPIRVDistributeToGlobalID.cpp +++ /dev/null @@ -1,225 +0,0 @@ -// Copyright 2020 The IREE Authors -// -// Licensed under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -//===- SPIRVDistributeToGlobalIDPass.cpp ----------------------------------===// -// -// This pass distributes Linalg ops with buffer semantics to global invocations. 
-// -//===----------------------------------------------------------------------===// - -#include -#include - -#include "iree/compiler/Codegen/PassDetail.h" -#include "iree/compiler/Codegen/Passes.h" -#include "iree/compiler/Codegen/SPIRV/Utils.h" -#include "iree/compiler/Codegen/Transforms/Transforms.h" -#include "iree/compiler/Codegen/Utils/MarkerUtils.h" -#include "iree/compiler/Codegen/Utils/Utils.h" -#include "iree/compiler/Dialect/HAL/IR/HALOps.h" -#include "iree/compiler/Dialect/Shape/IR/ShapeDialect.h" -#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/Dialect/GPU/GPUDialect.h" -#include "mlir/Dialect/Linalg/IR/LinalgOps.h" -#include "mlir/Dialect/Linalg/Transforms/Transforms.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" -#include "mlir/Dialect/SCF/SCF.h" -#include "mlir/Dialect/SPIRV/IR/TargetAndABI.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/IR/AffineMap.h" -#include "mlir/IR/FunctionSupport.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Support/LLVM.h" -#include "mlir/Transforms/DialectConversion.h" -#include "mlir/Transforms/LoopUtils.h" - -namespace mlir { -namespace iree_compiler { - -//===----------------------------------------------------------------------===// -// Loop utilities -//===----------------------------------------------------------------------===// - -/// Serializes the dimensions of the scf.parallel specified in -/// `serializedDimensions`, by creating an nested scf.for operation for each -/// dimension. -// TODO(ravishankarm): Move this into LoopUtils.h in MLIR. 
-static Operation *serializeDimensions(ConversionPatternRewriter &rewriter, - scf::ParallelOp pLoopOp, - ArrayRef serializedDimensions) { - assert(!serializedDimensions.empty() && - "unhandled corner case of no serializing dims"); - OpBuilder::InsertionGuard guard(rewriter); - DenseSet serializedDimSet; - serializedDimSet.insert(serializedDimensions.begin(), - serializedDimensions.end()); - assert(serializedDimSet.size() == serializedDimensions.size() && - "cannot repeat dimensions during serialization of scf.parallel"); - SmallVector newPLoopBounds, forBounds; - SmallVector permutation; - auto lbs = pLoopOp.lowerBound(); - auto ubs = pLoopOp.upperBound(); - auto steps = pLoopOp.step(); - for (unsigned i : llvm::seq(0, pLoopOp.getNumLoops())) { - if (serializedDimSet.count(i)) { - forBounds.push_back({lbs[i], ubs[i], steps[i]}); - } else { - newPLoopBounds.push_back({lbs[i], ubs[i], steps[i]}); - permutation.push_back(i); - } - } - permutation.append(serializedDimensions.begin(), serializedDimensions.end()); - return replacePLoopOp(rewriter, pLoopOp, newPLoopBounds, forBounds, - permutation); -} - -/// Serialize all inner dimensions of a `pLoopOp` starting from `serializeFrom`. 
-static Operation *serializeDimensionsFrom(ConversionPatternRewriter &rewriter, - scf::ParallelOp pLoopOp, - unsigned serializeFrom) { - unsigned numLoops = pLoopOp.getNumLoops(); - assert(serializeFrom < numLoops && - "unhandled corner case of no serialization"); - SmallVector serializedDimensions; - for (unsigned dim : llvm::seq(serializeFrom, numLoops)) - serializedDimensions.push_back(dim); - return serializeDimensions(rewriter, pLoopOp, serializedDimensions); -} - -//===----------------------------------------------------------------------===// -// GPU processor ID mapping utilities -//===----------------------------------------------------------------------===// - -/// Distributes scf.parallel to processors where `IdOp` is used to get the -/// processor ID and `DimOp` is used to get the number of processors along a -/// dimension. Assumes that the number of processors will be less than equal to -/// the number of iterations of the pLoopOp along all dimensions. -template -static LogicalResult distributeSingleIterationPerProcessor( - ConversionPatternRewriter &rewriter, scf::ParallelOp pLoopOp, - bool generateGuard = true) { - unsigned numLoops = pLoopOp.getNumLoops(); - if (numLoops > 3) { - pLoopOp = - cast(serializeDimensionsFrom(rewriter, pLoopOp, 3)); - numLoops = 3; - } - auto procInfo = getGPUProcessorIdsAndCounts( - rewriter, pLoopOp.getLoc(), numLoops); - return distributeSingleIterationPerProcessor(rewriter, pLoopOp, procInfo, - generateGuard); -} - -//===----------------------------------------------------------------------===// -// Pass and patterns. -//===----------------------------------------------------------------------===// - -namespace { -/// Pass to convert from tiled and fused linalg ops into gpu.func. 
-struct SPIRVDistributeToGlobalIDPass - : public SPIRVDistributeToGlobalIDBase { - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } - void runOnOperation() override; -}; - -/// Given the workload return the workgroup count along X obtained by -/// linearizing the workload and dividing by the workgroup size. -static Value getWorkgroupCountX(OpBuilder &builder, Location loc, - ArrayRef values, - int64_t workgroupSizeX) { - AffineExpr expr = builder.getAffineConstantExpr(1); - for (auto val : enumerate(values)) { - expr = expr * builder.getAffineSymbolExpr(val.index()); - } - expr = expr.ceilDiv(workgroupSizeX); - return linalg::applyMapToValues( - builder, loc, AffineMap::get(0, values.size(), expr), values)[0]; -} - -/// Map linalg operation to execute on GPU in parallel by mapping the parallel -/// loops to "GlobalInvocationId". -template -struct MapLinalgOpToGlobalInvocationId - : public OpConversionPattern { - MapLinalgOpToGlobalInvocationId(MLIRContext *context, - PatternBenefit benefit = 1) - : OpConversionPattern(context, benefit) {} - - LogicalResult matchAndRewrite( - LinalgOpTy linalgOp, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - // If marker exists do nothing. - if (hasMarker(linalgOp)) return failure(); - FuncOp funcOp = linalgOp->template getParentOfType(); - if (!funcOp) return failure(); - Optional loops = - linalg::linalgOpToParallelLoops(rewriter, linalgOp); - if (!loops) return failure(); - - if (!loops.getValue().empty()) { - scf::ParallelOp pLoopOp = dyn_cast(loops.getValue()[0]); - // If there are parallel loops partition them to threads using global - // invocation ID. 
- if (pLoopOp) { - pLoopOp = collapseParallelLoops(rewriter, pLoopOp); - if (!pLoopOp) return failure(); - if (failed(distributeSingleIterationPerProcessor( - rewriter, pLoopOp))) { - return rewriter.notifyMatchFailure( - linalgOp, "mapping to GlobalInvocationID failed"); - } - } - } - rewriter.eraseOp(linalgOp); - return success(); - } -}; - -} // namespace - -void SPIRVDistributeToGlobalIDPass::runOnOperation() { - FuncOp funcOp = getOperation(); - if (!isEntryPoint(funcOp)) return; - - MLIRContext *context = &getContext(); - ConversionTarget target(*context); - // After this pass Linalg and scf.parallel ops should be gone. - target.addIllegalOp(); - target.addIllegalDialect(); - // Reshape ops are treated legal since they just change the way the underlying - // buffer is viewed. These are legalized downstream. They become no ops when - // lowering to SPIR-V since the SPIR-V code uses linearized arrays. - target.addLegalOp(); - // Let the rest fall through. - target.markUnknownOpDynamicallyLegal([](Operation *) { return true; }); - - OwningRewritePatternList patterns(&getContext()); - - patterns.insert, - MapLinalgOpToGlobalInvocationId, - MapLinalgOpToGlobalInvocationId>(context); - FrozenRewritePatternSet frozenPatterns(std::move(patterns)); - - Region &body = funcOp.getBody(); - if (!llvm::hasSingleElement(body)) { - funcOp.emitError("unhandled dispatch function with multiple blocks"); - return signalPassFailure(); - } - if (failed(applyFullConversion(funcOp, target, frozenPatterns))) - return signalPassFailure(); -} - -std::unique_ptr> createSPIRVDistributeToGlobalIDPass() { - return std::make_unique(); -} - -} // namespace iree_compiler -} // namespace mlir diff --git a/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp b/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp index f559ee488bbe..49de3abea2c9 100644 --- a/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp +++ 
b/iree/compiler/Codegen/SPIRV/SPIRVLowerExecutableTargetPass.cpp @@ -101,10 +101,6 @@ void SPIRVLowerExecutableTargetPass::runOnOperation() { case IREE::Codegen::DispatchLoweringPassPipeline::SPIRVDistribute: addSPIRVTileAndDistributePassPipeline(nestedModulePM); break; - case IREE::Codegen::DispatchLoweringPassPipeline:: - SPIRVDistributeToGlobalID: - addSPIRVDistributeToGlobalIDPassPipeline(nestedModulePM); - break; case IREE::Codegen::DispatchLoweringPassPipeline::SPIRVVectorize: addSPIRVTileAndVectorizePassPipeline(nestedModulePM); break; diff --git a/iree/compiler/Codegen/SPIRV/test/BUILD b/iree/compiler/Codegen/SPIRV/test/BUILD index f3c95cb5c061..c57ce23e6644 100644 --- a/iree/compiler/Codegen/SPIRV/test/BUILD +++ b/iree/compiler/Codegen/SPIRV/test/BUILD @@ -28,7 +28,6 @@ iree_lit_test_suite( "config_mali_matmul.mlir", "config_nvidia_matmul_cooperative_ops.mlir", "convert_to_spirv.mlir", - "distribute_to_global_id.mlir", "fold_gpu_procid_uses.mlir", "pipeline_matmul_cooperative_ops.mlir", "pipeline_matmul_vectorization.mlir", diff --git a/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt b/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt index c3aefd1eb29b..6fbb76274e56 100644 --- a/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt +++ b/iree/compiler/Codegen/SPIRV/test/CMakeLists.txt @@ -23,7 +23,6 @@ iree_lit_test_suite( "config_mali_matmul.mlir" "config_nvidia_matmul_cooperative_ops.mlir" "convert_to_spirv.mlir" - "distribute_to_global_id.mlir" "fold_gpu_procid_uses.mlir" "pipeline_matmul_cooperative_ops.mlir" "pipeline_matmul_vectorization.mlir" diff --git a/iree/compiler/Codegen/SPIRV/test/distribute_to_global_id.mlir b/iree/compiler/Codegen/SPIRV/test/distribute_to_global_id.mlir deleted file mode 100644 index c7d9c882fd62..000000000000 --- a/iree/compiler/Codegen/SPIRV/test/distribute_to_global_id.mlir +++ /dev/null @@ -1,236 +0,0 @@ -// RUN: iree-opt -split-input-file 
-pass-pipeline='hal.executable(hal.executable.variant(builtin.module(builtin.func(iree-spirv-distribute-to-global-id))))' -canonicalize -cse %s | IreeFileCheck %s - -#map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -hal.executable private @parallel_4D { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> { - hal.executable.entry_point @parallel_4D attributes {interface = @io, ordinal = 0 : index} - builtin.module { - func @parallel_4D() { - %c0 = arith.constant 0 : index - %dim0 = hal.interface.load.constant offset = 0 : index - %dim1 = hal.interface.load.constant offset = 1 : index - %dim2 = hal.interface.load.constant offset = 2 : index - %dim3 = hal.interface.load.constant offset = 3 : index - %arg0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref{%dim0, %dim1, %dim2, %dim3} - %arg1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref{%dim0, %dim1, %dim2, %dim3} - %arg2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref{%dim0, %dim1, %dim2, %dim3} - linalg.generic { - indexing_maps = [#map0, #map0, #map0], - iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%arg0, %arg1 : memref, memref) - outs(%arg2 : memref) { - ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): - %0 = arith.addf %arg3, %arg4 : f32 - linalg.yield %0 : f32 - } - return - } - func private @parallel_4D__num_workgroups__ - (!shapex.ranked_shape<[?,?,?,?]>, !shapex.ranked_shape<[?,?,?,?]>, - !shapex.ranked_shape<[?,?,?,?]>) -> (index, index, index) - hal.interface private @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, 
type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} -// CHECK-LABEL: func @parallel_4D -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index -// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index -// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index -// CHECK-DAG: %[[UB0:.+]] = memref.dim %{{.+}}, %[[C0]] -// CHECK-DAG: %[[UB1:.+]] = memref.dim %{{.+}}, %[[C1]] -// CHECK-DAG: %[[UB2:.+]] = memref.dim %{{.+}}, %[[C2]] -// CHECK-DAG: %[[UB3:.+]] = memref.dim %{{.+}}, %[[C3]] -// CHECK: %[[T4:.+]] = arith.muli %[[UB3]], %[[UB2]] -// CHECK: %[[T5:.+]] = arith.muli %[[T4]], %[[UB1]] -// CHECK: %[[UB:.+]] = arith.muli %[[T5]], %[[UB0]] -// CHECK-DAG: %[[BID:.+]] = "gpu.block_id"() {dimension = "x"} -// CHECK-DAG: %[[BDIM:.+]] = "gpu.block_dim"() {dimension = "x"} -// CHECK-DAG: %[[TID:.+]] = "gpu.thread_id"() {dimension = "x"} -// CHECK: %[[BOFFSET:.+]] = arith.muli %[[BID]], %[[BDIM]] -// CHECK: %[[IV:.+]] = arith.addi %[[BOFFSET]], %[[TID]] -// CHECK: %[[COND:.+]] = arith.cmpi slt, %[[IV]], %[[UB]] -// CHECK: scf.if %[[COND]] -// CHECK: %[[IV0:.+]] = arith.divsi %[[IV]], %[[T5]] -// CHECK: %[[T14:.+]] = arith.remsi %[[IV]], %[[T5]] -// CHECK: %[[IV1:.+]] = arith.divsi %[[T14]], %[[T4]] -// CHECK: %[[T16:.+]] = arith.remsi %[[T14]], %[[T4]] -// CHECK: %[[IV2:.+]] = arith.divsi %[[T16]], %[[UB3]] -// CHECK: %[[IV3:.+]] = arith.remsi %[[T16]], %[[UB3]] -// CHECK: load %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] -// CHECK: load %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] -// CHECK: store %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] - -// ----- - -#map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -hal.executable private @parallel_4D_static { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", 
access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> { - hal.executable.entry_point @parallel_4D_static attributes {interface = @io, ordinal = 0 : index} - builtin.module { - func @parallel_4D_static() { - %c0 = arith.constant 0 : index - %arg0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<3x4x5x6xf32> - %arg1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref<3x4x5x6xf32> - %arg2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<3x4x5x6xf32> - linalg.generic { - indexing_maps = [#map0, #map0, #map0], - iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%arg0, %arg1 : memref<3x4x5x6xf32>, memref<3x4x5x6xf32>) - outs(%arg2 : memref<3x4x5x6xf32>) { - ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): - %0 = arith.addf %arg3, %arg4 : f32 - linalg.yield %0 : f32 - } - return - } - hal.interface private @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} -// CHECK-LABEL: func @parallel_4D_static() -// CHECK-DAG: %[[C360:.+]] = arith.constant 360 : index -// CHECK-DAG: %[[C120:.+]] = arith.constant 120 : index -// CHECK-DAG: %[[C30:.+]] = arith.constant 30 : index -// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index -// CHECK-DAG: %[[BID:.+]] = "gpu.block_id"() {dimension = "x"} -// CHECK-DAG: %[[BDIM:.+]] = "gpu.block_dim"() {dimension = "x"} -// CHECK-DAG: %[[TID:.+]] = "gpu.thread_id"() {dimension = "x"} -// CHECK: %[[BOFFSET:.+]] = arith.muli %[[BID]], %[[BDIM]] -// CHECK: %[[IV:.+]] = arith.addi %[[BOFFSET]], %[[TID]] -// CHECK: %[[COND:.+]] = arith.cmpi slt, %[[IV]], %[[C360]] -// CHECK: scf.if %[[COND]] -// CHECK: %[[IV0:.+]] = arith.divsi 
%[[IV]], %[[C120]] -// CHECK: %[[T14:.+]] = arith.remsi %[[IV]], %[[C120]] -// CHECK: %[[IV1:.+]] = arith.divsi %[[T14]], %[[C30]] -// CHECK: %[[T16:.+]] = arith.remsi %[[T14]], %[[C30]] -// CHECK: %[[IV2:.+]] = arith.divsi %[[T16]], %[[C6]] -// CHECK: %[[IV3:.+]] = arith.remsi %[[T16]], %[[C6]] -// CHECK: load %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] -// CHECK: load %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] -// CHECK: store %{{.+}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]] - -// ----- - -#map0 = affine_map<() -> ()> -#accesses = [#map0, #map0, #map0] -#trait = { - indexing_maps = #accesses, - iterator_types = [] -} - -hal.executable private @scalar_add { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> { - hal.executable.entry_point @scalar_add attributes {interface = @io, ordinal = 0 : index} - builtin.module { - func @scalar_add() attributes {hal.num_workgroups_fn = @scalar_add__num_workgroups__} { - %c0 = arith.constant 0 : index - %arg0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref - %arg1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref - %arg2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref - linalg.generic #trait - ins(%arg0, %arg1 : memref, memref) - outs(%arg2 : memref) { - ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): - %0 = arith.addf %arg3, %arg4 : f32 - linalg.yield %0 : f32 - } - return - } - hal.interface private @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", 
access="Write|Discard" - } - } - } -} -// CHECK-LABEL: func @scalar_add() -// CHECK: load -// CHECK-NEXT: load -// CHECK-NEXT: addf -// CHECK-NEXT: store -// CHECK-NEXT: return - -// ----- - -// TODO(GH-4901): Convert these tests back to use dynamic shapes when linalg on tensors becomes default. -hal.executable private @reduce_sum { - hal.interface @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - hal.executable.variant @vulkan, target = #hal.executable.target<"vulkan-spirv", "vulkan-spirv-fb"> { - hal.executable.entry_point @reduce_sum attributes { - interface = @io, - ordinal = 0 : index - } - builtin.module { - func @reduce_sum() { - %c0 = arith.constant 0 : index - %arg0 = hal.interface.binding.subspan @io::@arg0[%c0] : memref<40x50x75xf32> - %arg1 = hal.interface.binding.subspan @io::@arg1[%c0] : memref - %arg2 = hal.interface.binding.subspan @io::@ret0[%c0] : memref<40xf32> - linalg.generic { - indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, - affine_map<(d0, d1, d2) -> ()>, - affine_map<(d0, d1, d2) -> (d0)>], - iterator_types = ["parallel", "reduction", "reduction"]} - ins(%arg0, %arg1 : memref<40x50x75xf32>, memref) - outs(%arg2 : memref<40xf32>) { - ^bb0(%arg6: f32, %arg7: f32, %arg8: f32): // no predecessors - %idx1 = linalg.index 1 : index - %idx2 = linalg.index 2 : index - %zero = arith.constant 0 : index - %0 = arith.cmpi eq, %idx2, %zero : index - %1 = arith.cmpi eq, %idx1, %zero : index - %2 = arith.andi %0, %1 : i1 - %3 = select %2, %arg7, %arg8 : f32 - %4 = arith.addf %arg6, %3 : f32 - linalg.yield %4 : f32 - } - return - } - hal.interface private @io { - hal.interface.binding @arg0, set=0, binding=0, type="StorageBuffer", access="Read" - hal.interface.binding @arg1, set=0, binding=1, type="StorageBuffer", 
access="Read" - hal.interface.binding @ret0, set=0, binding=2, type="StorageBuffer", access="Write|Discard" - } - } - } -} -//CHECK-LABEL: func @reduce_sum -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[C40:.+]] = arith.constant 40 : index -// CHECK-DAG: %[[C50:.+]] = arith.constant 50 : index -// CHECK-DAG: %[[C75:.+]] = arith.constant 75 : index -// CHECK: %[[COND:.+]] = arith.cmpi slt, %{{.+}}, %[[C40]] -// CHECK: scf.if %[[COND]] -// CHECK: scf.for %[[IV0:.+]] = %{{.+}} to %[[C50]] -// CHECK: scf.for %[[IV1:.+]] = %{{.+}} to %[[C75]] -// CHECK-DAG: %[[ISZERO0:.+]] = arith.cmpi eq, %[[IV0]], %[[C0]] -// CHECK-DAG: %[[ISZERO1:.+]] = arith.cmpi eq, %[[IV1]], %[[C0]] From d89a5af0cfcf827046998889092130df02dd859d Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Tue, 26 Oct 2021 19:38:21 -0700 Subject: [PATCH 11/22] Allow more modern CMake policies (#7424) See https://cliutils.gitlab.io/modern-cmake/chapters/basics.html --- CMakeLists.txt | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d4240cc753cc..a2b44d9da86d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,13 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -cmake_minimum_required(VERSION 3.16.3) - -# Do not set MSVC warning flags like /W3 by default (since 3.15): -# https://cmake.org/cmake/help/v3.15/policy/CMP0092.html -if(POLICY CMP0092) - cmake_policy(SET CMP0092 NEW) -endif() +cmake_minimum_required(VERSION 3.16.3...3.21) # LLVM requires CMP0116 for tblgen: https://reviews.llvm.org/D101083 # CMP0116: Ninja generators transform `DEPFILE`s from `add_custom_command()` From e13e65c4798ebe4ae5439db97b271c30d7d2bb6f Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Tue, 26 Oct 2021 20:23:11 -0700 Subject: [PATCH 12/22] Clean up run_binary_test on android (#7466) --- build_tools/cmake/iree_run_binary_test.cmake | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/build_tools/cmake/iree_run_binary_test.cmake b/build_tools/cmake/iree_run_binary_test.cmake index c233760b1927..5d689004e749 100644 --- a/build_tools/cmake/iree_run_binary_test.cmake +++ b/build_tools/cmake/iree_run_binary_test.cmake @@ -74,8 +74,7 @@ function(iree_run_binary_test) endif() if(ANDROID) - set(_ANDROID_REL_DIR "${_PACKAGE_PATH}/${_RULE_NAME}") - set(_ANDROID_ABS_DIR "/data/local/tmp/${_ANDROID_REL_DIR}") + set(_ANDROID_ABS_DIR "/data/local/tmp/${_PACKAGE_PATH}/${_RULE_NAME}") endif() if (DEFINED _RULE_TEST_INPUT_FILE_ARG) @@ -92,9 +91,6 @@ function(iree_run_binary_test) string(REGEX REPLACE "^::" "${_PACKAGE_NS}::" _TEST_BINARY_TARGET ${_RULE_TEST_BINARY}) if(ANDROID) - set(_ANDROID_REL_DIR "${_PACKAGE_PATH}/${_RULE_NAME}") - set(_ANDROID_ABS_DIR "/data/local/tmp/${_ANDROID_REL_DIR}") - # Define a custom target for pushing and running the test on Android device. 
set(_TEST_NAME ${_TEST_NAME}_on_android_device) add_test( @@ -102,7 +98,7 @@ function(iree_run_binary_test) ${_TEST_NAME} COMMAND "${CMAKE_SOURCE_DIR}/build_tools/cmake/run_android_test.${IREE_HOST_SCRIPT_EXT}" - "${_ANDROID_REL_DIR}/$" + "${_ANDROID_ABS_DIR}/$" ${_RULE_ARGS} ) # Use environment variables to instruct the script to push artifacts From 687471989a94449d21bda587f1f6cb8b7056655e Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Tue, 26 Oct 2021 20:38:45 -0700 Subject: [PATCH 13/22] Switching to using our own flatcc compiler library. (#7468) We were already doing our own runtime library slicing, and we have to do our own compiler build for bazel, so this is consistent and lets us avoid including their cmake file (and the issues that creates). --- CMakeLists.txt | 10 +++--- .../iree_third_party_cmake_options.cmake | 13 -------- build_tools/third_party/flatcc/CMakeLists.txt | 31 ++++++++++++++++--- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a2b44d9da86d..2d937d199ba7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -434,17 +434,17 @@ if(IREE_ENABLE_THREADING) add_subdirectory(third_party/cpuinfo EXCLUDE_FROM_ALL) endif() -iree_set_flatcc_cmake_options() add_subdirectory(build_tools/third_party/flatcc EXCLUDE_FROM_ALL) -add_subdirectory(third_party/flatcc EXCLUDE_FROM_ALL) add_subdirectory(third_party/vulkan_headers EXCLUDE_FROM_ALL) # TODO(scotttodd): Iterate some more and find a better place for this. 
if (NOT CMAKE_CROSSCOMPILING) - install(TARGETS iree-flatcc-cli - COMPONENT iree-flatcc-cli - RUNTIME DESTINATION bin) + install( + TARGETS iree-flatcc-cli + COMPONENT iree-flatcc-cli + RUNTIME DESTINATION bin + ) endif() if(IREE_BUILD_COMPILER) diff --git a/build_tools/cmake/iree_third_party_cmake_options.cmake b/build_tools/cmake/iree_third_party_cmake_options.cmake index 2efba4e42db9..37fd9da2508b 100644 --- a/build_tools/cmake/iree_third_party_cmake_options.cmake +++ b/build_tools/cmake/iree_third_party_cmake_options.cmake @@ -17,19 +17,6 @@ macro(iree_set_cpuinfo_cmake_options) set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE BOOL "" FORCE) endmacro() -macro(iree_set_flatcc_cmake_options) - set(FLATCC_TEST OFF CACHE BOOL "" FORCE) - set(FLATCC_CXX_TEST OFF CACHE BOOL "" FORCE) - set(FLATCC_REFLECTION OFF CACHE BOOL "" FORCE) - set(FLATCC_ALLOW_WERROR OFF CACHE BOOL "" FORCE) - - if(CMAKE_CROSSCOMPILING) - set(FLATCC_RTONLY ON CACHE BOOL "" FORCE) - else() - set(FLATCC_RTONLY OFF CACHE BOOL "" FORCE) - endif() -endmacro() - macro(iree_set_googletest_cmake_options) set(INSTALL_GTEST OFF CACHE BOOL "" FORCE) set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) diff --git a/build_tools/third_party/flatcc/CMakeLists.txt b/build_tools/third_party/flatcc/CMakeLists.txt index f707b567f03b..74caf63763c1 100644 --- a/build_tools/third_party/flatcc/CMakeLists.txt +++ b/build_tools/third_party/flatcc/CMakeLists.txt @@ -85,14 +85,35 @@ external_cc_library( if(NOT CMAKE_CROSSCOMPILING) add_executable(iree-flatcc-cli "${FLATCC_ROOT}/src/cli/flatcc_cli.c" + "${FLATCC_ROOT}/external/hash/cmetrohash64.c" + "${FLATCC_ROOT}/external/hash/str_set.c" + "${FLATCC_ROOT}/external/hash/ptr_set.c" + "${FLATCC_ROOT}/src/compiler/hash_tables/symbol_table.c" + "${FLATCC_ROOT}/src/compiler/hash_tables/scope_table.c" + "${FLATCC_ROOT}/src/compiler/hash_tables/name_table.c" + "${FLATCC_ROOT}/src/compiler/hash_tables/schema_table.c" + "${FLATCC_ROOT}/src/compiler/hash_tables/value_set.c" + 
"${FLATCC_ROOT}/src/compiler/fileio.c" + "${FLATCC_ROOT}/src/compiler/parser.c" + "${FLATCC_ROOT}/src/compiler/semantics.c" + "${FLATCC_ROOT}/src/compiler/coerce.c" + "${FLATCC_ROOT}/src/compiler/codegen_schema.c" + "${FLATCC_ROOT}/src/compiler/flatcc.c" + "${FLATCC_ROOT}/src/compiler/codegen_c.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_reader.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_sort.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_builder.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_verifier.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_sorter.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_json_parser.c" + "${FLATCC_ROOT}/src/compiler/codegen_c_json_printer.c" + "${FLATCC_ROOT}/src/runtime/builder.c" + "${FLATCC_ROOT}/src/runtime/emitter.c" + "${FLATCC_ROOT}/src/runtime/refmap.c" ) - - target_link_libraries(iree-flatcc-cli - flatcc - ) - target_include_directories(iree-flatcc-cli SYSTEM PUBLIC + "${FLATCC_ROOT}/external" "${FLATCC_ROOT}/include" "${FLATCC_ROOT}/config" ) From c7d79e3f2d026a0fccaeb7bb920db7d94b864aec Mon Sep 17 00:00:00 2001 From: powderluv Date: Tue, 26 Oct 2021 22:49:48 -0700 Subject: [PATCH 14/22] Add -lSystem and SDK path on OSX (#7471) TEST=build samples and tests on M1 --- .../Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp b/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp index 87987a8ec214..cdc1b2111cbf 100644 --- a/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp +++ b/iree/compiler/Dialect/HAL/Target/LLVM/internal/UnixLinkerTool.cpp @@ -62,6 +62,9 @@ class UnixLinkerTool : public LinkerTool { // Produce a Mach-O dylib file. flags.push_back("-dylib"); flags.push_back("-flat_namespace"); + flags.push_back( + "-L /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib " + "-lSystem"); // HACK: we insert libm calls. This is *not good*. 
// Until the MLIR LLVM lowering paths no longer introduce these, From 527b7a0b3677d991e8f3e19047ab7d8bb0df87dc Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Wed, 27 Oct 2021 06:19:17 -0700 Subject: [PATCH 15/22] [NFC] Refactor benchmark suites to use parameter names (#7472) This is mostly with the goal of adding additional fields, like a TFLite input file. Includes some new CMake functions/macros that I think will be helpful in general. Tested: Verified no diff in the CMake build dir, excluding log files and a timestamp-dependent SPIRV thing. ```shell $ diff -r -q \ --exclude '*.log' \ --exclude gitversion.h \ /tmp/benchmarks_old \ ~/build/iree/benchmarks_test/ ``` --- benchmarks/TFLite/CMakeLists.txt | 85 +++++++------ benchmarks/TensorFlow/CMakeLists.txt | 116 ++++++++++-------- build_tools/cmake/iree_macros.cmake | 50 +++++++- .../cmake/iree_mlir_benchmark_suite.cmake | 87 ++++++------- 4 files changed, 202 insertions(+), 136 deletions(-) diff --git a/benchmarks/TFLite/CMakeLists.txt b/benchmarks/TFLite/CMakeLists.txt index e78feaff8524..bfa27c357932 100644 --- a/benchmarks/TFLite/CMakeLists.txt +++ b/benchmarks/TFLite/CMakeLists.txt @@ -7,37 +7,52 @@ ################################################################################ # # -# Benchmark models for Tosa # +# Benchmark models from TFLite # # # -# Each module specification should be a list that contains the following # -# fields: MODULE_NAME, MODULE_TAGS, MLIR_SOURCE, ENTRY_FUNCTION, # -# FUNCTION_INPUTS. See iree_mlir_benchmark_suite definition for details about # -# these fields. # +# Each module specification should be a list containing alternating keys and # +# values. The fields are: NAME, TAGS, MLIR_SOURCE, ENTRY_FUNCTION, and # +# FUNCTION_INPUTS. See the iree_mlir_benchmark_suite definition for details # +# about these fields. 
# # # ################################################################################ set(DEEPLABV3_FP32_MODULE - "DeepLabV3" # MODULE_NAME - "fp32" # MODULE_TAGS - "https://storage.googleapis.com/iree-model-artifacts/DeepLabV3-2bcafb1.tar.gz" # MLIR_SOURCE - "main" # ENTRY_FUNCTION - "1x257x257x3xf32" # FUNCTION_INPUTS + NAME + "DeepLabV3" + TAGS + "fp32" + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/DeepLabV3-2bcafb1.tar.gz" + ENTRY_FUNCTION + "main" + FUNCTION_INPUTS + "1x257x257x3xf32" ) set(MOBILESSD_FP32_MODULE - "MobileSSD" # MODULE_NAME - "fp32" # MODULE_TAGS - "https://storage.googleapis.com/iree-model-artifacts/MobileSSD-2bcafb1.tar.gz" # MLIR_SOURCE - "main" # ENTRY_FUNCTION - "1x320x320x3xf32" # FUNCTION_INPUTS + NAME + "MobileSSD" + TAGS + "fp32" + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/MobileSSD-2bcafb1.tar.gz" + ENTRY_FUNCTION + "main" + FUNCTION_INPUTS + "1x320x320x3xf32" ) set(POSENET_FP32_MODULE - "PoseNet" # MODULE_NAME - "fp32" # MODULE_TAGS - "https://storage.googleapis.com/iree-model-artifacts/PoseNet-2bcafb1.tar.gz" # MLIR_SOURCE - "main" # ENTRY_FUNCTION - "1x353x257x3xf32" # FUNCTION_INPUTS + NAME + "PoseNet" + TAGS + "fp32" + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/PoseNet-2bcafb1.tar.gz" + ENTRY_FUNCTION + "main" + FUNCTION_INPUTS + "1x353x257x3xf32" ) ################################################################################ @@ -53,9 +68,9 @@ set(POSENET_FP32_MODULE # CPU, Dylib-Sync, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${DEEPLABV3_FP32_MODULE} - ${MOBILESSD_FP32_MODULE} - ${POSENET_FP32_MODULE} + "${DEEPLABV3_FP32_MODULE}" + "${MOBILESSD_FP32_MODULE}" + "${POSENET_FP32_MODULE}" BENCHMARK_MODES "big-core,full-inference" @@ -77,9 +92,9 @@ iree_mlir_benchmark_suite( # CPU, Dylib, 1-thread, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${DEEPLABV3_FP32_MODULE} - ${MOBILESSD_FP32_MODULE} - 
${POSENET_FP32_MODULE} + "${DEEPLABV3_FP32_MODULE}" + "${MOBILESSD_FP32_MODULE}" + "${POSENET_FP32_MODULE}" BENCHMARK_MODES "1-thread,big-core,full-inference" @@ -102,9 +117,9 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Adreno, full-inference iree_mlir_benchmark_suite( MODULES - ${DEEPLABV3_FP32_MODULE} - ${MOBILESSD_FP32_MODULE} - ${POSENET_FP32_MODULE} + "${DEEPLABV3_FP32_MODULE}" + "${MOBILESSD_FP32_MODULE}" + "${POSENET_FP32_MODULE}" BENCHMARK_MODES "full-inference" @@ -124,9 +139,9 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, full-inference iree_mlir_benchmark_suite( MODULES - ${DEEPLABV3_FP32_MODULE} - ${MOBILESSD_FP32_MODULE} - ${POSENET_FP32_MODULE} + "${DEEPLABV3_FP32_MODULE}" + "${MOBILESSD_FP32_MODULE}" + "${POSENET_FP32_MODULE}" BENCHMARK_MODES "full-inference" @@ -146,9 +161,9 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, kernel-execution iree_mlir_benchmark_suite( MODULES - ${DEEPLABV3_FP32_MODULE} - ${MOBILESSD_FP32_MODULE} - ${POSENET_FP32_MODULE} + "${DEEPLABV3_FP32_MODULE}" + "${MOBILESSD_FP32_MODULE}" + "${POSENET_FP32_MODULE}" BENCHMARK_MODES "kernel-execution" diff --git a/benchmarks/TensorFlow/CMakeLists.txt b/benchmarks/TensorFlow/CMakeLists.txt index 48c79b3ebdb6..827b11b41474 100644 --- a/benchmarks/TensorFlow/CMakeLists.txt +++ b/benchmarks/TensorFlow/CMakeLists.txt @@ -7,49 +7,69 @@ ################################################################################ # # -# Benchmark models # +# Benchmark models from TensorFlow # # # -# Each module specification should be a list that contains the following # -# fields: MODULE_NAME, MODULE_TAGS, MLIR_SOURCE, ENTRY_FUNCTION, # -# FUNCTION_INPUTS. See iree_mlir_benchmark_suite definition for details about # -# these fields. # +# Each module specification should be a list containing alternating keys and # +# values. The fields are: NAME, TAGS, MLIR_SOURCE, ENTRY_FUNCTION, and # +# FUNCTION_INPUTS. See the iree_mlir_benchmark_suite definition for details # +# about these fields. 
# # # ################################################################################ set(MOBILEBERT_FP16_MODULE - "MobileBertSquad" # MODULE_NAME - "fp16" # MODULE_TAGS + NAME + "MobileBertSquad" + TAGS + "fp16" # This uses the same input MLIR source as fp32 to save download time. # It requires users to have "--iree-flow-demote-f32-to-f16". - "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-89edfa50d.tar.gz" # MLIR_SOURCE - "serving_default" # ENTRY_FUNCTION + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-89edfa50d.tar.gz" + ENTRY_FUNCTION + "serving_default" # The conversion done by "--iree-flow-demote-f32-to-f16" won't change the # original input signature. - "1x384xi32,1x384xi32,1x384xi32" # FUNCTION_INPUTS + FUNCTION_INPUTS + "1x384xi32,1x384xi32,1x384xi32" ) set(MOBILEBERT_FP32_MODULE - "MobileBertSquad" # MODULE_NAME - "fp32" # MODULE_TAGS - "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-89edfa50d.tar.gz" # MLIR_SOURCE - "serving_default" # ENTRY_FUNCTION - "1x384xi32,1x384xi32,1x384xi32" # FUNCTION_INPUTS + NAME + "MobileBertSquad" + TAGS + "fp32" + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/MobileBertSquad-89edfa50d.tar.gz" + ENTRY_FUNCTION + "serving_default" + FUNCTION_INPUTS + "1x384xi32,1x384xi32,1x384xi32" ) set(MOBILENET_V2_MODULE - "MobileNetV2" # MODULE_NAME - "fp32,imagenet" # MODULE_TAGS - "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-89edfa50d.tar.gz" # MLIR_SOURCE - "call" # ENTRY_FUNCTION - "1x224x224x3xf32" # FUNCTION_INPUTS + NAME + "MobileNetV2" + TAGS + "fp32,imagenet" + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/MobileNetV2-89edfa50d.tar.gz" + ENTRY_FUNCTION + "call" + FUNCTION_INPUTS + "1x224x224x3xf32" ) set(MOBILENET_V3SMALL_MODULE - "MobileNetV3Small" # MODULE_NAME - "fp32,imagenet" # MODULE_TAGS - "https://storage.googleapis.com/iree-model-artifacts/MobileNetV3Small-89edfa50d.tar.gz" # 
MLIR_SOURCE - "call" # ENTRY_FUNCTION - "1x224x224x3xf32" # FUNCTION_INPUTS + NAME + "MobileNetV3Small" + TAGS + "fp32,imagenet" + MLIR_SOURCE + "https://storage.googleapis.com/iree-model-artifacts/MobileNetV3Small-89edfa50d.tar.gz" + ENTRY_FUNCTION + "call" + FUNCTION_INPUTS + "1x224x224x3xf32" ) ################################################################################ @@ -65,8 +85,8 @@ set(MOBILENET_V3SMALL_MODULE # CPU, VMVX, 3-thread, little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "3-thread,little-core,full-inference" @@ -86,8 +106,8 @@ iree_mlir_benchmark_suite( # CPU, Dylib-Sync, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "big-core,full-inference" @@ -109,8 +129,8 @@ iree_mlir_benchmark_suite( # CPU, Dylib, 1-thread, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "1-thread,big-core,full-inference" @@ -133,8 +153,8 @@ iree_mlir_benchmark_suite( # CPU, Dylib, 3-thread, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "3-thread,big-core,full-inference" @@ -157,9 +177,9 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Adreno, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP32_MODULE} - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILEBERT_FP32_MODULE}" + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "full-inference" @@ -179,8 +199,8 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Adreno, kernel-execution 
iree_mlir_benchmark_suite( MODULES - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "kernel-execution" @@ -203,9 +223,9 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP32_MODULE} - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILEBERT_FP32_MODULE}" + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "full-inference" @@ -225,8 +245,8 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, kernel-execution iree_mlir_benchmark_suite( MODULES - ${MOBILENET_V2_MODULE} - ${MOBILENET_V3SMALL_MODULE} + "${MOBILENET_V2_MODULE}" + "${MOBILENET_V3SMALL_MODULE}" BENCHMARK_MODES "kernel-execution" @@ -249,7 +269,7 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, kernel-execution iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP16_MODULE} + "${MOBILEBERT_FP16_MODULE}" BENCHMARK_MODES "kernel-execution" @@ -273,7 +293,7 @@ iree_mlir_benchmark_suite( # GPU, Vulkan, Mali, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP16_MODULE} + "${MOBILEBERT_FP16_MODULE}" BENCHMARK_MODES "full-inference" @@ -303,7 +323,7 @@ iree_mlir_benchmark_suite( # CPU, Dylib-Sync, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP32_MODULE} + "${MOBILEBERT_FP32_MODULE}" BENCHMARK_MODES "big-core,full-inference" @@ -325,7 +345,7 @@ iree_mlir_benchmark_suite( # CPU, Dylib, 1-thread, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP32_MODULE} + "${MOBILEBERT_FP32_MODULE}" BENCHMARK_MODES "1-thread,big-core,full-inference" @@ -348,7 +368,7 @@ iree_mlir_benchmark_suite( # CPU, Dylib, 3-thread, big/little-core, full-inference iree_mlir_benchmark_suite( MODULES - ${MOBILEBERT_FP32_MODULE} + "${MOBILEBERT_FP32_MODULE}" BENCHMARK_MODES "3-thread,big-core,full-inference" diff --git a/build_tools/cmake/iree_macros.cmake 
b/build_tools/cmake/iree_macros.cmake index 3dccb5af0828..aebbf9f15c16 100644 --- a/build_tools/cmake/iree_macros.cmake +++ b/build_tools/cmake/iree_macros.cmake @@ -284,7 +284,7 @@ endfunction() # Adds test environment variable properties based on the current build options. # # Parameters: -# TEST_NAME: the test name, e.g. iree/base:math_test +# TEST_NAME: the test name, e.g. iree/base:math_test function(iree_add_test_environment_properties TEST_NAME) # IREE_*_DISABLE environment variables may used to skip test cases which # require both a compiler target backend and compatible runtime HAL driver. @@ -303,3 +303,51 @@ function(iree_add_test_environment_properties TEST_NAME) set_property(TEST ${TEST_NAME} APPEND PROPERTY ENVIRONMENT "IREE_LLVMAOT_DISABLE=1") endif() endfunction() + +# iree_check_defined +# +# A lightweight way to check that all the given variables are defined. Useful +# in cases like checking that a function has been passed all required arguments. +# Doesn't give usage-specific error messages, but still significantly better +# than no error checking. +# Variable names should be passed directly without quoting or dereferencing. +# Example: +# iree_check_defined(_SOME_VAR _AND_ANOTHER_VAR) +macro(iree_check_defined) + foreach(_VAR ${ARGN}) + if(NOT DEFINED "${_VAR}") + message(SEND_ERROR "${_VAR} is not defined") + endif() + endforeach() +endmacro() + +# iree_validate_required_arguments +# +# Validates that no arguments went unparsed or were given no values and that all +# required arguments have values. Expects to be called after +# cmake_parse_arguments and verifies that the variables it creates have been +# populated as appropriate. 
+function(iree_validate_required_arguments + PREFIX + REQUIRED_ONE_VALUE_KEYWORDS + REQUIRED_MULTI_VALUE_KEYWORDS) + if(DEFINED ${PREFIX}_UNPARSED_ARGUMENTS) + message(SEND_ERROR "Unparsed argument(s): '${${PREFIX}_UNPARSED_ARGUMENTS}'") + endif() + if(DEFINED ${PREFIX}_KEYWORDS_MISSING_VALUES) + message(SEND_ERROR + "No values for field(s) '${${PREFIX}_KEYWORDS_MISSING_VALUES}'") + endif() + + foreach(_ONE_VALUE_KEYWORD IN LISTS REQUIRED_ONE_VALUE_KEYWORDS) + if(NOT DEFINED ${PREFIX}_${_ONE_VALUE_KEYWORD}) + message(SEND_ERROR "Missing required argument ${_ONE_VALUE_KEYWORD}") + endif() + endforeach() + + foreach(_MULTI_VALUE_KEYWORD IN LISTS REQUIRED_MULTI_VALUE_KEYWORDS) + if(NOT DEFINED ${PREFIX}_${_MULTI_VALUE_KEYWORD}) + message(SEND_ERROR "Missing required argument ${_MULTI_VALUE_KEYWORD}") + endif() + endforeach() +endfunction() diff --git a/build_tools/cmake/iree_mlir_benchmark_suite.cmake b/build_tools/cmake/iree_mlir_benchmark_suite.cmake index 8028809c8ce0..f2971bd99fc1 100644 --- a/build_tools/cmake/iree_mlir_benchmark_suite.cmake +++ b/build_tools/cmake/iree_mlir_benchmark_suite.cmake @@ -71,62 +71,44 @@ function(iree_mlir_benchmark_suite) "BENCHMARK_MODES;MODULES;TRANSLATION_FLAGS;RUNTIME_FLAGS" ) - # All fields' names for each module. - set(_FIELD_NAMES "_MODULE_NAME" "_MODULE_TAGS" - "_MLIR_SOURCE" "_ENTRY_FUNCTION" "_FUNCTION_INPUTS") - list(LENGTH _FIELD_NAMES _FIELD_COUNT) - math(EXPR _MAX_FIELD_INDEX "${_FIELD_COUNT} - 1") - - # Make sure we have some multiple of six elements. 
- list(LENGTH _RULE_MODULES _MODULE_TOTAL_ELEMENT_COUNT) - math(EXPR _MODULE_COUNT - "${_MODULE_TOTAL_ELEMENT_COUNT} / ${_FIELD_COUNT}") - math(EXPR _MODULE_ELEMENT_REMAINDER - "${_MODULE_TOTAL_ELEMENT_COUNT} % ${_FIELD_COUNT}") - if(NOT ${_MODULE_ELEMENT_REMAINDER} EQUAL 0) - message(SEND_ERROR "MODULES expected to have some multiple of six " - "elements; some module has missing/redundant fields.") - endif() + iree_validate_required_arguments( + _RULE + "DRIVER;TARGET_BACKEND;TARGET_ARCHITECTURE" + "BENCHMARK_MODES;MODULES" + ) - # Loop over all modules to create targets. - math(EXPR _MAX_MODULE_INDEX "${_MODULE_COUNT} - 1") - foreach(_MODULE_INDEX RANGE 0 "${_MAX_MODULE_INDEX}") - # Loop over all elements for the current module and assign them to the - # corresponding field names for later use. - foreach(_FIELD_INDEX RANGE 0 "${_MAX_FIELD_INDEX}") - list(GET _FIELD_NAMES ${_FIELD_INDEX} _FIELD_NAME) - math(EXPR _INDEX "${_MODULE_INDEX} * ${_FIELD_COUNT} + ${_FIELD_INDEX}") - list(GET _RULE_MODULES ${_INDEX} ${_FIELD_NAME}) - endforeach() - - # Use the last directory's name as the category. - get_filename_component(_CATEGORY "${CMAKE_CURRENT_SOURCE_DIR}" NAME) + foreach(_MODULE IN LISTS _RULE_MODULES) + cmake_parse_arguments( + _MODULE + "" + "NAME;TAGS;MLIR_SOURCE;ENTRY_FUNCTION;FUNCTION_INPUTS" + "" + ${_MODULE} + ) + iree_validate_required_arguments( + _MODULE + "NAME;TAGS;MLIR_SOURCE;ENTRY_FUNCTION;FUNCTION_INPUTS" + "" + ) - # Generate all benchmarks to the root build directory. This helps for - # discovering them and execute them on devices. + get_filename_component(_CATEGORY "${CMAKE_CURRENT_SOURCE_DIR}" NAME) set(_ROOT_ARTIFACTS_DIR "${IREE_BINARY_DIR}/benchmark_suites/${_CATEGORY}") set(_VMFB_ARTIFACTS_DIR "${_ROOT_ARTIFACTS_DIR}/vmfb") - # The source file used to generate benchmark artifacts. - set(_SOURCE_FILE "${_MLIR_SOURCE}") # The CMake target's name if we need to download from the web. 
set(_DOWNLOAD_TARGET_NAME "") - # If the source file is from the web, create a custom command to download it. - # And wrap that with a custom target so later we can use for dependency. + # If the source file is from the web, create a custom command to download + # it and wrap that with a custom target so later we can use for dependency. # # Note: We actually should not do this; instead, we should directly compile # from the initial source (i.e., TensorFlow Python models). But that is - # tangled with the pending Python testing infrastructure revamp so we'd prefer - # to not do that right now. - if("${_MLIR_SOURCE}" MATCHES "^https?://") + # tangled with the pending Python testing infrastructure revamp so we'd + # prefer to not do that right now. + if("${_MODULE_MLIR_SOURCE}" MATCHES "^https?://") # Update the source file to the downloaded-to place. - string(REPLACE "/" ";" _SOURCE_URL_SEGMENTS "${_MLIR_SOURCE}") - # TODO: we can do `list(POP_BACK _SOURCE_URL_SEGMENTS _LAST_URL_SEGMENT)` - # after migrating to CMake 3.15. 
- list(LENGTH _SOURCE_URL_SEGMENTS _URL_SEGMENT_COUNT) - math(EXPR _SEGMENT_LAST_INDEX "${_URL_SEGMENT_COUNT} - 1") - list(GET _SOURCE_URL_SEGMENTS ${_SEGMENT_LAST_INDEX} _LAST_URL_SEGMENT) + string(REPLACE "/" ";" _SOURCE_URL_SEGMENTS "${_MODULE_MLIR_SOURCE}") + list(POP_BACK _SOURCE_URL_SEGMENTS _LAST_URL_SEGMENT) set(_DOWNLOAD_TARGET_NAME "iree-download-benchmark-source-${_LAST_URL_SEGMENT}") string(REPLACE "tar.gz" "mlir" _FILE_NAME "${_LAST_URL_SEGMENT}") @@ -137,10 +119,10 @@ function(iree_mlir_benchmark_suite) OUTPUT "${_SOURCE_FILE}" COMMAND "${Python3_EXECUTABLE}" "${IREE_ROOT_DIR}/scripts/download_file.py" - "${_MLIR_SOURCE}" -o "${_ROOT_ARTIFACTS_DIR}" + "${_MODULE_MLIR_SOURCE}" -o "${_ROOT_ARTIFACTS_DIR}" DEPENDS "${IREE_ROOT_DIR}/scripts/download_file.py" - COMMENT "Downloading ${_MLIR_SOURCE}" + COMMENT "Downloading ${_MODULE_MLIR_SOURCE}" ) add_custom_target("${_DOWNLOAD_TARGET_NAME}" DEPENDS "${_SOURCE_FILE}" @@ -160,8 +142,8 @@ function(iree_mlir_benchmark_suite) string(REPLACE "," "-" _TAGS "${_MODULE_TAGS}") string(REPLACE "," "-" _MODE "${_BENCHMARK_MODE}") list(APPEND _COMMON_NAME_SEGMENTS - "${_TAGS}" "${_MODE}" "${_RULE_TARGET_BACKEND}" - "${_RULE_TARGET_ARCHITECTURE}") + "${_TAGS}" "${_MODE}" "${_RULE_TARGET_BACKEND}" + "${_RULE_TARGET_ARCHITECTURE}") # The full list of translation flags. set(_TRANSLATION_ARGS "--iree-mlir-to-vm-bytecode-module") @@ -220,8 +202,8 @@ function(iree_mlir_benchmark_suite) "${Python3_EXECUTABLE}" "${IREE_ROOT_DIR}/scripts/generate_flagfile.py" --module_file="../../vmfb/compiled-${_VMFB_HASH}.vmfb" --driver=${_RULE_DRIVER} - --entry_function=${_ENTRY_FUNCTION} - --function_inputs=${_FUNCTION_INPUTS} + --entry_function=${_MODULE_ENTRY_FUNCTION} + --function_inputs=${_MODULE_FUNCTION_INPUTS} "${_ADDITIONAL_ARGS_CL}" -o "${_FLAG_FILE}" DEPENDS @@ -241,5 +223,6 @@ function(iree_mlir_benchmark_suite) # Mark dependency so that we have one target to drive them all. 
add_dependencies(iree-benchmark-suites "${_FLAGFILE_GEN_TARGET_NAME}") endforeach(_BENCHMARK_MODE IN LISTS _RULE_BENCHMARK_MODES) - endforeach(_MODULE_INDEX RANGE 0 "${_MAX_MODULE_INDEX}") -endfunction() + + endforeach(_MODULE IN LISTS _RULE_MODULES) +endfunction(iree_mlir_benchmark_suite) From 67c8d3f27c113ef7b3ec63d3df2b6bbf738689b4 Mon Sep 17 00:00:00 2001 From: bjacob Date: Wed, 27 Oct 2021 15:05:10 -0400 Subject: [PATCH 16/22] Trim e2e matmul tests and share MLIR code across testcases (#7475) When I wrote these tests, I put great care in ensuring low test run latency. But I didn't think about test compilation latency. So this PR reduces these build times in two different ways: 1. By commenting out half of the shapes in get_test_shapes. I've retained ~ 50% of the shapes that I believe provide ~ 90% of the coverage. The remaining 10% coverage will only start to matter later when we start to make the matmul implementation do more complicated things, and we can uncomment those shapes then. 2. By ensuring that testcases that test the same exact code (differing only by runtime data) actually share that code already at the source level (without relying on CSE, which might kick in too late to recover the best compilation latency), by changing generate_function_name to generate the same exact name, so that we're sure that we insert only one. There are two sub-cases here: a. Testcases that differ in dynamic shape dimensions. Before we could have functions foo_2x2(tensor) and foo_3x3(tensor) doing the same thing, only differing in the dynamic shapes that they are called on. Now it's foo_DYNxDYN(tensor). b. Testcases that differ in the generator of matrix element that they are called with. Before we would have foo_identity(tensor<4x4xf32>) and foo_random(tensor<4x4xf32). Now it's just foo(tensor<4x4xf32>). Before: ``` $ time cmake --build . [0/2] Re-checking globbed directories... 
[28/28] Generating e2e_matmul_direct_i8_small_dylib-llvm-aot_dylib.vmfb real 0m56.333s user 10m24.481s sys 3m46.072s ``` After: ``` $ time cmake --build . [0/2] Re-checking globbed directories... [28/28] Generating e2e_matmul_mmt4d_i8_small_dylib-llvm-aot_dylib.vmfb real 0m22.573s user 3m34.928s sys 1m23.833s ``` --- .../regression/generate_e2e_matmul_tests.py | 189 ++++++++++++------ 1 file changed, 130 insertions(+), 59 deletions(-) diff --git a/iree/test/e2e/regression/generate_e2e_matmul_tests.py b/iree/test/e2e/regression/generate_e2e_matmul_tests.py index bc983c352da6..d37d9f1a1bd5 100644 --- a/iree/test/e2e/regression/generate_e2e_matmul_tests.py +++ b/iree/test/e2e/regression/generate_e2e_matmul_tests.py @@ -11,9 +11,9 @@ import os import yaml import re - import enum import dataclasses +import typing # Data type of matrix entries. The string values must match MLIR data types. @@ -73,59 +73,69 @@ class TestGenerator: # Returns the list of TestShape's to use for the collection of shapes # identified by shapes_id. def get_test_shapes(shapes_id: ShapesId): + # Notes: + # 1. Be conservative in adding more shapes, as that can include both the + # build and execution latency of tests. The build latency is nearly the + # same for all shapes, while execution latency grows cubicly i.e. + # linearly with m*k*n. + # 2. Some shapes are commented out: they used to be tested but have been + # disabled to improve the trade-off between test coverage and build + # latency. if shapes_id == ShapesId.SMALL: return [ # Small sizes, square matrices - TestShape(m=x, k=x, n=x) for x in range(1, 40) + # was range(1, 40) before trimming. The choice of 18 is so that we + # exercise a case just above 16, as 16 will be a common kernel width. 
+ TestShape(m=x, k=x, n=x) for x in range(1, 18) ] + [ # Small sizes, slightly rectangular matrices TestShape(m=2, k=3, n=4), - TestShape(m=8, k=7, n=6), - TestShape(m=15, k=16, n=17), + #TestShape(m=8, k=7, n=6), + #TestShape(m=15, k=16, n=17), TestShape(m=14, k=19, n=23), - TestShape(m=31, k=33, n=32), + #TestShape(m=31, k=33, n=32), TestShape(m=25, k=41, n=35), # Small sizes, involving vectors (i.e. most rectangular cases) TestShape(m=10, k=1, n=1), TestShape(m=1, k=10, n=1), TestShape(m=1, k=1, n=10), - TestShape(m=1, k=10, n=10), - TestShape(m=10, k=1, n=10), - TestShape(m=10, k=10, n=1), + #TestShape(m=1, k=10, n=10), + #TestShape(m=10, k=1, n=10), + #TestShape(m=10, k=10, n=1), # Small sizes, involving other very small dimensions just above 1 TestShape(m=13, k=14, n=2), TestShape(m=3, k=17, n=12), TestShape(m=21, k=4, n=18), # Medium sizes, square matrices - TestShape(m=100, k=100, n=100), + #TestShape(m=100, k=100, n=100), # Medium sizes, slightly rectangular matrices TestShape(m=101, k=102, n=103), # Medium sizes, involving vectors (i.e. 
most rectangular cases) TestShape(m=10000, k=1, n=1), TestShape(m=1, k=10000, n=1), TestShape(m=1, k=1, n=10000), - TestShape(m=1, k=1000, n=1000), - TestShape(m=1000, k=1, n=1000), - TestShape(m=1000, k=1000, n=1), + #TestShape(m=1, k=1000, n=1000), + #TestShape(m=1000, k=1, n=1000), + #TestShape(m=1000, k=1000, n=1), # Medium sizes, involving other very small dimensions just above 1 TestShape(m=1300, k=1300, n=2), - TestShape(m=1300, k=1300, n=3), - TestShape(m=1300, k=1300, n=4), + #TestShape(m=1300, k=1300, n=3), + #TestShape(m=1300, k=1300, n=4), ] if shapes_id == ShapesId.LARGE: return [ # Large sizes, powers of two TestShape(m=256, k=256, n=512), - TestShape(m=512, k=512, n=128), - TestShape(m=1024, k=512, n=512), - TestShape(m=512, k=1024, n=512), + #TestShape(m=512, k=512, n=128), + #TestShape(m=1024, k=512, n=512), + #TestShape(m=512, k=1024, n=512), # Large sizes, powers of two minus one TestShape(m=127, k=63, n=511), # Large sizes, powers of two plus one TestShape(m=129, k=65, n=513), # Large sizes, misc. - TestShape(m=200, k=300, n=400), + #TestShape(m=200, k=300, n=400), TestShape(m=123, k=456, n=789), - TestShape(m=500, k=500, n=50), + #TestShape(m=500, k=500, n=50), # Be conservative in adding larger shapes. They can result in # high latency tests. If you have to, consider splitting them # out in a way that constrains the latency impact, e.g. by @@ -191,21 +201,6 @@ def get_test_generators(shapes_id: ShapesId): raise ValueError(shapes_id) -# Generates a name for a test function in the generated MLIR code. -def function_name(lhs_rhs_type: MatrixElemTypeId, acc_type: MatrixElemTypeId, - shape: TestShape, gen: TestGenerator): - dyn = gen.dynamicity.value - lhs_g = gen.lhs.value - rhs_g = gen.rhs.value - acc_g = gen.acc.value - input_t = lhs_rhs_type.value - acc_t = acc_type.value - m = shape.m - k = shape.k - n = shape.n - return f"{input_t}_{dyn}_{lhs_g}_{m}x{k}_times_{rhs_g}_{k}x{n}_plus_{acc_g}_{acc_t}" - - # Intentionally fixed seed! 
We want full reproducibility here, both across runs # and across machines. # Intentionally not shared with pseudorandom_generator_seed to limit the ways @@ -213,44 +208,120 @@ def function_name(lhs_rhs_type: MatrixElemTypeId, acc_type: MatrixElemTypeId, local_pseudorandom_state = 1 +# A static size value, i.e. a size value that could appear in a MLIR type +# such as 'tensor'. None means a dynamic size, similar to '?' in MLIR. +@dataclasses.dataclass +class DimSize: + value: typing.Optional[int] + + # Generates a compile-time MLIR size value, i.e. either a fixed positive integer -# or a '?' depending on dynamicity. +# or None (which maps to MLIR '?') depending on dynamicity. def static_size(x: int, dynamicity: Dynamicity): if dynamicity == Dynamicity.DYNAMIC: - return "?" + return DimSize(None) elif dynamicity == Dynamicity.STATIC: - return x + return DimSize(x) elif dynamicity == Dynamicity.MIXED: global local_pseudorandom_state # Same as C++ std::minstd_rand. # Using a local pseudorandom generator implementation ensures that it's # completely reproducible, across runs and across machines. local_pseudorandom_state = (local_pseudorandom_state * 48271) % 2147483647 - return x if local_pseudorandom_state > 1073741824 else "?" + return DimSize(x if local_pseudorandom_state > 1073741824 else None) else: raise ValueError(dynamicity) +# Stringification used for generating MLIR types, e.g. tensor. +def int_or_question_mark(s: DimSize): + return s.value or "?" + + +# Stringification used for generating alphanumeric identifiers, e.g. +# func @somefunction_DYNxDYNxf32, where we can't use "?" characters. +def int_or_DYN(s: DimSize): + return s.value or "DYN" + + +# Describes the fully resolved static dimensions of all 3 input matrices, +# LHS, RHS, and Accumulator, in a testcase. +# Each value is a string, which may either represent a positive integer such as "123", +# or a "?" string, meaning a dynamic dimension as in MLIR. 
+# These string values are used to generate MLIR function names and tensor shapes. +@dataclasses.dataclass +class TestInputMatricesStaticShapes: + lhs_rows: DimSize + lhs_cols: DimSize + rhs_rows: DimSize + rhs_cols: DimSize + acc_rows: DimSize + acc_cols: DimSize + + +# Helper for generate_function. Generates TestInputMatricesStaticShapes, i.e. +# converts from the runtime shape dimensions in TestShape and given dynamicity to +# the set of static shapes to be used in a test function's input tensors. +def generate_static_shapes(shape: TestShape, dynamicity: Dynamicity): + return TestInputMatricesStaticShapes( + lhs_rows=static_size(shape.m, dynamicity), + lhs_cols=static_size(shape.k, dynamicity), + rhs_rows=static_size(shape.k, dynamicity), + rhs_cols=static_size(shape.n, dynamicity), + acc_rows=static_size(shape.m, dynamicity), + acc_cols=static_size(shape.n, dynamicity), + ) + + +# Helper for generate_function. +# Generates a name for a test function in the generated MLIR code. +def generate_function_name(lhs_rhs_type: MatrixElemTypeId, + acc_type: MatrixElemTypeId, + static_shapes: TestInputMatricesStaticShapes): + input_t = lhs_rhs_type.value + acc_t = acc_type.value + lhs_m = int_or_DYN(static_shapes.lhs_rows) + lhs_k = int_or_DYN(static_shapes.lhs_cols) + rhs_k = int_or_DYN(static_shapes.rhs_rows) + rhs_n = int_or_DYN(static_shapes.rhs_cols) + acc_m = int_or_DYN(static_shapes.acc_rows) + acc_n = int_or_DYN(static_shapes.acc_cols) + return f"matmul_{lhs_m}x{lhs_k}x{input_t}_times_{rhs_k}x{rhs_n}x{input_t}_into_{acc_m}x{acc_n}x{acc_t}" + + +# Represents a generated test function. +@dataclasses.dataclass +class MLIRFunction: + name: str + definition: str + + # Generates a test function in the generated MLIR code. # The generated function will take the same arguments as linalg.matmul and # will just call linalg.matmul with them, returning its result. 
-def generate_function(func_name: str, lhs_rhs_type: MatrixElemTypeId, +def generate_function(lhs_rhs_type: MatrixElemTypeId, acc_type: MatrixElemTypeId, shape: TestShape, - gen: TestGenerator): - lhs_m = static_size(shape.m, gen.dynamicity) - lhs_k = static_size(shape.k, gen.dynamicity) - rhs_k = static_size(shape.k, gen.dynamicity) - rhs_n = static_size(shape.n, gen.dynamicity) - acc_m = static_size(shape.m, gen.dynamicity) - acc_n = static_size(shape.n, gen.dynamicity) + dynamicity: Dynamicity): + static_shapes = generate_static_shapes(shape, dynamicity) + func_name = generate_function_name(lhs_rhs_type, acc_type, static_shapes) + lhs_m = int_or_question_mark(static_shapes.lhs_rows) + lhs_k = int_or_question_mark(static_shapes.lhs_cols) + rhs_k = int_or_question_mark(static_shapes.rhs_rows) + rhs_n = int_or_question_mark(static_shapes.rhs_cols) + acc_m = int_or_question_mark(static_shapes.acc_rows) + acc_n = int_or_question_mark(static_shapes.acc_cols) lhs_tensor_type = f"tensor<{lhs_m}x{lhs_k}x{lhs_rhs_type.value}>" rhs_tensor_type = f"tensor<{rhs_k}x{rhs_n}x{lhs_rhs_type.value}>" acc_tensor_type = f"tensor<{acc_m}x{acc_n}x{acc_type.value}>" - return ( + func_definition = ( f"func @{func_name}(%lhs: {lhs_tensor_type}, %rhs: {rhs_tensor_type}, %acc: {acc_tensor_type}) -> {acc_tensor_type} {{\n" f" %result = linalg.matmul ins(%lhs, %rhs: {lhs_tensor_type}, {rhs_tensor_type}) outs(%acc: {acc_tensor_type}) -> {acc_tensor_type}\n" f" return %result: {acc_tensor_type}\n" f"}}\n") + return MLIRFunction( + name=func_name, + definition=func_definition, + ) # Intentionally fixed seed! We want full reproducibility here, both across runs @@ -315,22 +386,22 @@ def generate_trace(func_name: str, lhs_rhs_type: MatrixElemTypeId, # Generates all output files' contents as strings. 
def generate(lhs_rhs_type: MatrixElemTypeId, acc_type: MatrixElemTypeId, shapes_id: ShapesId): - functions = {} + function_definitions = {} traces = [] for shape in get_test_shapes(shapes_id): for gen in get_test_generators(shapes_id): - func_name = function_name(lhs_rhs_type, acc_type, shape, gen) + function = generate_function(lhs_rhs_type, acc_type, shape, + gen.dynamicity) # Different testcases may differ only by runtime parameters but # share the same code. For example, dynamic-shapes testcases # share the same code involing tensor even though the runtime # value in the trace are different. That's why we call # generate_function conditionally, and generate_trace unconditionally. - if func_name not in functions: - functions[func_name] = generate_function(func_name, lhs_rhs_type, - acc_type, shape, gen) + if function.name not in function_definitions: + function_definitions[function.name] = function.definition traces.append( - generate_trace(func_name, lhs_rhs_type, acc_type, shape, gen)) - return (functions, traces) + generate_trace(function.name, lhs_rhs_type, acc_type, shape, gen)) + return (function_definitions, traces) def parse_arguments(): @@ -363,10 +434,10 @@ def parse_arguments(): return parser.parse_args() -def write_code_file(functions, filename): +def write_code_file(function_definitions, filename): with open(filename, "w") as file: - for funcname in functions: - file.write(functions[funcname] + "\n") + for funcname in function_definitions: + file.write(function_definitions[funcname] + "\n") def write_trace_file(traces, filename, module_path): @@ -417,8 +488,8 @@ def main(args): lhs_rhs_type = MatrixElemTypeId(args.lhs_rhs_type) acc_type = infer_acc_type(lhs_rhs_type) shapes_id = ShapesId(args.shapes) - (functions, traces) = generate(lhs_rhs_type, acc_type, shapes_id) - write_code_file(functions, args.output_code) + (function_definitions, traces) = generate(lhs_rhs_type, acc_type, shapes_id) + write_code_file(function_definitions, args.output_code) 
write_trace_file(traces, args.output_trace, args.module_path) From a902a443429f56b5868984ca637db70709e5cd0d Mon Sep 17 00:00:00 2001 From: MaheshRavishankar <1663364+MaheshRavishankar@users.noreply.github.com> Date: Wed, 27 Oct 2021 13:34:59 -0700 Subject: [PATCH 17/22] Avoid tie-ing input and output for dispatch from `tensor.extract_slice`. (#7478) Fixes #7467 --- .../Flow/Transforms/DispatchLinalgOnTensors.cpp | 3 ++- .../test/dispatch_linalg_on_tensors.mlir | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/iree/compiler/Dialect/Flow/Transforms/DispatchLinalgOnTensors.cpp b/iree/compiler/Dialect/Flow/Transforms/DispatchLinalgOnTensors.cpp index 6ba9c4bac62a..7861e2de71b3 100644 --- a/iree/compiler/Dialect/Flow/Transforms/DispatchLinalgOnTensors.cpp +++ b/iree/compiler/Dialect/Flow/Transforms/DispatchLinalgOnTensors.cpp @@ -476,7 +476,8 @@ static BlockArgument getTiedOperandBlockArgument(BlockArgument resultArg) { // block argument. Single use can potentially be relaxed. 
auto loadArg = loadOp.source().template dyn_cast(); - if (!loadArg || !loadArg.hasOneUse()) { + if (!loadArg || !loadArg.hasOneUse() || + loadArg.use_begin()->get() != storeOp.target()) { return nullptr; } return loadArg; diff --git a/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir b/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir index c42e9b3be3be..63fbc2f9d5bf 100644 --- a/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir +++ b/iree/compiler/Dialect/Flow/Transforms/test/dispatch_linalg_on_tensors.mlir @@ -1114,3 +1114,17 @@ func @dynamic_slice(%arg0 : i32, %arg1 : i32, %arg2 : tensor, // CHECK-SAME: tensor{%[[D1]], %[[D2]]}, tensor{%[[D0]]} // CHECK-NEXT: %[[ARG4:.+]]: !flow.dispatch.tensor // CHECK-SAME: %[[ARG5:.+]]: !flow.dispatch.tensor + +// ----- + +func @extract_slice(%arg0 : tensor, %arg1 : index, %arg2 : index, + %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) -> tensor { + %0 = tensor.extract_slice %arg0[%arg1, %arg2] [%arg3, %arg4] [%arg5, %arg6] : + tensor to tensor + return %0 : tensor +} +// CHECK: flow.dispatch.workgroups +// CHECK-NEXT: %[[INPUT:[a-zA-Z0-9]+]]: !flow.dispatch.tensor +// CHECK-SAME: %[[OUTPUT:[a-zA-Z0-9]+]]: !flow.dispatch.tensor +// CHECK: %[[SLICE:.+]] = flow.dispatch.tensor.load %[[INPUT]] +// CHECK: flow.dispatch.tensor.store %[[SLICE]], %[[OUTPUT]] From 19ebd9d3a11cdbf8dbd4e8fa003eb9854168c7bd Mon Sep 17 00:00:00 2001 From: bjacob Date: Wed, 27 Oct 2021 23:27:00 -0400 Subject: [PATCH 18/22] Enable matmul to mmt4d transformation for all types (not just f32) (#7477) Background: earlier I attempted to make mixed types work in vector.contract lowerings, see https://reviews.llvm.org/D112508 . See the closing comment there explaining the approach and why we abandoned it in favor of promoting inputs to the destination element type in vector.contract. 
Another minor cleanup is folded into this PR: we are dropping the flag --iree-codegen-vectorize-linalg-mmt4d from custom iree-opt flags in the build rules for e2e matmul tests, because this pass is already enabled by default in iree-translate. --- .../Codegen/Common/VectorizeMMT4d.cpp | 58 ++++++++++++++----- .../Transforms/ConvertLinalgMatmulToMmt4D.cpp | 5 -- iree/test/e2e/regression/BUILD | 2 - iree/test/e2e/regression/CMakeLists.txt | 4 -- 4 files changed, 44 insertions(+), 25 deletions(-) diff --git a/iree/compiler/Codegen/Common/VectorizeMMT4d.cpp b/iree/compiler/Codegen/Common/VectorizeMMT4d.cpp index 34340efeeb6b..c7db560c70bf 100644 --- a/iree/compiler/Codegen/Common/VectorizeMMT4d.cpp +++ b/iree/compiler/Codegen/Common/VectorizeMMT4d.cpp @@ -14,6 +14,23 @@ namespace iree_compiler { namespace { +Value promoteVector(Location loc, Value inputVector, Type promotedElementType, + PatternRewriter &rewriter) { + VectorType inputVectorType = inputVector.getType().cast(); + if (inputVectorType.getElementType() == promotedElementType) { + return inputVector; + } else { + auto promotedVectorType = inputVectorType.clone(promotedElementType); + if (promotedElementType.isIntOrIndex()) { + return rewriter.create(loc, inputVector, + promotedVectorType); + } else { + return rewriter.create(loc, inputVector, + promotedVectorType); + } + } +} + /// Converts linalg.mmt4d into vector.contract. /// This converts linalg.mmt4d with operands <1x1xM0xK0>, <1x1xK0xN0> /// to vector.contract where K0 is the contraction dimension. 
@@ -22,12 +39,13 @@ struct VectorizeMMT4DOp : public OpRewritePattern { LogicalResult matchAndRewrite(linalg::Mmt4DOp mmt4DOp, PatternRewriter &rewriter) const override { - auto lhs = mmt4DOp.inputs()[0]; - auto rhs = mmt4DOp.inputs()[1]; - auto dst = mmt4DOp.outputs()[0]; + Value lhs = mmt4DOp.inputs()[0]; + Value rhs = mmt4DOp.inputs()[1]; + Value dst = mmt4DOp.outputs()[0]; - auto lhsType = lhs.getType().dyn_cast(); - auto rhsType = rhs.getType().dyn_cast(); + ShapedType lhsType = lhs.getType().dyn_cast(); + ShapedType rhsType = rhs.getType().dyn_cast(); + ShapedType dstType = dst.getType().dyn_cast(); // This pattern expects tensors of static shapes. // In practice, dynamic shapes are meant to be handled by other passes, @@ -55,16 +73,20 @@ struct VectorizeMMT4DOp : public OpRewritePattern { int N0 = rhsType.getShape()[2]; int K0 = lhsType.getShape()[3]; - auto loc = mmt4DOp.getLoc(); - auto c0 = rewriter.create(loc, 0); + Location loc = mmt4DOp.getLoc(); + Value c0 = rewriter.create(loc, 0); + + Type lhsElementType = lhsType.getElementType(); + Type rhsElementType = rhsType.getElementType(); + Type dstElementType = dstType.getElementType(); - auto lhsVecType = VectorType::get({1, 1, M0, K0}, rewriter.getF32Type()); - auto rhsVecType = VectorType::get({1, 1, N0, K0}, rewriter.getF32Type()); - auto dstVecType = VectorType::get({1, 1, M0, N0}, rewriter.getF32Type()); + auto lhsVecType = VectorType::get({1, 1, M0, K0}, lhsElementType); + auto rhsVecType = VectorType::get({1, 1, N0, K0}, rhsElementType); + auto dstVecType = VectorType::get({1, 1, M0, N0}, dstElementType); - auto lhsVecType2D = VectorType::get({M0, K0}, rewriter.getF32Type()); - auto rhsVecType2D = VectorType::get({N0, K0}, rewriter.getF32Type()); - auto dstVecType2D = VectorType::get({M0, N0}, rewriter.getF32Type()); + auto lhsVecType2D = VectorType::get({M0, K0}, lhsElementType); + auto rhsVecType2D = VectorType::get({N0, K0}, rhsElementType); + auto dstVecType2D = VectorType::get({M0, N0}, 
dstElementType); auto identityMap = rewriter.getMultiDimIdentityMap(4); @@ -84,6 +106,14 @@ struct VectorizeMMT4DOp : public OpRewritePattern { Value dstVec2D = rewriter.create(loc, dstVecType2D, dstVec); + // Promote, if needed, the element type in the lhs and rhs vectors to + // match the dst vector, so that the vector.contract below will involve + // only one element type. This is in line with planned design, see + // the closing comment on https://reviews.llvm.org/D112508 where the + // alternative of using mixed types was considered. + Value promLhsVec2d = promoteVector(loc, lhsVec2D, dstElementType, rewriter); + Value promRhsVec2d = promoteVector(loc, rhsVec2D, dstElementType, rewriter); + // Generate the vector.contract on 2D vectors replacing the mmt4d op. auto m = rewriter.getAffineDimExpr(0); auto n = rewriter.getAffineDimExpr(1); @@ -96,7 +126,7 @@ struct VectorizeMMT4DOp : public OpRewritePattern { {getParallelIteratorTypeName(), getParallelIteratorTypeName(), getReductionIteratorTypeName()}); Value contractResult = rewriter.create( - loc, lhsVec2D, rhsVec2D, dstVec2D, indexingMaps, iterators); + loc, promLhsVec2d, promRhsVec2d, dstVec2D, indexingMaps, iterators); // Convert the output vector from 2D shape (M0xN0) to 4D shape (1x1xM0xN0) Value contractResult4D = diff --git a/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgMatmulToMmt4D.cpp b/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgMatmulToMmt4D.cpp index 1a103f853901..7e6cecf01c4d 100644 --- a/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgMatmulToMmt4D.cpp +++ b/iree/compiler/Dialect/Flow/Transforms/ConvertLinalgMatmulToMmt4D.cpp @@ -112,11 +112,6 @@ class LinalgMatmulOpToLinalgMmt4DOpPattern return failure(); } - // This is for float only matmul for now. Integer data type might require - // r.h.s layout change. 
- if (!lhsType.getElementType().isF32() || !rhsType.getElementType().isF32()) - return failure(); - int m = lhsType.getShape()[0]; int k = rhsType.getShape()[0]; int n = rhsType.getShape()[1]; diff --git a/iree/test/e2e/regression/BUILD b/iree/test/e2e/regression/BUILD index 77e48c3a3cda..71f6df50d603 100644 --- a/iree/test/e2e/regression/BUILD +++ b/iree/test/e2e/regression/BUILD @@ -131,7 +131,6 @@ iree_check_single_backend_test_suite( ], opt_flags = [ "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=%d N0=8" % (4 if lhs_rhs_type == "i8" else 1), - "--iree-codegen-vectorize-linalg-mmt4d", ], target_backends_and_drivers = [ ("dylib-llvm-aot", "dylib"), @@ -152,7 +151,6 @@ iree_check_single_backend_test_suite( ], opt_flags = [ "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=%d N0=8" % (4 if lhs_rhs_type == "i8" else 1), - "--iree-codegen-vectorize-linalg-mmt4d", ], target_backends_and_drivers = [ ("dylib-llvm-aot", "dylib"), diff --git a/iree/test/e2e/regression/CMakeLists.txt b/iree/test/e2e/regression/CMakeLists.txt index 795a8747bb66..309f907987df 100644 --- a/iree/test/e2e/regression/CMakeLists.txt +++ b/iree/test/e2e/regression/CMakeLists.txt @@ -176,7 +176,6 @@ iree_generated_trace_runner_test( "vmvx" OPT_FLAGS "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=4 N0=8" - "--iree-codegen-vectorize-linalg-mmt4d" ) iree_generated_trace_runner_test( @@ -197,7 +196,6 @@ iree_generated_trace_runner_test( "vmvx" OPT_FLAGS "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=1 N0=8" - "--iree-codegen-vectorize-linalg-mmt4d" ) iree_generated_trace_runner_test( @@ -216,7 +214,6 @@ iree_generated_trace_runner_test( "dylib" OPT_FLAGS "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=4 N0=8" - "--iree-codegen-vectorize-linalg-mmt4d" ) iree_generated_trace_runner_test( @@ -235,7 +232,6 @@ iree_generated_trace_runner_test( "dylib" OPT_FLAGS "--iree-flow-convert-linalg-matmul-to-mmt4d=M0=8 K0=1 N0=8" - "--iree-codegen-vectorize-linalg-mmt4d" ) ### 
BAZEL_TO_CMAKE_PRESERVES_ALL_CONTENT_BELOW_THIS_LINE ### From fe74bd486b587ce1df63e4264443010d317acfbc Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Wed, 27 Oct 2021 22:17:13 -0700 Subject: [PATCH 19/22] Refactor CMake rules for connecting to TF binaries (#7479) This makes the imported TF binaries more uniform with our other binaries in terms of their naming and the convenience aliases. It also factors some common logic into a function. This is only one option for changing the way the binaries are referenced. We could also use unqualified name (since we set it up so our binaries are globally unique and available by their basename). I don't think we ever came up with a consistent rule for that, other than that we shouldn't use the underscore-mangled name that only exists because CMake has weird restrictions on some target names (but not aliases). --- CMakeLists.txt | 15 +++--- integrations/tensorflow/CMakeLists.txt | 49 +------------------ .../python/iree/tools/tf/CMakeLists.txt | 8 +-- .../python/iree/tools/tflite/CMakeLists.txt | 8 +-- .../python/iree/tools/xla/CMakeLists.txt | 8 +-- .../iree_tf_compiler/CMakeLists.txt | 40 +++++++++++++++ 6 files changed, 59 insertions(+), 69 deletions(-) create mode 100644 integrations/tensorflow/iree_tf_compiler/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d937d199ba7..2dd1695ba311 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ endif() option(IREE_ENABLE_RUNTIME_TRACING "Enables instrumented runtime tracing." OFF) option(IREE_ENABLE_COMPILER_TRACING "Enables instrumented compiler tracing." OFF) +option(IREE_ENABLE_THREADING "Builds IREE in with thread library support." ON) option(IREE_BUILD_COMPILER "Builds the IREE compiler." ON) option(IREE_BUILD_TESTS "Builds IREE unit tests." ON) @@ -40,10 +41,9 @@ option(IREE_BUILD_SAMPLES "Builds IREE sample projects." ON) option(IREE_BUILD_TRACY "Builds tracy server tools." 
OFF) option(IREE_BUILD_TENSORFLOW_ALL "Builds all TensorFlow compiler frontends." OFF) -option(IREE_BUILD_TENSORFLOW_COMPILER "Builds TensorFlow compiler frontend." OFF) -option(IREE_BUILD_TFLITE_COMPILER "Builds the TFLite compiler frontend." OFF) -option(IREE_BUILD_XLA_COMPILER "Builds TensorFlow XLA compiler frontend." OFF) -option(IREE_ENABLE_THREADING "Builds IREE in with thread library support." ON) +option(IREE_BUILD_TENSORFLOW_COMPILER "Builds TensorFlow compiler frontend." "${IREE_BUILD_TENSORFLOW_ALL}") +option(IREE_BUILD_TFLITE_COMPILER "Builds the TFLite compiler frontend." "${IREE_BUILD_TENSORFLOW_ALL}") +option(IREE_BUILD_XLA_COMPILER "Builds TensorFlow XLA compiler frontend." "${IREE_BUILD_TENSORFLOW_ALL}") set(IREE_HAL_DRIVERS_TO_BUILD "all" CACHE STRING "Semicolon-separated list of HAL drivers to build, or \"all\".") @@ -66,15 +66,12 @@ if(${IREE_BUILD_TENSORFLOW_ALL} OR set(IREE_ENABLE_TENSORFLOW ON) endif() + option(IREE_BUILD_BINDINGS_TFLITE "Builds the IREE TFLite C API compatibility shim" ON) option(IREE_BUILD_BINDINGS_TFLITE_JAVA "Builds the IREE TFLite Java bindings with the C API compatibility shim" ON) # Default python bindings to enabled for some features. -if(${IREE_ENABLE_TENSORFLOW}) - option(IREE_BUILD_PYTHON_BINDINGS "Builds the IREE python bindings" ON) -else() - option(IREE_BUILD_PYTHON_BINDINGS "Builds the IREE python bindings" OFF) -endif() +option(IREE_BUILD_PYTHON_BINDINGS "Builds the IREE python bindings" "${IREE_ENABLE_TENSORFLOW}") #------------------------------------------------------------------------------- # Experimental project flags diff --git a/integrations/tensorflow/CMakeLists.txt b/integrations/tensorflow/CMakeLists.txt index 777bf753b35b..71c188ee396d 100644 --- a/integrations/tensorflow/CMakeLists.txt +++ b/integrations/tensorflow/CMakeLists.txt @@ -8,57 +8,10 @@ # dependent code under this directory tree. The CMake support is limited to # compiler binaries and python bindings. 
# -# Bazel is a beast that likes to be the center of the universe. There is some -# fragility in delegating to it in this fashion. -# # If this directory is included, then building TensorFlow is assumed (the # config option happens at the higher level). -set(_bazel_targets) -set(_executable_paths) - -set(IREE_TF_TOOLS_ROOT - "${CMAKE_SOURCE_DIR}/integrations/tensorflow/bazel-bin/iree_tf_compiler" - CACHE STRING "Root directory for IREE TensorFlow integration binaries") - - -if(${IREE_BUILD_TENSORFLOW_COMPILER} OR ${IREE_BUILD_TENSORFLOW_ALL}) - add_executable(iree_tf_compiler_iree-import-tf IMPORTED GLOBAL) - set_property(TARGET iree_tf_compiler_iree-import-tf - PROPERTY IMPORTED_LOCATION - "${IREE_TF_TOOLS_ROOT}/iree-import-tf" - ) -endif() - -if(${IREE_BUILD_TFLITE_COMPILER} OR ${IREE_BUILD_TENSORFLOW_ALL}) - add_executable(iree_tf_compiler_iree-import-tflite IMPORTED GLOBAL) - set_property(TARGET iree_tf_compiler_iree-import-tflite - PROPERTY IMPORTED_LOCATION - "${IREE_TF_TOOLS_ROOT}/iree-import-tflite" - ) -endif() - -if(${IREE_BUILD_XLA_COMPILER} OR ${IREE_BUILD_TENSORFLOW_ALL}) - add_executable(iree_tf_compiler_iree-import-xla IMPORTED GLOBAL) - set_property(TARGET iree_tf_compiler_iree-import-xla - PROPERTY IMPORTED_LOCATION - "${IREE_TF_TOOLS_ROOT}/iree-import-xla" - ) -endif() - -if(${IREE_BUILD_TESTS}) - add_executable(iree_tf_compiler_iree-tf-opt IMPORTED GLOBAL) - set_property(TARGET iree_tf_compiler_iree-tf-opt - PROPERTY IMPORTED_LOCATION - "${IREE_TF_TOOLS_ROOT}/iree-tf-opt" - ) - - add_executable(iree_tf_compiler_iree-opt-tflite IMPORTED GLOBAL) - set_property(TARGET iree_tf_compiler_iree-opt-tflite - PROPERTY IMPORTED_LOCATION - "${IREE_TF_TOOLS_ROOT}/iree-opt-tflite" - ) -endif() +add_subdirectory(iree_tf_compiler) if(${IREE_BUILD_PYTHON_BINDINGS}) add_subdirectory(bindings/python) diff --git a/integrations/tensorflow/bindings/python/iree/tools/tf/CMakeLists.txt b/integrations/tensorflow/bindings/python/iree/tools/tf/CMakeLists.txt index 
0cd23102a47d..77fd6c117012 100644 --- a/integrations/tensorflow/bindings/python/iree/tools/tf/CMakeLists.txt +++ b/integrations/tensorflow/bindings/python/iree/tools/tf/CMakeLists.txt @@ -14,12 +14,12 @@ iree_py_library( tf SRCS ${_srcs} DEPS - iree_tf_compiler_iree-import-tf + integrations::tensorflow::iree_tf_compiler::iree-import-tf ) iree_symlink_tool( TARGET tf - FROM_TOOL_TARGET iree_tf_compiler_iree-import-tf + FROM_TOOL_TARGET integrations::tensorflow::iree_tf_compiler::iree-import-tf TO_EXE_NAME iree-import-tf ) @@ -29,13 +29,13 @@ iree_py_install_package( MODULE_PATH iree/tools/tf FILES_MATCHING ${_srcs} DEPS - iree_tf_compiler_iree-import-tf + integrations::tensorflow::iree_tf_compiler::iree-import-tf ) # Since imported, need to resolve the TARGET_FILE ourselves instead of # install TARGETS form. install( - PROGRAMS "$<TARGET_FILE:iree_tf_compiler_iree-import-tf>" + PROGRAMS "$<TARGET_FILE:integrations::tensorflow::iree_tf_compiler::iree-import-tf>" DESTINATION "${PY_INSTALL_MODULE_DIR}" COMPONENT "${PY_INSTALL_COMPONENT}" ) diff --git a/integrations/tensorflow/bindings/python/iree/tools/tflite/CMakeLists.txt b/integrations/tensorflow/bindings/python/iree/tools/tflite/CMakeLists.txt index 69d301f7ab27..d5cbb650ea71 100644 --- a/integrations/tensorflow/bindings/python/iree/tools/tflite/CMakeLists.txt +++ b/integrations/tensorflow/bindings/python/iree/tools/tflite/CMakeLists.txt @@ -14,12 +14,12 @@ iree_py_library( tflite SRCS ${_srcs} DEPS - iree_tf_compiler_iree-import-tflite + integrations::tensorflow::iree_tf_compiler::iree-import-tflite ) iree_symlink_tool( TARGET tflite - FROM_TOOL_TARGET iree_tf_compiler_iree-import-tflite + FROM_TOOL_TARGET integrations::tensorflow::iree_tf_compiler::iree-import-tflite TO_EXE_NAME iree-import-tflite ) @@ -29,13 +29,13 @@ iree_py_install_package( MODULE_PATH iree/tools/tflite FILES_MATCHING ${_srcs} DEPS - iree_tf_compiler_iree-import-tflite + integrations::tensorflow::iree_tf_compiler::iree-import-tflite ) # Since imported, need to resolve the TARGET_FILE ourselves instead of # install TARGETS form.
install( - PROGRAMS "$<TARGET_FILE:iree_tf_compiler_iree-import-tflite>" + PROGRAMS "$<TARGET_FILE:integrations::tensorflow::iree_tf_compiler::iree-import-tflite>" DESTINATION "${PY_INSTALL_MODULE_DIR}" COMPONENT "${PY_INSTALL_COMPONENT}" ) diff --git a/integrations/tensorflow/bindings/python/iree/tools/xla/CMakeLists.txt b/integrations/tensorflow/bindings/python/iree/tools/xla/CMakeLists.txt index 8e7133d600b3..637bb8179abb 100644 --- a/integrations/tensorflow/bindings/python/iree/tools/xla/CMakeLists.txt +++ b/integrations/tensorflow/bindings/python/iree/tools/xla/CMakeLists.txt @@ -14,12 +14,12 @@ iree_py_library( xla SRCS ${_srcs} DEPS - iree_tf_compiler_iree-import-xla + integrations::tensorflow::iree_tf_compiler::iree-import-xla ) iree_symlink_tool( TARGET xla - FROM_TOOL_TARGET iree_tf_compiler_iree-import-xla + FROM_TOOL_TARGET integrations::tensorflow::iree_tf_compiler::iree-import-xla TO_EXE_NAME iree-import-xla ) @@ -29,13 +29,13 @@ iree_py_install_package( MODULE_PATH iree/tools/xla FILES_MATCHING ${_srcs} DEPS - iree_tf_compiler_iree-import-xla + integrations::tensorflow::iree_tf_compiler::iree-import-xla ) # Since imported, need to resolve the TARGET_FILE ourselves instead of # install TARGETS form. install( - PROGRAMS "$<TARGET_FILE:iree_tf_compiler_iree-import-xla>" + PROGRAMS "$<TARGET_FILE:integrations::tensorflow::iree_tf_compiler::iree-import-xla>" DESTINATION "${PY_INSTALL_MODULE_DIR}" COMPONENT "${PY_INSTALL_COMPONENT}" ) diff --git a/integrations/tensorflow/iree_tf_compiler/CMakeLists.txt b/integrations/tensorflow/iree_tf_compiler/CMakeLists.txt new file mode 100644 index 000000000000..0e3e42b4c018 --- /dev/null +++ b/integrations/tensorflow/iree_tf_compiler/CMakeLists.txt @@ -0,0 +1,40 @@ +# Copyright 2020 The IREE Authors +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +set(IREE_TF_TOOLS_ROOT + "${CMAKE_SOURCE_DIR}/integrations/tensorflow/bazel-bin/iree_tf_compiler" + CACHE STRING "Root directory for IREE TensorFlow integration binaries") + +iree_package_name(_PACKAGE_NAME) +iree_package_ns(_PACKAGE_NS) + +function(configure_tf_binary BINARY_NAME) + set(_NAME "${_PACKAGE_NAME}_${BINARY_NAME}") + add_executable("${_NAME}" IMPORTED GLOBAL) + set_property(TARGET ${_NAME} + PROPERTY IMPORTED_LOCATION + "${IREE_TF_TOOLS_ROOT}/${BINARY_NAME}" + ) + add_executable(${_PACKAGE_NS}::${BINARY_NAME} ALIAS ${_NAME}) + add_executable(${BINARY_NAME} ALIAS ${_NAME}) +endfunction() + +if(${IREE_BUILD_TENSORFLOW_COMPILER}) + configure_tf_binary("iree-import-tf") +endif() + +if(${IREE_BUILD_TFLITE_COMPILER}) + configure_tf_binary("iree-import-tflite") +endif() + +if(${IREE_BUILD_XLA_COMPILER}) + configure_tf_binary("iree-import-xla") +endif() + +if(${IREE_BUILD_TESTS}) + configure_tf_binary("iree-tf-opt") + configure_tf_binary("iree-opt-tflite") +endif() From 18a59f7b30bb47d6d9befca3a3222ecbb464dd54 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Thu, 28 Oct 2021 09:09:51 -0700 Subject: [PATCH 20/22] Adding iree_make_status_with_location for cuda/vulkan statuses. (#7469) This lets us see the location the status is raised from and not the status_util file formatting the error text. --- iree/base/status.h | 13 ++++ iree/hal/cuda/status_util.c | 6 +- iree/hal/vulkan/status_util.c | 122 ++++++++++++++++++++-------------- 3 files changed, 87 insertions(+), 54 deletions(-) diff --git a/iree/base/status.h b/iree/base/status.h index 790c29797e9c..81baad3a8004 100644 --- a/iree/base/status.h +++ b/iree/base/status.h @@ -230,6 +230,8 @@ typedef struct iree_status_handle_t* iree_status_t; #if IREE_STATUS_FEATURES == 0 #define IREE_STATUS_IMPL_MAKE_(code, ...) \ (iree_status_t)(uintptr_t)((code)&IREE_STATUS_CODE_MASK) +#define IREE_STATUS_IMPL_MAKE_LOC_(file, line, code, ...) 
\ + IREE_STATUS_IMPL_MAKE_(code) #undef IREE_STATUS_IMPL_RETURN_IF_API_ERROR_ #define IREE_STATUS_IMPL_RETURN_IF_API_ERROR_(var, ...) \ iree_status_t var = (IREE_STATUS_IMPL_IDENTITY_( \ @@ -254,6 +256,8 @@ typedef struct iree_status_handle_t* iree_status_t; #else #define IREE_STATUS_IMPL_MAKE_(...) \ IREE_STATUS_IMPL_MAKE_SWITCH_(__FILE__, __LINE__, __VA_ARGS__) +#define IREE_STATUS_IMPL_MAKE_LOC_(file, line, ...) \ + IREE_STATUS_IMPL_MAKE_SWITCH_(file, line, __VA_ARGS__) #endif // !IREE_STATUS_FEATURES // Returns an IREE_STATUS_OK. @@ -271,6 +275,15 @@ typedef struct iree_status_handle_t* iree_status_t; // return iree_make_status(IREE_STATUS_CANCELLED, "because %d > %d", a, b); #define iree_make_status IREE_STATUS_IMPL_MAKE_ +// Makes an iree_status_t with the given iree_status_code_t code using the given +// source location. Besides taking the file and line of the source location this +// is the same as iree_make_status. +// +// Examples: +// return iree_make_status_with_location( +// "file.c", 40, IREE_STATUS_CANCELLED, "because %d > %d", a, b); +#define iree_make_status_with_location IREE_STATUS_IMPL_MAKE_LOC_ + // Propagates the error returned by (expr) by returning from the current // function on non-OK status. Optionally annotates the status with additional // information (see iree_status_annotate for more information). 
diff --git a/iree/hal/cuda/status_util.c b/iree/hal/cuda/status_util.c index b6a1b9480cd3..7532ecd22c71 100644 --- a/iree/hal/cuda/status_util.c +++ b/iree/hal/cuda/status_util.c @@ -26,7 +26,7 @@ iree_status_t iree_hal_cuda_result_to_status( if (syms->cuGetErrorString(result, &error_string) != CUDA_SUCCESS) { error_string = "Unknown error."; } - return iree_make_status(IREE_STATUS_INTERNAL, - "CUDA driver error '%s' (%d): %s", error_name, result, - error_string); + return iree_make_status_with_location(file, line, IREE_STATUS_INTERNAL, + "CUDA driver error '%s' (%d): %s", + error_name, result, error_string); } diff --git a/iree/hal/vulkan/status_util.c b/iree/hal/vulkan/status_util.c index 705f299ec213..e61008c44900 100644 --- a/iree/hal/vulkan/status_util.c +++ b/iree/hal/vulkan/status_util.c @@ -37,17 +37,19 @@ iree_status_t iree_hal_vulkan_result_to_status(VkResult result, // Error codes. case VK_ERROR_OUT_OF_HOST_MEMORY: // A host memory allocation has failed. - return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, - "VK_ERROR_OUT_OF_HOST_MEMORY"); + return iree_make_status_with_location(file, line, + IREE_STATUS_RESOURCE_EXHAUSTED, + "VK_ERROR_OUT_OF_HOST_MEMORY"); case VK_ERROR_OUT_OF_DEVICE_MEMORY: // A device memory allocation has failed. - return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, - "VK_ERROR_OUT_OF_DEVICE_MEMORY"); + return iree_make_status_with_location(file, line, + IREE_STATUS_RESOURCE_EXHAUSTED, + "VK_ERROR_OUT_OF_DEVICE_MEMORY"); case VK_ERROR_INITIALIZATION_FAILED: // Initialization of an object could not be completed for // implementation-specific reasons. - return iree_make_status(IREE_STATUS_UNAVAILABLE, - "VK_ERROR_INITIALIZATION_FAILED"); + return iree_make_status_with_location(file, line, IREE_STATUS_UNAVAILABLE, + "VK_ERROR_INITIALIZATION_FAILED"); case VK_ERROR_DEVICE_LOST: // The logical or physical device has been lost. 
// @@ -116,87 +118,101 @@ iree_status_t iree_hal_vulkan_result_to_status(VkResult result, // command buffer is in the pending state, or whether resources are // considered in-use by the device, a return value of // VK_ERROR_DEVICE_LOST is equivalent to VK_SUCCESS. - return iree_make_status(IREE_STATUS_INTERNAL, "VK_ERROR_DEVICE_LOST"); + return iree_make_status_with_location(file, line, IREE_STATUS_INTERNAL, + "VK_ERROR_DEVICE_LOST"); case VK_ERROR_MEMORY_MAP_FAILED: // Mapping of a memory object has failed. - return iree_make_status(IREE_STATUS_INTERNAL, - "VK_ERROR_MEMORY_MAP_FAILED"); + return iree_make_status_with_location(file, line, IREE_STATUS_INTERNAL, + "VK_ERROR_MEMORY_MAP_FAILED"); case VK_ERROR_LAYER_NOT_PRESENT: // A requested layer is not present or could not be loaded. - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "VK_ERROR_LAYER_NOT_PRESENT"); + return iree_make_status_with_location( + file, line, IREE_STATUS_UNIMPLEMENTED, "VK_ERROR_LAYER_NOT_PRESENT"); case VK_ERROR_EXTENSION_NOT_PRESENT: // A requested extension is not supported. - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "VK_ERROR_EXTENSION_NOT_PRESENT"); + return iree_make_status_with_location(file, line, + IREE_STATUS_UNIMPLEMENTED, + "VK_ERROR_EXTENSION_NOT_PRESENT"); case VK_ERROR_FEATURE_NOT_PRESENT: // A requested feature is not supported. - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "VK_ERROR_FEATURE_NOT_PRESENT"); + return iree_make_status_with_location(file, line, + IREE_STATUS_UNIMPLEMENTED, + "VK_ERROR_FEATURE_NOT_PRESENT"); case VK_ERROR_INCOMPATIBLE_DRIVER: // The requested version of Vulkan is not supported by the driver or is // otherwise incompatible for implementation-specific reasons. 
- return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, - "VK_ERROR_INCOMPATIBLE_DRIVER"); + return iree_make_status_with_location(file, line, + IREE_STATUS_FAILED_PRECONDITION, + "VK_ERROR_INCOMPATIBLE_DRIVER"); case VK_ERROR_TOO_MANY_OBJECTS: // Too many objects of the type have already been created. - return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, - "VK_ERROR_TOO_MANY_OBJECTS"); + return iree_make_status_with_location(file, line, + IREE_STATUS_RESOURCE_EXHAUSTED, + "VK_ERROR_TOO_MANY_OBJECTS"); case VK_ERROR_FORMAT_NOT_SUPPORTED: // A requested format is not supported on this device. - return iree_make_status(IREE_STATUS_UNIMPLEMENTED, - "VK_ERROR_FORMAT_NOT_SUPPORTED"); + return iree_make_status_with_location(file, line, + IREE_STATUS_UNIMPLEMENTED, + "VK_ERROR_FORMAT_NOT_SUPPORTED"); case VK_ERROR_FRAGMENTED_POOL: // A pool allocation has failed due to fragmentation of the pool’s // memory. This must only be returned if no attempt to allocate host // or device memory was made to accommodate the new allocation. - return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, - "VK_ERROR_FRAGMENTED_POOL"); + return iree_make_status_with_location(file, line, + IREE_STATUS_RESOURCE_EXHAUSTED, + "VK_ERROR_FRAGMENTED_POOL"); case VK_ERROR_OUT_OF_POOL_MEMORY: // A pool memory allocation has failed. This must only be returned if no // attempt to allocate host or device memory was made to accommodate the // new allocation. If the failure was definitely due to fragmentation of // the pool, VK_ERROR_FRAGMENTED_POOL should be returned instead. - return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, - "VK_ERROR_OUT_OF_POOL_MEMORY"); + return iree_make_status_with_location(file, line, + IREE_STATUS_RESOURCE_EXHAUSTED, + "VK_ERROR_OUT_OF_POOL_MEMORY"); case VK_ERROR_INVALID_EXTERNAL_HANDLE: // An external handle is not a valid handle of the specified type. 
- return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "VK_ERROR_INVALID_EXTERNAL_HANDLE"); + return iree_make_status_with_location(file, line, + IREE_STATUS_INVALID_ARGUMENT, + "VK_ERROR_INVALID_EXTERNAL_HANDLE"); case VK_ERROR_SURFACE_LOST_KHR: // A surface is no longer available. - return iree_make_status(IREE_STATUS_UNAVAILABLE, - "VK_ERROR_SURFACE_LOST_KHR"); + return iree_make_status_with_location(file, line, IREE_STATUS_UNAVAILABLE, + "VK_ERROR_SURFACE_LOST_KHR"); case VK_ERROR_NATIVE_WINDOW_IN_USE_KHR: // The requested window is already in use by Vulkan or another API in a // manner which prevents it from being used again. - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR"); + return iree_make_status_with_location( + file, line, IREE_STATUS_INVALID_ARGUMENT, + "VK_ERROR_NATIVE_WINDOW_IN_USE_KHR"); case VK_ERROR_OUT_OF_DATE_KHR: // A surface has changed in such a way that it is no longer compatible // with the swapchain, and further presentation requests using the // swapchain will fail. Applications must query the new surface properties // and recreate their swapchain if they wish to continue presenting to the // surface. - return iree_make_status(IREE_STATUS_FAILED_PRECONDITION, - "VK_ERROR_OUT_OF_DATE_KHR"); + return iree_make_status_with_location(file, line, + IREE_STATUS_FAILED_PRECONDITION, + "VK_ERROR_OUT_OF_DATE_KHR"); case VK_ERROR_INCOMPATIBLE_DISPLAY_KHR: // The display used by a swapchain does not use the same presentable image // layout, or is incompatible in a way that prevents sharing an image. - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR"); + return iree_make_status_with_location( + file, line, IREE_STATUS_INVALID_ARGUMENT, + "VK_ERROR_INCOMPATIBLE_DISPLAY_KHR"); case VK_ERROR_VALIDATION_FAILED_EXT: // Validation layer testing failed. 
It is not expected that an // application would see this this error code during normal use of the // validation layers. - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "VK_ERROR_VALIDATION_FAILED_EXT"); + return iree_make_status_with_location(file, line, + IREE_STATUS_INVALID_ARGUMENT, + "VK_ERROR_VALIDATION_FAILED_EXT"); case VK_ERROR_INVALID_SHADER_NV: // One or more shaders failed to compile or link. More details are // reported back to the application when the validation layer is enabled // using the extension VK_EXT_debug_report. - return iree_make_status(IREE_STATUS_INVALID_ARGUMENT, - "VK_ERROR_INVALID_SHADER_NV"); + return iree_make_status_with_location(file, line, + IREE_STATUS_INVALID_ARGUMENT, + "VK_ERROR_INVALID_SHADER_NV"); case VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT: // When creating an image with // VkImageDrmFormatModifierExplicitCreateInfoEXT, it is the application’s @@ -208,33 +224,37 @@ iree_status_t iree_hal_vulkan_result_to_status(VkResult result, // outside the scope of Vulkan, and therefore not described by Valid Usage // requirements). If this validation fails, then vkCreateImage returns // VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT. - return iree_make_status( - IREE_STATUS_INVALID_ARGUMENT, + return iree_make_status_with_location( + file, line, IREE_STATUS_INVALID_ARGUMENT, "VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT"); case VK_ERROR_FRAGMENTATION_EXT: // A descriptor pool creation has failed due to fragmentation. - return iree_make_status(IREE_STATUS_RESOURCE_EXHAUSTED, - "VK_ERROR_FRAGMENTATION_EXT"); + return iree_make_status_with_location(file, line, + IREE_STATUS_RESOURCE_EXHAUSTED, + "VK_ERROR_FRAGMENTATION_EXT"); case VK_ERROR_NOT_PERMITTED_EXT: // When creating a queue, the caller does not have sufficient privileges // to request to acquire a priority above the default priority // (VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT). 
- return iree_make_status(IREE_STATUS_PERMISSION_DENIED, - "VK_ERROR_NOT_PERMITTED_EXT"); + return iree_make_status_with_location(file, line, + IREE_STATUS_PERMISSION_DENIED, + "VK_ERROR_NOT_PERMITTED_EXT"); case VK_ERROR_INVALID_DEVICE_ADDRESS_EXT: // A buffer creation failed because the requested address is not // available. - return iree_make_status(IREE_STATUS_OUT_OF_RANGE, - "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT"); + return iree_make_status_with_location( + file, line, IREE_STATUS_OUT_OF_RANGE, + "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT"); case VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT: // An operation on a swapchain created with // VK_FULL_SCREEN_EXCLUSIVE_APPLICATION_CONTROLLED_EXT failed as it did // not have exlusive full-screen access. This may occur due to // implementation-dependent reasons, outside of the application’s control. - return iree_make_status(IREE_STATUS_UNAVAILABLE, - "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"); + return iree_make_status_with_location( + file, line, IREE_STATUS_UNAVAILABLE, + "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"); default: - return iree_make_status(IREE_STATUS_UNKNOWN, "VkResult=%u", - (uint32_t)result); + return iree_make_status_with_location(file, line, IREE_STATUS_UNKNOWN, + "VkResult=%u", (uint32_t)result); } } From 7e3a374606123bc71aa46e8a5af11d55f02b93b9 Mon Sep 17 00:00:00 2001 From: Geoffrey Martin-Noble Date: Thu, 28 Oct 2021 11:55:14 -0700 Subject: [PATCH 21/22] Actually run asan on the asan build (#7483) :facepalm: Fixes https://github.com/google/iree/issues/6849 --- .../gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh | 6 ++++-- .../cmake/linux/x86-swiftshader-asan/build_kokoro.sh | 2 +- .../gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh index 3c6a6158b02d..1d9512db0484 100755 
--- a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh +++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh @@ -100,6 +100,9 @@ label_exclude_regex="($(IFS="|" ; echo "${label_exclude_args[*]?}"))" # These tests currently have asan failures # TODO(#5715): Fix these declare -a excluded_tests=( + "iree/base/internal/file_io_test" + "iree/samples/static_library/static_library_demo_test" + "bindings/tflite/smoke_test" "iree/hal/cts/allocator_test" "iree/hal/cts/buffer_mapping_test" "iree/hal/cts/command_buffer_test" @@ -107,10 +110,9 @@ declare -a excluded_tests=( "iree/hal/cts/driver_test" "iree/hal/cts/event_test" "iree/hal/cts/executable_layout_test" - "iree/hal/cts/semaphore_test" "iree/hal/cts/semaphore_submission_test" + "iree/hal/cts/semaphore_test" "iree/modules/check/check_test" - "bindings/tflite/smoke_test" "iree/samples/simple_embedding/simple_embedding_vulkan_test" ) diff --git a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh index c4e0084c4603..4af8334c0970 100755 --- a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh +++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh @@ -25,7 +25,7 @@ docker_setup docker run "${DOCKER_RUN_ARGS[@]?}" \ gcr.io/iree-oss/cmake-swiftshader@sha256:031aded9cd66d30fcfa4dabea05a69721f33239516bc2e10ca216afd9ae4c012 \ - build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build.sh + build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build.sh # Kokoro will rsync this entire directory back to the executor orchestrating the # build which takes forever and is totally useless. 
diff --git a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg index de6fb31e1e2e..73224e5e83f2 100644 --- a/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg +++ b/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/common.cfg @@ -6,4 +6,4 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader/build_kokoro.sh" +build_file: "iree/build_tools/kokoro/gcp_ubuntu/cmake/linux/x86-swiftshader-asan/build_kokoro.sh" From dbdfd96dcb48af0718e30fd3070f491d13474760 Mon Sep 17 00:00:00 2001 From: bjacob Date: Thu, 28 Oct 2021 15:41:27 -0400 Subject: [PATCH 22/22] enable asserts in a majority of CMake CI builds (#7482) Bazel builds already had asserts enabled, so this is only about CMake builds. It's generally useful to build CI with asserts, but we wouldn't want to build only with asserts enabled: after all, the notion that asserts are only actually assertions and don't play a part in the program's actual function is left entirely up to the programmer, and we want to catch mistakes at that level, e.g. instances of assert(is_status_ok(important_file_io())) expanding to nothing in non-asserts builds. So this PR adds IREE_ENABLE_ASSERTIONS=ON to some common build.sh scripts used for CI, but intentionally not all of them. For example, the -asan CI uses its own build.sh file and we are intentionally not adding asserts there just so we keep some non-asserts coverage (this isn't embodying an opinion about whether to conflate asserts and asan in the same builds or to keep them separate! it's just what was quick to do here). Fixes Issue #7320.
--- build_tools/cmake/build_android.sh | 2 ++ build_tools/cmake/build_riscv.sh | 2 ++ build_tools/cmake/rebuild.sh | 7 +++++++ 3 files changed, 11 insertions(+) diff --git a/build_tools/cmake/build_android.sh b/build_tools/cmake/build_android.sh index 12d1bd1175ae..dcf07f48862a 100755 --- a/build_tools/cmake/build_android.sh +++ b/build_tools/cmake/build_android.sh @@ -45,6 +45,7 @@ cd build-host # Configure, build, install. "${CMAKE_BIN?}" -G Ninja .. \ -DCMAKE_INSTALL_PREFIX=./install \ + -DIREE_ENABLE_ASSERTIONS=ON \ -DIREE_BUILD_COMPILER=ON \ -DIREE_BUILD_TESTS=OFF \ -DIREE_BUILD_BENCHMARKS=ON \ @@ -74,6 +75,7 @@ cd build-android -DANDROID_ABI="${ANDROID_ABI?}" \ -DANDROID_PLATFORM=android-29 \ -DIREE_HOST_BINARY_ROOT=$PWD/../build-host/install \ + -DIREE_ENABLE_ASSERTIONS=ON \ -DIREE_BUILD_COMPILER=OFF \ -DIREE_BUILD_TESTS=ON \ -DIREE_BUILD_SAMPLES=OFF diff --git a/build_tools/cmake/build_riscv.sh b/build_tools/cmake/build_riscv.sh index bf085acec339..4bc9ae4d5bc0 100755 --- a/build_tools/cmake/build_riscv.sh +++ b/build_tools/cmake/build_riscv.sh @@ -40,6 +40,7 @@ fi -DCMAKE_INSTALL_PREFIX="${BUILD_HOST_DIR?}/install" \ -DCMAKE_C_COMPILER=clang \ -DCMAKE_CXX_COMPILER=clang++ \ + -DIREE_ENABLE_ASSERTIONS=ON \ -DIREE_BUILD_COMPILER=ON \ -DIREE_BUILD_TESTS=OFF \ -DIREE_BUILD_SAMPLES=OFF \ @@ -66,6 +67,7 @@ args=( -DCMAKE_TOOLCHAIN_FILE="$(realpath ${ROOT_DIR?}/build_tools/cmake/riscv.toolchain.cmake)" -DIREE_HOST_BINARY_ROOT="$(realpath ${BUILD_HOST_DIR?}/install)" -DRISCV_CPU="${RISCV_CONFIG?}" + -DIREE_ENABLE_ASSERTIONS=ON -DIREE_BUILD_COMPILER=OFF -DIREE_BUILD_SAMPLES=ON ) diff --git a/build_tools/cmake/rebuild.sh b/build_tools/cmake/rebuild.sh index 2e202457b967..68e94200f089 100755 --- a/build_tools/cmake/rebuild.sh +++ b/build_tools/cmake/rebuild.sh @@ -41,6 +41,13 @@ CMAKE_ARGS=( # Enable building the python bindings on CI. Most heavy targets are gated on # IREE_ENABLE_TENSORFLOW, so what's left here should be fast. 
"-DIREE_BUILD_PYTHON_BINDINGS=ON" + + # Enable assertions. We wouldn't want to be testing *only* with assertions + # enabled, but at the moment only certain CI builds are using this script, + # e.g. ASan builds are not using this, so by enabling assertions here, we + # get a reasonable mix of {builds with asserts, builds with other features + # such as ASan but without asserts}. + "-DIREE_ENABLE_ASSERTIONS=ON" ) "$CMAKE_BIN" "${CMAKE_ARGS[@]?}" ..