diff --git a/docs/ptoas-tile-fusion-design.md b/docs/ptoas-tile-fusion-design.md index 88da712ca..2eefbc3ce 100644 --- a/docs/ptoas-tile-fusion-design.md +++ b/docs/ptoas-tile-fusion-design.md @@ -203,7 +203,7 @@ PTO 指令在 Davinci 架构上以 Unified Buffer(UB)中驻留的数据块 - `--pto-backend=vpto` - `--pto-arch=a5` -- 显式传入 `--enable-op-fusion` +- `--enable-op-fusion` 未显式设置时默认开启;可通过 `--enable-op-fusion=false` 关闭 #### 4.2.2 输入层级支持 @@ -255,7 +255,7 @@ PTO 指令在 Davinci 架构上以 Unified Buffer(UB)中驻留的数据块 #### 4.2.4 非目标路径 - EmitC 后端会忽略 `--enable-op-fusion`。 -- 未开启 `--enable-op-fusion` 时,普通 VPTO 路径不会形成 `pto.fusion_region`,也不会进入 post-lowering 融合生命周期。 +- 显式传入 `--enable-op-fusion=false` 时,普通 VPTO 路径不会形成 `pto.fusion_region`,也不会进入 post-lowering 融合生命周期。 - 后端分界线已固定为 `ExpandTileOp`;原有的 `View2Memref` / `PTOToA5VM` 主线已移除。 --- diff --git a/test/lit/tile_fusion/op_fusion_cli_flags.pto b/test/lit/tile_fusion/op_fusion_cli_flags.pto index 0c9ee9960..fabc5e443 100644 --- a/test/lit/tile_fusion/op_fusion_cli_flags.pto +++ b/test/lit/tile_fusion/op_fusion_cli_flags.pto @@ -6,13 +6,14 @@ // INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. // See LICENSE in the root of the software repository for the full text of the License. -// Guards --enable-op-fusion CLI visibility and warning behavior on the +// Guards --enable-op-fusion CLI visibility and tri-state behavior on the // EmitC mainline frontend-fusion gate. 
// // RUN: ptoas --help-hidden 2>&1 | FileCheck %s --check-prefix=HELP -// RUN: ptoas --pto-arch=a5 --pto-level=level2 --enable-op-fusion %s -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=LEVEL2 -// RUN: ptoas --pto-arch=a5 --pto-level=level1 --enable-op-fusion %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=LEVEL1 -// RUN: ptoas --pto-arch=a3 --enable-op-fusion %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=A3 +// RUN: ptoas --pto-arch=a5 --pto-level=level2 %s -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=DEFAULT-A5 +// RUN: ptoas --pto-arch=a5 --pto-level=level2 --enable-op-fusion=false %s -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=DISABLE-A5 +// RUN: ptoas --pto-arch=a5 --pto-level=level1 --enable-op-fusion=true %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=LEVEL1 +// RUN: not ptoas --pto-arch=a3 --enable-op-fusion=true %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=A3 module { func.func @fusion_cli_flags() { @@ -22,8 +23,12 @@ module { // HELP: --enable-op-fusion -// LEVEL2-NOT: Warning: --enable-op-fusion is ignored +// DEFAULT-A5-NOT: Warning: +// DEFAULT-A5-NOT: Error: -// LEVEL1: Warning: --enable-op-fusion is ignored because --pto-level=level2 or level3 is required. +// DISABLE-A5-NOT: Warning: +// DISABLE-A5-NOT: Error: -// A3: Warning: --enable-op-fusion is ignored because --pto-arch=a5 is required. +// LEVEL1: Warning: --enable-op-fusion=true is ignored because --pto-level=level2 or level3 is required. + +// A3: Error: --enable-op-fusion=true requires --pto-arch=a5. diff --git a/test/lit/tile_fusion/op_fusion_nonfused_control.pto b/test/lit/tile_fusion/op_fusion_nonfused_control.pto index dc753b288..7bedae91b 100644 --- a/test/lit/tile_fusion/op_fusion_nonfused_control.pto +++ b/test/lit/tile_fusion/op_fusion_nonfused_control.pto @@ -7,13 +7,15 @@ // See LICENSE in the root of the software repository for the full text of the License. 
// Guards the non-fused control surface for frontend fusion: -// - no flag keeps the baseline unfused -// - A3 with the flag warns and stays unfused -// - level1 with the flag warns and stays unfused +// - A5 level2 defaults to fused unless explicitly disabled +// - A5 level2 with --enable-op-fusion=false stays unfused +// - A3 with --enable-op-fusion=true is rejected +// - level1 with --enable-op-fusion=true warns and stays unfused // -// RUN: ptoas --pto-arch=a5 --pto-level=level2 --emit-pto-ir %s -o - | FileCheck %s --check-prefix=NOFUSE -// RUN: ptoas --pto-arch=a3 --enable-op-fusion --emit-pto-ir %s -o - 2>&1 | FileCheck %s --check-prefix=A3 -// RUN: ptoas --pto-arch=a5 --pto-level=level1 --enable-op-fusion --emit-pto-ir %s -o - 2>&1 | FileCheck %s --check-prefix=LEVEL1 +// RUN: ptoas --pto-arch=a5 --pto-level=level2 --emit-pto-ir %s -o - | FileCheck %s --check-prefix=DEFAULTA5 +// RUN: ptoas --pto-arch=a5 --pto-level=level2 --enable-op-fusion=false --emit-pto-ir %s -o - | FileCheck %s --check-prefix=NOFUSE +// RUN: not ptoas --pto-arch=a3 --enable-op-fusion=true --emit-pto-ir %s -o - 2>&1 | FileCheck %s --check-prefix=A3 +// RUN: ptoas --pto-arch=a5 --pto-level=level1 --enable-op-fusion=true --emit-pto-ir %s -o - 2>&1 | FileCheck %s --check-prefix=LEVEL1 module { func.func @fusion_nonfused_control( @@ -36,13 +38,11 @@ module { // NOFUSE-NEXT: pto.tadd ins( // NOFUSE: return -// A3: Warning: --enable-op-fusion is ignored because --pto-arch=a5 is required. -// A3-LABEL: func.func @fusion_nonfused_control( -// A3-NOT: pto.fusion_region -// A3: pto.tadd ins( -// A3-NEXT: pto.tadd ins( +// DEFAULTA5-LABEL: func.func @fusion_nonfused_control( +// DEFAULTA5: pto.fusion_region -// LEVEL1: Warning: --enable-op-fusion is ignored because --pto-level=level2 or level3 is required. +// A3: Error: --enable-op-fusion=true requires --pto-arch=a5. +// LEVEL1: Warning: --enable-op-fusion=true is ignored because --pto-level=level2 or level3 is required. 
// LEVEL1-LABEL: func.func @fusion_nonfused_control( // LEVEL1-NOT: pto.fusion_region // LEVEL1: pto.tadd ins( diff --git a/tools/ptoas/ptoas.cpp b/tools/ptoas/ptoas.cpp index 5bb682167..9f9a89fff 100644 --- a/tools/ptoas/ptoas.cpp +++ b/tools/ptoas/ptoas.cpp @@ -422,11 +422,12 @@ static pto::ExpandTileOpOptions resolveExpandTileOpOptions(int argc, return expandOpts; } -static llvm::cl::opt<bool> enableOpFusion( +static llvm::cl::opt<llvm::cl::boolOrDefault> enableOpFusion( "enable-op-fusion", - llvm::cl::desc("Enable A5 tile fusion on level2/level3. EmitC uses " - "last-use annotation; VPTO uses fusion-region lifecycle."), - llvm::cl::init(false)); + llvm::cl::desc("Control A5 tile fusion on level2/level3. Defaults to " + "enabled on A5, disabled on A3. EmitC uses last-use " + "annotation; VPTO uses fusion-region lifecycle."), + llvm::cl::init(llvm::cl::BOU_UNSET)); static llvm::cl::opt disableInferLayout( "disable-infer-layout", @@ -1612,18 +1613,25 @@ int mlir::pto::compilePTOASModule( return 1; } - if (enableOpFusion) { - if (arch != "a5") { - llvm::errs() << "Warning: --enable-op-fusion is ignored because " - "--pto-arch=a5 is required.\n"; - } else if (effectiveLevel == PTOBuildLevel::Level1) { - llvm::errs() << "Warning: --enable-op-fusion is ignored because " - "--pto-level=level2 or level3 is required.\n"; - } + const bool requestedEnableOpFusion = enableOpFusion == llvm::cl::BOU_TRUE; + const bool explicitDisableOpFusion = enableOpFusion == llvm::cl::BOU_FALSE; + const bool defaultEnableOpFusion = + enableOpFusion == llvm::cl::BOU_UNSET && arch == "a5"; + const bool opFusionEnabled = + (requestedEnableOpFusion || defaultEnableOpFusion) && + !explicitDisableOpFusion; + + if (requestedEnableOpFusion && arch != "a5") { + llvm::errs() << "Error: --enable-op-fusion=true requires --pto-arch=a5.\n"; + return 1; + } + if (requestedEnableOpFusion && effectiveLevel == PTOBuildLevel::Level1) { + llvm::errs() << "Warning: --enable-op-fusion=true is ignored because " + "--pto-level=level2 or level3 
is required.\n"; } const bool enableA5FusionPath = - enableOpFusion && arch == "a5" && + opFusionEnabled && arch == "a5" && effectiveLevel != PTOBuildLevel::Level1; const bool enableA5EmitCFusionPath = enableA5FusionPath && effectiveBackend == PTOBackend::EmitC;