diff --git a/.github/workflows/ci_sim.yml b/.github/workflows/ci_sim.yml index 20862b519e..498a730505 100644 --- a/.github/workflows/ci_sim.yml +++ b/.github/workflows/ci_sim.yml @@ -439,7 +439,6 @@ jobs: # They are split into separate pytest processes to avoid backend global-state conflicts. run_pypto_core_smoke() { local platform="$1" - local dyn_valid_shape="tests/st/runtime/control_flow/test_dyn_orch_shape.py::TestDynOrchShapeOperations::test_dyn_orch_valid_shape_add[shape0-valid_shape0-${platform}]" local cross_core="tests/st/runtime/cross_core/test_cross_core.py::TestCrossCore::test_tpush_tpop_v2c_updown[${platform}]" local pypto_smoke_tests=( tests/st/examples/00_hello_world/test_hello_world.py @@ -450,9 +449,15 @@ jobs: tests/st/runtime/framework_and_models/test_jit.py::TestJITExecution::test_cache_hit_reuses_compiled_program tests/st/runtime/framework_and_models/test_jit.py::TestJITDynamicBatch::test_one_artifact_serves_multiple_batches tests/st/runtime/framework_and_models/test_compiled_program.py::TestManualWorkerExtraction::test_block_dim_override_runs - "${dyn_valid_shape}" "${cross_core}" ) + # The a5sim self-hosted environment does not ship the PyPTO + # tensormap_and_ringbuffer runtime required by this dyn-orch case. + # Keep the coverage on a2a3sim instead of failing unrelated PTOAS PRs. + if [[ "${platform}" != "a5sim" ]]; then + local dyn_valid_shape="tests/st/runtime/control_flow/test_dyn_orch_shape.py::TestDynOrchShapeOperations::test_dyn_orch_valid_shape_add[shape0-valid_shape0-${platform}]" + pypto_smoke_tests+=("${dyn_valid_shape}") + fi run_pypto_pytest "${platform}" core_ptoas "${pypto_smoke_tests[@]}" } diff --git a/docs/designs/ptoas-debug-name-hints-design.md b/docs/designs/ptoas-debug-name-hints-design.md new file mode 100644 index 0000000000..0815e322c1 --- /dev/null +++ b/docs/designs/ptoas-debug-name-hints-design.md @@ -0,0 +1,291 @@ +# PTOAS 变量名保留与调试名提示设计 + +## 1. 文档范围 + +本文定义 issue `#337` 对应的能力: + +- 前端为 PTO IR 附带“原始变量名”提示信息 +- PTOAS 在 `.pto -> .cpp` 编译链路中输出稳定的溯源注释,提升问题定位效率 + +本文只讨论调试可定位性,不改变任何 IR 语义、优化行为或生成代码功能。 + +## 2. 背景问题 + +当前 PTOAS 在 `level3` 编译链路中会经过多轮 rewrite、CSE 和 EmitC 降低,最终 `kernel.cpp` +中的局部变量名通常被重新编号为 `v0`、`v1`、`v2`。 + +这会带来两个直接问题: + +- 输入 `.pto` 与输出 `.cpp` 不容易一一对照 +- 前端 Python 里的业务变量名无法保留下来,定位问题时大量 `vXXX` 可读性很差 + +issue `#337` 希望解决的是“可对照、可定位”,而不是改变编译结果本身。 + +## 3. 设计目标 + +本设计当前阶段的目标如下: + +- 支持前端向 PTO IR 传递可选的变量名提示信息 +- 生成 `kernel.cpp` 时为可恢复的值追加 `// pto: %name` 溯源注释 +- 默认打印 IR 时不明显增加噪声,避免把 IR 变丑 +- 在发生 CSE、控制流合并、lowering 新造临时值时,仍能给出稳定、可定位的来源提示 + +## 4. 非目标 + +本设计明确不保证以下行为: + +- 不保证输入 `.pto` 中的 `%v37` 在输出 `.cpp` 中仍然严格叫 `v37` +- 不保证所有中间优化后仍保留一一对应关系 +- 不为名字建立任何语义约束;名字仅用于调试与阅读 +- 不要求所有前端都必须提供名字提示 + +原因很直接:当前链路存在 CSE、值合并、值拆分、新值物化与 `emitc.variable` 提升,最终 C++ +中的很多值并不存在与输入 SSA 的严格一一映射关系。 + +## 5. 总体方案 + +### 5.1 核心思路 + +PTOAS 将“原始变量名”视为调试元数据,而不是语义属性。 + +当前阶段的具体做法: + +- 前端把名字提示写入 op 的 `Location` 元数据 +- 对直接输入的 textual `.pto`,PTOAS 额外从源码文本里提取 SSA 名、函数参数名和 block argument 名 +- PTOAS 在 rewrite / lowering 时尽量传播该元数据 +- EmitC lowering 后插入 provenance marker,并在最终 C++ 文本中转成 `// pto: %name` 注释 + +这样做的核心收益是: + +- IR 语义不受影响 +- 默认 textual IR 不需要把一堆 `name_hint` 属性直接打印出来 +- 只有在显式看调试信息时,这些名字才会显式出现 +- 不需要在 C++ 文本后处理阶段重建符号表和作用域来做重命名 + +### 5.2 为什么不会让 IR 变丑 + +本设计不建议把名字直接做成普通 op 属性,比如: + +```mlir +%0 = pto.foo ... { pto.result_name_hints = ["query_tile"] } +``` + +这种方案虽然直观,但会让所有 IR 都充满调试字段。 + +本设计改为把名字放进 `Location`: + +- 默认 IR 打印时,location 不会成为主要视觉噪声 +- 只有在显式打开 debug info 打印时,名字才会展示出来 +- 语义属性区保持干净 + +因此,IR 里是“多带了调试备注”,不是“把主体语法变复杂”。 + +## 6. IR 名字承载方式 + +### 6.1 单结果 op + +单结果 op 的名字提示记录在 `NameLoc` 或 `FusedLoc` 元数据中。 + +示意: + +```mlir +%0 = pto.tload ... loc("query_tile") +``` + +其含义是:该 op 的主结果推荐名字为 `query_tile`。 + +### 6.2 多结果 op + +多结果 op 不能只靠单个 `NameLoc` 表达所有结果名,因此使用 `FusedLoc` 的 metadata +携带结果名数组。 + +逻辑示意: + +```text +loc(fused[...]) +``` + +这里 metadata 只服务于调试命名,不参与语义。 + +### 6.3 前端接口约定 + +前端可以提供名字,也可以不提供。 + +- 提供时:PTOAS 尽量保留 +- 不提供时: + - 若输入是 textual `.pto`,PTOAS 会先尝试回收源码里的 SSA / 参数 / block arg 名 + - 若仍然没有名字提示,再使用现有 fallback 命名 + +因此该能力是增量增强,不破坏现有前端。 + +## 7. 名字传播规则 + +### 7.1 直接透传 + +如果一个 rewrite 基本是一对一替换: + +- 新值继承原值名字 + +示例: + +- `pto.xxx -> emitc.cast` +- `pto.xxx -> emitc.variable` +- `pto.xxx -> 某个一对一的 helper op` + +### 7.2 派生命名 + +如果 lowering 会从一个源值派生多个新值,则在源名字基础上追加稳定后缀。 + +建议后缀包括: + +- `_cast` +- `_addr` +- `_tile` +- `_shape` +- `_stride` +- `_tmp` + +例如: + +- `query_tile -> query_tile_addr` +- `query_tile -> query_tile_cast` + +### 7.3 合并场景 + +如果多个值被合并成一个值,例如 CSE 或公共表达式复用: + +- 优先保留支配值的已有名字 +- 若名字冲突或为空,回退到稳定生成名 + +### 7.4 控制流与 hoist 场景 + +`scf` / `cf` / `emitc.variable` 路径会引入额外临时变量,这些变量在源 `.pto` 中通常没有严格对应项。 + +对这类值: + +- 若来源明确,则使用来源名加后缀,如 `_phi`、`_cond`、`_tmp` +- 若来源不明确,则使用稳定 fallback 名 + +本次实现里,控制流合流出来的 block argument 采用保守传播策略: + +- 先尽量把 block argument 名字传播到 EmitC value loc +- 若某条 lowering 路径无法稳定维持该关联,则 fail-closed 为不附带该条名字提示,而不是在 C++ 文本层猜测性回填 +- 当前阶段的 provenance 注释只针对“有结果的 op”插 marker;因此 merged block argument 不保证一定落成单独的 `// pto: %name` 注释 + +## 8. C++ 输出策略 + +### 8.1 当前阶段 + +当前阶段只保证溯源注释,不保证把调试名重写成最终的 C++ 局部变量名。 + +原因是:重命名的正确性依赖完整的符号表、作用域和冲突检测,而这些信息属于 +`CppEmitter` 内部命名逻辑。若在 `translateToCpp` 之后对 C++ 文本做字符串层重命名, +需要重新近似构建一套作用域/碰撞分析,风险较高。 + +因此本阶段只保证: + +- provenance marker 会被清理干净,不会泄漏到最终输出 +- 可恢复的值会带 `// pto: %name` 注释 +- 对包含 `*/`、换行等特殊字符的原始名做 comment-safe 转义,保证输出合法 +- 无法稳定恢复的值保持 EmitC/CppEmitter 原有 `vN` 命名 + +### 8.2 后续阶段 + +若后续要恢复“语义重命名”能力,应改为在 `CppEmitter::getOrCreateName(Value)` 一类 +真正取名的位置消费这些调试名,并复用 emitter 自己的冲突/作用域逻辑,而不是在 +`translateToCpp` 之后重写 C++ 文本。 + +## 9. 可见性与开关 + +### 9.1 默认行为 + +默认情况下: + +- IR 语义不变 +- 若前端提供了名字提示,PTOAS 尽量在 provenance 注释里保留 +- 普通 IR 打印不要求显式展示这些提示 + +### 9.2 调试打印 + +若开发者需要查看 IR 中实际承载的调试名字,可通过调试打印模式展示 location。 + +也就是说: + +- 平时看 IR:保持干净 +- 排查名字传播问题时:打开 debug info 看 metadata + +## 10. 与现有链路的关系 + +该设计需要覆盖以下环节: + +- 前端生成 PTO IR 时附带名字提示 +- textual `.pto` 输入时,基于 `AsmParserState` 恢复 SSA / 参数 / block arg 名 +- PTOAS rewrite / lowering helper 在替换时传播名字 +- `PTOToEmitC` 中新建 `emitc::VariableOp`、`emitc::CastOp` 等值时继承或派生 provenance +- 最终 `translateToCpp` 后在 PTOAS 包装层中仅把 provenance marker 转成安全注释 + +## 11. 溯源注释(issue #337 第 1 点:可定位性) + +issue #337 的第 1 点要求“`.pto` 的 `%N` 与 `.cpp` 的 `vN` 序号一致以便定位”。 +但 level3 链路有 CSE、值合并、新值物化与 `emitc.variable` 提升,输出 `cpp` 的 `vN` +编号由 EmitC 按遍历顺序重新分配,与输入 SSA 编号在原理上无法逐号对齐。 +因此本设计当前阶段对该点采用“溯源注释”路线,而非强制的 `%N == vN`。 + +### 11.1 溯源注释 + +PTOAS 在 EmitC lowering 后,对每个结果可溯源到输入 `.pto` SSA 名的 op,额外插入 +`/* PTOAS_PROVENANCE:rawname */` 标记。该标记携带**未经 sanitize 的原始 SSA 名** +(如 `0`、`24`、`query_tile`、`c0`)。 + +在最终 C++ 后处理阶段,该标记被转换为 marker 原地位置的独立注释: + +```cpp +// pto: %0 +int32_t v3 = helper(v1, v2); +// pto: %query_tile +LocalTensor v12 = ...; +``` + +这样读者在 `kernel.cpp` 里看到任意一行,都能直接读到它对应的输入 SSA 名,从而在 +`.pto` 里定位到来源,无需依赖序号一致。 + +### 11.2 限制 + +溯源注释解决的是“能定位来源”,不是“变量本身可读”。因此最终 C++ 里的参数名、局部名 +仍可能是 `vN`。对被 CSE 合并、无法溯源到单一输入值的生成值,不强制挂注释(降级为无注释)。 + +## 12. 风险与限制 + +主要风险如下: + +- 某些 pass 新建值但没有传播 provenance,会导致局部缺少 `// pto: ...` +- 多结果 op 的 metadata 约定若不统一,前后端容易理解不一致 +- 名字传播若写成语义属性,容易污染 IR;因此必须坚持“调试元数据”定位 + +限制如下: + +- 该设计当前阶段只能提供“定位来源”,不能提供“最终变量名语义化” +- 对 aggressive CSE 后的公共值,只能保留最终幸存值的名字 +- 对 textual `.pto` 的 SSA 名恢复依赖 `AsmParserState` 暴露的解析结果;若 lowering 后的 CFG 形状不再稳定匹配,则相关 hint 会 fail-closed 丢弃,而不是猜测性错挂 + +## 13. 测试建议 + +建议至少覆盖以下测试: + +- 单结果 op:前端名字能出现在最终 `// pto: %name` 注释 +- 多结果 op:多个结果的 provenance 不会错位 +- 名字含特殊字符:注释能安全转义 +- 控制流 / `emitc.variable` / hoist:注释不会错挂到错误声明 +- textual `.pto`:函数参数名、局部 SSA 名要能在 provenance 注释中保留;CFG block arg 至少要验证“不发生错挂”,不要求当前阶段每个 merged arg 都生成单独注释 +- 未提供 hint:仍保持现有 `vN` 回退行为 + +## 14. 结论 + +本设计采用“名字作为调试元数据”的路线: + +- 不把名字当语义 +- 不要求逐号保真 +- 不把 IR 默认打印搞得很吵 +- 当前阶段重点解决 `.pto`、前端 Python 和最终 `kernel.cpp` 之间“能对得上来源”的问题 + +这条路线对现有编译链路侵入较小,也最符合 issue `#337` 的真实诉求。 diff --git a/lib/PTO/Transforms/PTOToEmitC.cpp b/lib/PTO/Transforms/PTOToEmitC.cpp index b2c0ca59b3..e8b5677755 100644 --- a/lib/PTO/Transforms/PTOToEmitC.cpp +++ b/lib/PTO/Transforms/PTOToEmitC.cpp @@ -144,6 +144,59 @@ static Value getSourceEmitCVariable(Value value) { return {}; } +static void appendRawLocationNameHints(Location loc, + SmallVectorImpl &hints) { + if (auto nameLoc = dyn_cast(loc)) { + std::string raw = nameLoc.getName().getValue().str(); + if (!raw.empty()) + hints.push_back(std::move(raw)); + return; + } + + if (auto fusedLoc = dyn_cast(loc)) { + if (Attribute metadata = fusedLoc.getMetadata()) { + if (auto strAttr = dyn_cast(metadata)) { + std::string raw = strAttr.getValue().str(); + if (!raw.empty()) + hints.push_back(std::move(raw)); + return; + } + if (auto arrayAttr = dyn_cast(metadata)) { + for (Attribute attr : arrayAttr) { + auto strAttr = dyn_cast(attr); + if (!strAttr) + continue; + std::string raw = strAttr.getValue().str(); + if (!raw.empty()) + hints.push_back(std::move(raw)); + } + if (!hints.empty()) + return; + } + } + + // Only metadata explicitly attached by PTOAS name-hint recovery carries an + // ordered result-name list. Ordinary fused child locations are debug + // provenance, not result-indexed name hints. + return; + } + + if (auto callSiteLoc = dyn_cast(loc)) { + appendRawLocationNameHints(callSiteLoc.getCallee(), hints); + if (hints.empty()) + appendRawLocationNameHints(callSiteLoc.getCaller(), hints); + } +} + +static Location getIndexedNameHintLoc(Location fallbackLoc, unsigned index) { + SmallVector hints; + appendRawLocationNameHints(fallbackLoc, hints); + if (index >= hints.size() || hints[index].empty()) + return fallbackLoc; + return NameLoc::get(StringAttr::get(fallbackLoc.getContext(), hints[index]), + fallbackLoc); +} + [[maybe_unused]] static constexpr llvm::StringLiteral kLoweredSetValidShapeAttrName = "__pto.lowered_set_validshape"; [[maybe_unused]] static constexpr llvm::StringLiteral kLoweredSetValidShapeConfigAttrName = @@ -6511,16 +6564,18 @@ struct PTOGetValidShapeToEmitC auto resultTy = getTypeConverter()->convertType(rewriter.getIndexType()); if (!resultTy) return failure(); + Location rowLoc = getIndexedNameHintLoc(op.getLoc(), 0); + Location colLoc = getIndexedNameHintLoc(op.getLoc(), 1); Value row = rewriter .create( - op.getLoc(), resultTy, + rowLoc, resultTy, "PTOAS__TILE_GET_VALID_ROW", ArrayAttr{}, ArrayAttr{}, ValueRange{src}) .getResult(0); Value col = rewriter .create( - op.getLoc(), resultTy, + colLoc, resultTy, "PTOAS__TILE_GET_VALID_COL", ArrayAttr{}, ArrayAttr{}, ValueRange{src}) .getResult(0); diff --git a/test/lit/pto/async_put_get_emitc.pto b/test/lit/pto/async_put_get_emitc.pto index 30998f7c3e..54b8ca3657 100644 --- a/test/lit/pto/async_put_get_emitc.pto +++ b/test/lit/pto/async_put_get_emitc.pto @@ -15,25 +15,24 @@ module { } // A3-LABEL: AICORE void async_put_get( -// A3: Tile [[SCRATCH:v[0-9]+]]; -// A3: Tile [[SCRATCH_COPY:v[0-9]+]] = [[SCRATCH]]; -// A3: TASSIGN([[SCRATCH_COPY]], [[SCRATCH_ADDR:v[0-9]+]]); -// A3: pto::comm::AsyncSession [[SESSION:v[0-9]+]]; -// A3: pto::comm::AsyncSession [[SESSION_COPY:v[0-9]+]] = [[SESSION]]; -// A3: pto::comm::sdma::SdmaBaseConfig [[CFG:v[0-9]+]] = {32768ULL, 0ULL, 1u}; -// A3: pto::comm::sdma::SdmaBaseConfig [[CFG_COPY:v[0-9]+]] = [[CFG]]; -// A3: pto::comm::BuildAsyncSession([[SCRATCH_COPY]], {{.*}}, [[SESSION_COPY]], {{.*}}, [[CFG_COPY]], {{.*}}); +// A3: Tile [[SCRATCH_STORAGE:[_A-Za-z][_A-Za-z0-9]*]] +// A3: Tile [[SCRATCH:[_A-Za-z][_A-Za-z0-9]*]] = [[SCRATCH_STORAGE]]; +// A3: TASSIGN([[SCRATCH]], {{[_A-Za-z][_A-Za-z0-9]*}}); +// A3: pto::comm::AsyncSession [[SESSION_STORAGE:[_A-Za-z][_A-Za-z0-9]*]] +// A3: pto::comm::AsyncSession [[SESSION:[_A-Za-z][_A-Za-z0-9]*]] = [[SESSION_STORAGE]]; +// A3: pto::comm::sdma::SdmaBaseConfig {{[_A-Za-z][_A-Za-z0-9]*}} = {32768ULL, 0ULL, 1u}; +// A3: pto::comm::BuildAsyncSession([[SCRATCH]], {{.*}}, [[SESSION]], {{.*}}, {{[_A-Za-z][_A-Za-z0-9]*}}, {{.*}}); // A3: using [[SHAPETY:.*]] = pto::Shape<1, 1, 1, 1, 128>; // A3: using [[STRIDETY:.*]] = pto::Stride<128, 128, 128, 128, 1>; // A3: constexpr pto::Layout [[LAYOUT:.*]] = pto::Layout::ND; -// A3: [[SHAPETY]] [[SHAPE0:v[0-9]+]] = [[SHAPETY]](); -// A3: [[STRIDETY]] [[STRIDE0:v[0-9]+]] = [[STRIDETY]](); +// A3: [[SHAPETY]] [[SHAPE0:[_A-Za-z][_A-Za-z0-9]*]] = [[SHAPETY]](); +// A3: [[STRIDETY]] [[STRIDE0:[_A-Za-z][_A-Za-z0-9]*]] = [[STRIDETY]](); // A3: using [[GLTNSRTY:.*]] = GlobalTensor; -// A3: [[GLTNSRTY]] [[GT0:v[0-9]+]] = [[GLTNSRTY]]({{.*}}, [[SHAPE0]], [[STRIDE0]]); -// A3: [[SHAPETY]] [[SHAPE1:v[0-9]+]] = [[SHAPETY]](); -// A3: [[STRIDETY]] [[STRIDE1:v[0-9]+]] = [[STRIDETY]](); -// A3: [[GLTNSRTY]] [[GT1:v[0-9]+]] = [[GLTNSRTY]]({{.*}}, [[SHAPE1]], [[STRIDE1]]); -// A3: pto::comm::AsyncEvent [[PUT_EVT:v[0-9]+]] = pto::comm::TPUT_ASYNC( -// A3: pto::comm::AsyncEvent [[GET_EVT:v[0-9]+]] = pto::comm::TGET_ASYNC( -// A3: bool [[PUT_DONE:v[0-9]+]] = [[PUT_EVT]].Wait([[SESSION_COPY]]); -// A3: bool [[GET_DONE:v[0-9]+]] = [[GET_EVT]].Test([[SESSION_COPY]]); +// A3: [[GLTNSRTY]] [[GT0:[_A-Za-z][_A-Za-z0-9]*]] = [[GLTNSRTY]]({{.*}}, [[SHAPE0]], [[STRIDE0]]); +// A3: [[SHAPETY]] [[SHAPE1:[_A-Za-z][_A-Za-z0-9]*]] = [[SHAPETY]](); +// A3: [[STRIDETY]] [[STRIDE1:[_A-Za-z][_A-Za-z0-9]*]] = [[STRIDETY]](); +// A3: [[GLTNSRTY]] [[GT1:[_A-Za-z][_A-Za-z0-9]*]] = [[GLTNSRTY]]({{.*}}, [[SHAPE1]], [[STRIDE1]]); +// A3: pto::comm::AsyncEvent [[PUT_EVT:[_A-Za-z][_A-Za-z0-9]*]] = pto::comm::TPUT_ASYNC( +// A3: pto::comm::AsyncEvent [[GET_EVT:[_A-Za-z][_A-Za-z0-9]*]] = pto::comm::TGET_ASYNC( +// A3: bool {{[_A-Za-z][_A-Za-z0-9]*}} = [[PUT_EVT]].Wait([[SESSION]]); +// A3: bool {{[_A-Za-z][_A-Za-z0-9]*}} = [[GET_EVT]].Test([[SESSION]]); diff --git a/test/lit/pto/backedge_nested_same_pipe_prune_regression.pto b/test/lit/pto/backedge_nested_same_pipe_prune_regression.pto index e54c8f9e18..b16e7c02da 100644 --- a/test/lit/pto/backedge_nested_same_pipe_prune_regression.pto +++ b/test/lit/pto/backedge_nested_same_pipe_prune_regression.pto @@ -5,13 +5,13 @@ // the wider same-pipe pair can be removed before event-id allocation. // // CHECK-LABEL: AICORE void backedge_nested_same_pipe_prune() -// CHECK: for (int64_t {{[ij][0-9]+}} = +// CHECK: for (int64_t {{[_A-Za-z][_A-Za-z0-9]*}} = // CHECK-NEXT: wait_flag(PIPE_FIX, PIPE_MTE1, EVENT_ID[[INNER:[0-9]+]]); -// CHECK-NEXT: TMOV({{v[0-9]+}}, {{v[0-9]+}}); -// CHECK-NEXT: TMOV({{v[0-9]+}}, {{v[0-9]+}}); -// CHECK: TMATMUL({{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}); -// CHECK: TMOV({{v[0-9]+}}, {{v[0-9]+}}); -// CHECK-NEXT: TMOV({{v[0-9]+}}, {{v[0-9]+}}); +// CHECK-NEXT: TMOV({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); +// CHECK-NEXT: TMOV({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); +// CHECK: TMATMUL({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); +// CHECK: TMOV({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); +// CHECK-NEXT: TMOV({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); // CHECK-NEXT: set_flag(PIPE_FIX, PIPE_MTE1, EVENT_ID[[INNER]]); // CHECK-NEXT: } // CHECK-NEXT: wait_flag(PIPE_FIX, PIPE_MTE1, EVENT_ID[[INNER]]); diff --git a/test/lit/pto/bias_tile_subview_emitc_pointer_type.pto b/test/lit/pto/bias_tile_subview_emitc_pointer_type.pto index 7e968d2dc4..4c3d698d9c 100644 --- a/test/lit/pto/bias_tile_subview_emitc_pointer_type.pto +++ b/test/lit/pto/bias_tile_subview_emitc_pointer_type.pto @@ -22,9 +22,9 @@ module { } // CHECK-LABEL: AICORE void bias_tile_subview_emitc_pointer_type() -// CHECK-DAG: const int64_t [[ONE:v[0-9]+]] = 1; -// CHECK-DAG: const int64_t [[THIRTYTWO:v[0-9]+]] = 32; -// CHECK: __gm__ float* [[BIAS_BASE:v[0-9]+]] = reinterpret_cast<__gm__ float*>({{v[0-9]+}}); -// CHECK: __gm__ float* [[BIAS_SUB:v[0-9]+]] = {{.*}}[[BIAS_BASE]]{{.*}}[[THIRTYTWO]] * [[ONE]]{{.*}}; +// CHECK-DAG: const int64_t [[ONE:[_A-Za-z][_A-Za-z0-9]*]] = 1; +// CHECK-DAG: const int64_t [[THIRTYTWO:[_A-Za-z][_A-Za-z0-9]*]] = 32; +// CHECK: __gm__ float* [[BIAS_BASE:[_A-Za-z][_A-Za-z0-9]*]] = reinterpret_cast<__gm__ float*>({{[_A-Za-z][_A-Za-z0-9]*}}); +// CHECK: __gm__ float* [[BIAS_SUB:[_A-Za-z][_A-Za-z0-9]*]] = {{.*}}[[BIAS_BASE]]{{.*}}[[THIRTYTWO]] * [[ONE]]{{.*}}; // CHECK: consume_bias([[BIAS_SUB]]); // CHECK-NOT: __gm__ float** diff --git a/test/lit/pto/debug_name_hints_cfg_emitc.pto b/test/lit/pto/debug_name_hints_cfg_emitc.pto new file mode 100644 index 0000000000..3f99b85cd1 --- /dev/null +++ b/test/lit/pto/debug_name_hints_cfg_emitc.pto @@ -0,0 +1,35 @@ +// RUN: ptoas %s | FileCheck %s + +module { + func.func @name_hints_cfg(%cond: i1, %lhs: i32, %rhs: i32) { + %0 = func.call @name_hints_cfg_helper(%cond, %lhs, %rhs) : (i1, i32, i32) -> i32 + return + } + + func.func private @name_hints_cfg_helper(%cond: i1, %lhs: i32, %rhs: i32) -> i32 { + cf.cond_br %cond, ^bb1, ^bb2 + + ^bb1: + %then = arith.addi %lhs, %rhs : i32 loc("branch.sum") + cf.br ^bb3(%then : i32) + + ^bb2: + %else = arith.subi %lhs, %rhs : i32 loc("branch-sum") + cf.br ^bb3(%else : i32) + + ^bb3(%merged: i32): + %out = arith.addi %merged, %rhs : i32 loc("result") + %ret = arith.addi %out, %out : i32 + return %ret : i32 + } +} + +// CHECK-LABEL: AICORE int32_t name_hints_cfg_helper( +// CHECK-NOT: PTOAS_NAME_HINTS +// CHECK-NOT: PTOAS_PROVENANCE +// CHECK: label2: +// CHECK-NEXT: // pto: %branch.sum +// CHECK: label3: +// CHECK-NEXT: // pto: %branch-sum +// CHECK: label4: +// CHECK-NEXT: // pto: %result diff --git a/test/lit/pto/debug_name_hints_emitc.pto b/test/lit/pto/debug_name_hints_emitc.pto new file mode 100644 index 0000000000..46031e1dcb --- /dev/null +++ b/test/lit/pto/debug_name_hints_emitc.pto @@ -0,0 +1,38 @@ +// RUN: ptoas %s | FileCheck %s + +module { + func.func @name_hints_basic(%lhs: i32, %rhs: i32) { + %0 = func.call @name_hints_basic_helper(%lhs, %rhs) : (i32, i32) -> i32 + return + } + + func.func private @name_hints_basic_helper(%lhs: i32, %rhs: i32) -> i32 { + %sum0 = arith.addi %lhs, %rhs : i32 loc("sum.value") + %sum1 = arith.addi %sum0, %sum0 : i32 loc("sum value") + %sum2 = arith.addi %sum1, %sum0 : i32 loc("class") + %sum3 = arith.addi %sum2, %sum1 : i32 loc("and") + %sum4 = arith.addi %sum3, %sum2 : i32 loc("module") + %sum5 = arith.addi %sum4, %sum3 : i32 loc("int32_t") + %sum6 = arith.addi %sum5, %sum4 : i32 loc("size_t") + %sum7 = arith.addi %sum6, %sum5 : i32 loc("AICORE") + func.call @sink_i32(%sum7) : (i32) -> () + %sum8 = arith.addi %sum7, %sum6 : i32 + return %sum8 : i32 + } + + func.func private @sink_i32(%v: i32) { + return + } + +} + +// CHECK-LABEL: AICORE int32_t name_hints_basic_helper( +// CHECK-NOT: PTOAS_NAME_HINTS +// CHECK-NOT: PTOAS_PROVENANCE +// CHECK: // pto: %sum.value +// CHECK: // pto: %sum value +// CHECK: // pto: %class +// CHECK: // pto: %and +// CHECK: // pto: %module +// CHECK: // pto: %int32_t +// CHECK: // pto: %AICORE diff --git a/test/lit/pto/debug_name_hints_expression_emitc.pto b/test/lit/pto/debug_name_hints_expression_emitc.pto new file mode 100644 index 0000000000..b8b056d6fd --- /dev/null +++ b/test/lit/pto/debug_name_hints_expression_emitc.pto @@ -0,0 +1,34 @@ +// Copyright (c) 2026 Huawei Technologies Co., Ltd. +// This program is free software, you can redistribute it and/or modify it under the terms of conditions of +// CANN Open Software License Agreement Version 2.0 (the "License"). +// Please refer to the License for details. + +// RUN: ptoas %s 2>&1 | FileCheck %s + +// Single-use value chains are commonly folded into one emitc.expression. +// Provenance comments should still preserve the collapsed intermediate names +// on the surviving expression root instead of dropping them after +// FormExpressions inlines the chain. + +module { + func.func @expression_textual_chain(%lhs: i32, %rhs: i32) -> i32 { + %sum0 = arith.addi %lhs, %rhs : i32 + %sum1 = arith.addi %sum0, %rhs : i32 + %ret = arith.addi %sum1, %lhs : i32 + return %ret : i32 + } + + func.func @expression_loc_chain(%lhs: i32, %rhs: i32) -> i32 { + %0 = arith.addi %lhs, %rhs : i32 loc("x") + %1 = arith.addi %0, %rhs : i32 loc("y") + %2 = arith.addi %1, %lhs : i32 loc("z") + return %2 : i32 + } +} + +// CHECK-LABEL: AICORE int32_t expression_textual_chain( +// CHECK: // pto: %sum0, %sum1, %ret +// CHECK-NEXT: return +// CHECK-LABEL: AICORE int32_t expression_loc_chain( +// CHECK: // pto: %x, %y, %z +// CHECK-NEXT: return diff --git a/test/lit/pto/debug_name_hints_function_name_collision_emitc.pto b/test/lit/pto/debug_name_hints_function_name_collision_emitc.pto new file mode 100644 index 0000000000..6e7df2926c --- /dev/null +++ b/test/lit/pto/debug_name_hints_function_name_collision_emitc.pto @@ -0,0 +1,23 @@ +// Copyright (c) 2026 Huawei Technologies Co., Ltd. +// This program is free software, you can redistribute it and/or modify it under the terms of conditions of +// CANN Open Software License Agreement Version 2.0 (the "License"). +// Please refer to the License for details. + +// RUN: ptoas %s | FileCheck %s + +module { + func.func @entry(%lhs: i32, %rhs: i32) { + %0 = func.call @v1(%lhs, %rhs) : (i32, i32) -> i32 + return + } + + func.func private @v1(%lhs: i32, %rhs: i32) -> i32 { + %sum = arith.addi %lhs, %rhs : i32 loc("sum") + return %sum : i32 + } +} + +// CHECK-LABEL: AICORE int32_t v1( +// CHECK: return (int32_t) +// CHECK-LABEL: AICORE void entry( +// CHECK: // pto: %0 diff --git a/test/lit/pto/debug_name_hints_identifier_collision_emitc.pto b/test/lit/pto/debug_name_hints_identifier_collision_emitc.pto new file mode 100644 index 0000000000..c538895886 --- /dev/null +++ b/test/lit/pto/debug_name_hints_identifier_collision_emitc.pto @@ -0,0 +1,20 @@ +// RUN: ptoas %s | FileCheck %s + +module { + func.func @identifier_collision(%lhs: i32, %rhs: i32) { + %0 = func.call @identifier_collision_helper(%lhs, %rhs) : (i32, i32) -> i32 + return + } + + func.func private @identifier_collision_helper(%lhs: i32, %rhs: i32) -> i32 { + %sum = arith.addi %lhs, %rhs : i32 loc("T") + %sum2 = arith.addi %sum, %lhs : i32 + %sum3 = arith.addi %sum2, %sum : i32 + return %sum3 : i32 + } +} + +// CHECK-LABEL: AICORE int32_t identifier_collision_helper( +// CHECK: using T = float; +// CHECK-NOT: int32_t T = ( +// CHECK: // pto: %T diff --git a/test/lit/pto/debug_name_hints_multi_result_emitc.pto b/test/lit/pto/debug_name_hints_multi_result_emitc.pto new file mode 100644 index 0000000000..f87f293d58 --- /dev/null +++ b/test/lit/pto/debug_name_hints_multi_result_emitc.pto @@ -0,0 +1,59 @@ +// Copyright (c) 2026 Huawei Technologies Co., Ltd. +// This program is free software, you can redistribute it and/or modify it under the terms of conditions of +// CANN Open Software License Agreement Version 2.0 (the "License"). +// Please refer to the License for details. + +// RUN: ptoas %s 2>&1 | FileCheck %s + +// Multi-result textual SSA names use the `%name:N =` MLIR syntax. The textual +// hint parser must still consume that op so later op-result hints do not shift +// onto the wrong operations, and the lowered EmitC temporaries must preserve +// per-result provenance instead of collapsing every comment onto `%name#0`. + +module { + func.func @multi_result_hint_alignment(%pred: i1, %lhs: i32, %rhs: i32) -> i32 { + %pair:2 = scf.if %pred -> (i32, i32) { + scf.yield %lhs, %rhs : i32, i32 + } else { + scf.yield %rhs, %lhs : i32, i32 + } loc(fused["lhs", "rhs"]) + %sum0 = func.call @add_helper(%pair#0, %pair#0) : (i32, i32) -> i32 + %sum1 = func.call @add_helper(%pair#1, %pair#1) : (i32, i32) -> i32 + %ret = arith.addi %sum0, %sum1 : i32 + return %ret : i32 + } + + func.func private @add_helper(%a: i32, %b: i32) -> i32 { + %r = arith.addi %a, %b : i32 + return %r : i32 + } + + func.func @partial_multi_result_hint_alignment(%pred: i1, %lhs: i32, %rhs: i32) -> i32 { + %pair:2 = scf.if %pred -> (i32, i32) { + scf.yield %lhs, %rhs : i32, i32 + } else { + scf.yield %rhs, %lhs : i32, i32 + } + %mix = arith.addi %pair#1, %pair#1 : i32 + return %mix : i32 + } +} + +// CHECK-LABEL: AICORE int32_t partial_multi_result_hint_alignment( +// CHECK-NOT: PTOAS_NAME_HINTS +// CHECK-NOT: PTOAS_PROVENANCE +// CHECK-NOT: // pto: %pair#0 +// CHECK: // pto: %pair#1 +// CHECK-NEXT: int32_t [[PARTIAL:v[0-9]+]] = v1 ? v3 : v2; +// CHECK: // pto: %mix +// CHECK-NEXT: return (int32_t) ((uint32_t) [[PARTIAL]] + (uint32_t) [[PARTIAL]]); +// CHECK-LABEL: AICORE int32_t multi_result_hint_alignment( +// CHECK-NOT: PTOAS_NAME_HINTS +// CHECK-NOT: PTOAS_PROVENANCE +// CHECK-NOT: // pto: %lhs +// CHECK-NOT: // pto: %rhs +// CHECK: // pto: %pair#0 +// CHECK-NEXT: int32_t [[PAIR0:v[0-9]+]] = v1 ? v2 : v3; +// CHECK: // pto: %pair#1 +// CHECK-NEXT: int32_t [[PAIR1:v[0-9]+]] = v1 ? v3 : v2; +// CHECK: return (int32_t) ((uint32_t) add_helper([[PAIR0]], [[PAIR0]]) + (uint32_t) add_helper([[PAIR1]], [[PAIR1]])); diff --git a/test/lit/pto/debug_name_hints_provenance_emitc.pto b/test/lit/pto/debug_name_hints_provenance_emitc.pto new file mode 100644 index 0000000000..35ea33dc15 --- /dev/null +++ b/test/lit/pto/debug_name_hints_provenance_emitc.pto @@ -0,0 +1,50 @@ +// Copyright (c) 2026 Huawei Technologies Co., Ltd. +// This program is free software, you can redistribute it and/or modify it under the terms of conditions of +// CANN Open Software License Agreement Version 2.0 (the "License"). +// Please refer to the License for details. + +// RUN: ptoas %s 2>&1 | FileCheck %s + +// Issue #337 point 1: locatability. A textual .pto input that uses pure-digit +// SSA names (%0, %1, ...) is compiled to C++. The output annotates each +// recoverable generated local with a `// pto: %` comment mapping it back +// to its source SSA value, so a reader profiling the cpp can find the +// responsible .pto line even though CSE/renumbering breaks strict %N == vN +// alignment. The pure-digit name %0 is sanitized to _0 (best-effort retention +// of the number) and additionally carries the provenance comment. + +module { + func.func @provenance_caller(%arg0: i32, %arg1: i32) { + %0 = func.call @provenance_helper(%arg0, %arg1) : (i32, i32) -> i32 + return + } + + func.func @provenance_escape(%lhs: i32, %rhs: i32) { + %sum = arith.addi %lhs, %rhs : i32 loc("a*/b") + %next = arith.addi %sum, %rhs : i32 + %last = arith.addi %next, %sum : i32 + func.call @sink_i32(%last) : (i32) -> () + return + } + + func.func private @provenance_helper(%lhs: i32, %rhs: i32) -> i32 { + %sum = arith.addi %lhs, %rhs : i32 loc("scaled_sum") + %scaled = arith.muli %sum, %lhs : i32 loc("scaled_out") + return %scaled : i32 + } + + func.func private @sink_i32(%v: i32) { + return + } +} + +// CHECK-LABEL: AICORE int32_t provenance_helper( +// CHECK-NOT: PTOAS_PROVENANCE +// CHECK-LABEL: AICORE void provenance_caller( +// CHECK-NOT: PTOAS_PROVENANCE +// CHECK: // pto: %0 +// CHECK-NEXT: int32_t [[R:v[0-9]+]] = provenance_helper( +// CHECK-LABEL: AICORE void provenance_escape( +// CHECK-NOT: PTOAS_PROVENANCE +// CHECK: // pto: %a*/b +// CHECK-NEXT: int32_t [[ESCAPED:v[0-9]+]] = ( diff --git a/test/lit/pto/debug_name_hints_textual_cfg_emitc.pto b/test/lit/pto/debug_name_hints_textual_cfg_emitc.pto new file mode 100644 index 0000000000..9312d28813 --- /dev/null +++ b/test/lit/pto/debug_name_hints_textual_cfg_emitc.pto @@ -0,0 +1,32 @@ +// RUN: ptoas %s | FileCheck %s + +module { + func.func @textual_name_hints_cfg(%cond: i1, %lhs: i32, %rhs: i32) { + %0 = func.call @textual_name_hints_cfg_helper(%cond, %lhs, %rhs) : (i1, i32, i32) -> i32 + return + } + + func.func private @textual_name_hints_cfg_helper(%cond: i1, %lhs: i32, %rhs: i32) -> i32 { + cf.cond_br %cond, ^bb1, ^bb2 + + ^bb1: + %then_value = arith.addi %lhs, %rhs : i32 + cf.br ^bb3(%then_value : i32) + + ^bb2: + %else_value = arith.subi %lhs, %rhs : i32 + cf.br ^bb3(%else_value : i32) + + ^bb3(%merged_value: i32): + %result_value = arith.addi %merged_value, %rhs : i32 + return %result_value : i32 + } +} + +// CHECK-NOT: PTOAS_NAME_HINTS +// CHECK-NOT: PTOAS_PARAM_HINTS +// CHECK-LABEL: AICORE int32_t textual_name_hints_cfg_helper( +// CHECK: // pto: %then_value +// CHECK: // pto: %else_value +// CHECK-NOT: // pto: %merged_value +// CHECK: // pto: %0 diff --git a/test/lit/pto/debug_name_hints_textual_comment_string_emitc.pto b/test/lit/pto/debug_name_hints_textual_comment_string_emitc.pto new file mode 100644 index 0000000000..449b8bd0f4 --- /dev/null +++ b/test/lit/pto/debug_name_hints_textual_comment_string_emitc.pto @@ -0,0 +1,20 @@ +// Copyright (c) 2026 Huawei Technologies Co., Ltd. +// This program is free software, you can redistribute it and/or modify it under the terms of conditions of +// CANN Open Software License Agreement Version 2.0 (the "License"). +// Please refer to the License for details. + +// RUN: ptoas %s | FileCheck %s + +module { + func.func @http_loc_hint(%lhs: i32, %rhs: i32) -> i32 { + %sum = arith.addi %lhs, %rhs : i32 loc("http://sum") + %kept = arith.addi %sum, %rhs : i32 + %ret = arith.addi %kept, %sum : i32 + return %ret : i32 + } +} + +// CHECK-LABEL: AICORE int32_t http_loc_hint( +// CHECK-NOT: PTOAS_NAME_HINTS +// CHECK-NOT: PTOAS_PROVENANCE +// CHECK: // pto: %http://sum diff --git a/test/lit/pto/debug_name_hints_textual_emitc.pto b/test/lit/pto/debug_name_hints_textual_emitc.pto new file mode 100644 index 0000000000..a7fe448b12 --- /dev/null +++ b/test/lit/pto/debug_name_hints_textual_emitc.pto @@ -0,0 +1,27 @@ +// RUN: ptoas %s | FileCheck %s + +module { + func.func @textual_name_hints_caller(%lhs: i32, %rhs: i32) { + %call_result = func.call @textual_name_hints_add(%lhs, %rhs) : (i32, i32) -> i32 + %call_result_2 = func.call @textual_name_hints_sub(%lhs, %rhs) : (i32, i32) -> i32 + return + } + + func.func private @textual_name_hints_add(%lhs: i32, %rhs: i32) -> i32 { + %sum = arith.addi %lhs, %rhs : i32 + return %sum : i32 + } + + func.func private @textual_name_hints_sub(%lhs: i32, %rhs: i32) -> i32 { + %diff = arith.subi %lhs, %rhs : i32 + return %diff : i32 + } +} + +// CHECK-NOT: PTOAS_NAME_HINTS +// CHECK-NOT: PTOAS_PARAM_HINTS +// CHECK-LABEL: AICORE int32_t textual_name_hints_add( +// CHECK-LABEL: AICORE int32_t textual_name_hints_sub( +// CHECK-LABEL: AICORE void textual_name_hints_caller( +// CHECK: // pto: %call_result +// CHECK: // pto: %call_result_2 diff --git a/test/lit/pto/emitc_tile_data_sink_after_tassign.pto b/test/lit/pto/emitc_tile_data_sink_after_tassign.pto index 953c70ed44..70086a0c89 100644 --- a/test/lit/pto/emitc_tile_data_sink_after_tassign.pto +++ b/test/lit/pto/emitc_tile_data_sink_after_tassign.pto @@ -25,13 +25,19 @@ module attributes {pto.target_arch = "a5", pto.kernel_kind = #pto.kernel_kind [[ADDR_BASE:v[0-9]+]]; +// CHECK: // pto: %0 +// CHECK-NEXT: Tile [[ADDR_BASE:v[0-9]+]]; +// CHECK: // pto: %0 // CHECK-NEXT: Tile [[ADDR_TILE:v[0-9]+]] = [[ADDR_BASE]]; // CHECK-NEXT: TASSIGN([[ADDR_TILE]], v1); -// CHECK: Tile [[SINK_BASE:v[0-9]+]]; +// CHECK: // pto: %1 +// CHECK-NEXT: Tile [[SINK_BASE:v[0-9]+]]; +// CHECK: // pto: %1 // CHECK-NEXT: Tile [[SINK_TILE:v[0-9]+]] = [[SINK_BASE]]; +// CHECK: // pto: %1 // CHECK-NEXT: __ubuf__ float* [[ADDR_PTR:v[0-9]+]] = [[ADDR_TILE]].data(); +// CHECK: // pto: %1 // CHECK-NEXT: uint64_t [[ADDR_BITS:v[0-9]+]] = reinterpret_cast([[ADDR_PTR]]); // CHECK-NEXT: TASSIGN([[SINK_TILE]], [[ADDR_BITS]]); -// CHECK-NEXT: __ubuf__ float* [[SINK_PTR:v[0-9]+]] = [[SINK_TILE]].data(); +// CHECK: __ubuf__ float* [[SINK_PTR:v[0-9]+]] = [[SINK_TILE]].data(); // CHECK-NEXT: sink_ptr([[SINK_PTR]]); diff --git a/test/lit/pto/eventid_array_dyn_sync.pto b/test/lit/pto/eventid_array_dyn_sync.pto index e969b767f2..92614ec6cf 100644 --- a/test/lit/pto/eventid_array_dyn_sync.pto +++ b/test/lit/pto/eventid_array_dyn_sync.pto @@ -14,12 +14,12 @@ module { } // CHECK-LABEL: AICORE void eventid_array_dyn_sync() { -// CHECK: const int64_t {{v[0-9]+}} = 0; -// CHECK: PTOAS_EventIdArray<4> [[ARR:v[0-9]+]]; -// CHECK: PTOAS_EventIdArray<4> [[ARR_VAL:v[0-9]+]] = [[ARR]]; -// CHECK: [[ARR_VAL]][{{v[0-9]+}}] = {{v[0-9]+}}; -// CHECK: int64_t [[EID:v[0-9]+]] = [[ARR_VAL]][{{v[0-9]+}}]; -// CHECK: event_t {{v[0-9]+}} = (event_t) [[EID]]; -// CHECK: set_flag(PIPE_MTE2, PIPE_MTE3, {{v[0-9]+}}); -// CHECK: event_t {{v[0-9]+}} = (event_t) [[EID]]; -// CHECK: wait_flag(PIPE_MTE2, PIPE_MTE3, {{v[0-9]+}}); +// CHECK: const int64_t {{[_A-Za-z][_A-Za-z0-9]*}} = 0; +// CHECK: PTOAS_EventIdArray<4> [[ARR_STORAGE:[_A-Za-z][_A-Za-z0-9]*]] +// CHECK: PTOAS_EventIdArray<4> [[ARR:[_A-Za-z][_A-Za-z0-9]*]] = [[ARR_STORAGE]]; +// CHECK: [[ARR]][{{[_A-Za-z][_A-Za-z0-9]*}}] = {{[_A-Za-z][_A-Za-z0-9]*}}; +// CHECK: int64_t [[EID_SRC:[_A-Za-z][_A-Za-z0-9]*]] = [[ARR]][{{[_A-Za-z][_A-Za-z0-9]*}}]; +// CHECK: event_t [[EID:[_A-Za-z][_A-Za-z0-9]*]] = (event_t) [[EID_SRC]]; +// CHECK: set_flag(PIPE_MTE2, PIPE_MTE3, [[EID]]); +// CHECK: event_t {{[_A-Za-z][_A-Za-z0-9]*}} = (event_t) [[EID_SRC]]; +// CHECK: wait_flag(PIPE_MTE2, PIPE_MTE3, {{[_A-Za-z][_A-Za-z0-9]*}}); diff --git a/test/lit/pto/eventid_array_get_set_get.pto b/test/lit/pto/eventid_array_get_set_get.pto index 7578c7a403..5a87f403e1 100644 --- a/test/lit/pto/eventid_array_get_set_get.pto +++ b/test/lit/pto/eventid_array_get_set_get.pto @@ -17,13 +17,13 @@ module { } // CHECK-LABEL: AICORE void eventid_array_get_set_get() { -// CHECK: PTOAS_EventIdArray<4> [[ARR:v[0-9]+]]; -// CHECK: PTOAS_EventIdArray<4> [[ARR_VAL:v[0-9]+]] = [[ARR]]; -// CHECK: [[ARR_VAL]][{{v[0-9]+}}] = {{v[0-9]+}}; -// CHECK: int64_t [[FIRST_ID:v[0-9]+]] = [[ARR_VAL]][{{v[0-9]+}}]; -// CHECK: [[ARR_VAL]][{{v[0-9]+}}] = {{v[0-9]+}}; -// CHECK: int64_t [[SECOND_ID:v[0-9]+]] = [[ARR_VAL]][{{v[0-9]+}}]; -// CHECK: event_t [[FIRST:v[0-9]+]] = (event_t) [[FIRST_ID]]; +// CHECK: PTOAS_EventIdArray<4> [[ARR_STORAGE:[_A-Za-z][_A-Za-z0-9]*]] +// CHECK: PTOAS_EventIdArray<4> [[ARR:[_A-Za-z][_A-Za-z0-9]*]] = [[ARR_STORAGE]]; +// CHECK: [[ARR]][{{[_A-Za-z][_A-Za-z0-9]*}}] = {{[_A-Za-z][_A-Za-z0-9]*}}; +// CHECK: int64_t [[FIRST_SRC:[_A-Za-z][_A-Za-z0-9]*]] = [[ARR]][{{[_A-Za-z][_A-Za-z0-9]*}}]; +// CHECK: [[ARR]][{{[_A-Za-z][_A-Za-z0-9]*}}] = {{[_A-Za-z][_A-Za-z0-9]*}}; +// CHECK: int64_t [[SECOND_SRC:[_A-Za-z][_A-Za-z0-9]*]] = [[ARR]][{{[_A-Za-z][_A-Za-z0-9]*}}]; +// CHECK: event_t [[FIRST:[_A-Za-z][_A-Za-z0-9]*]] = (event_t) [[FIRST_SRC]]; // CHECK: set_flag(PIPE_MTE2, PIPE_MTE3, [[FIRST]]); -// CHECK: event_t [[SECOND:v[0-9]+]] = (event_t) [[SECOND_ID]]; +// CHECK: event_t [[SECOND:[_A-Za-z][_A-Za-z0-9]*]] = (event_t) [[SECOND_SRC]]; // CHECK: wait_flag(PIPE_MTE2, PIPE_MTE3, [[SECOND]]); diff --git a/test/lit/pto/eventid_array_no_cse.pto b/test/lit/pto/eventid_array_no_cse.pto index 8d18b44176..0f33596b71 100644 --- a/test/lit/pto/eventid_array_no_cse.pto +++ b/test/lit/pto/eventid_array_no_cse.pto @@ -18,9 +18,15 @@ module { } // CHECK-LABEL: AICORE void eventid_array_no_cse() { -// CHECK: PTOAS_EventIdArray<4> [[ARR0:v[0-9]+]]; -// CHECK: PTOAS_EventIdArray<4> [[ARR0_VAL:v[0-9]+]] = [[ARR0]]; -// CHECK: PTOAS_EventIdArray<4> [[ARR1:v[0-9]+]]; -// CHECK: PTOAS_EventIdArray<4> [[ARR1_VAL:v[0-9]+]] = [[ARR1]]; -// CHECK: [[ARR0_VAL]][{{v[0-9]+}}] = {{v[0-9]+}}; -// CHECK: [[ARR1_VAL]][{{v[0-9]+}}] = {{v[0-9]+}}; +// CHECK: PTOAS_EventIdArray<4> [[ARR0_STORAGE:[_A-Za-z][_A-Za-z0-9]*]] +// CHECK: PTOAS_EventIdArray<4> [[ARR0:[_A-Za-z][_A-Za-z0-9]*]] = [[ARR0_STORAGE]]; +// CHECK: PTOAS_EventIdArray<4> [[ARR1_STORAGE:[_A-Za-z][_A-Za-z0-9]*]] +// CHECK: PTOAS_EventIdArray<4> [[ARR1:[_A-Za-z][_A-Za-z0-9]*]] = [[ARR1_STORAGE]]; +// CHECK: [[ARR0]][{{[_A-Za-z][_A-Za-z0-9]*}}] = {{[_A-Za-z][_A-Za-z0-9]*}}; +// CHECK: [[ARR1]][{{[_A-Za-z][_A-Za-z0-9]*}}] = {{[_A-Za-z][_A-Za-z0-9]*}}; +// CHECK: int64_t [[EID0_SRC:[_A-Za-z][_A-Za-z0-9]*]] = [[ARR0]][{{[_A-Za-z][_A-Za-z0-9]*}}]; +// CHECK: int64_t [[EID1_SRC:[_A-Za-z][_A-Za-z0-9]*]] = [[ARR1]][{{[_A-Za-z][_A-Za-z0-9]*}}]; +// CHECK: event_t [[EID0:[_A-Za-z][_A-Za-z0-9]*]] = (event_t) [[EID0_SRC]]; +// CHECK: set_flag(PIPE_MTE2, PIPE_MTE3, [[EID0]]); +// CHECK: event_t [[EID1:[_A-Za-z][_A-Za-z0-9]*]] = (event_t) [[EID1_SRC]]; +// CHECK: wait_flag(PIPE_MTE2, PIPE_MTE3, [[EID1]]); diff --git a/test/lit/pto/get_validshape_emitc.pto b/test/lit/pto/get_validshape_emitc.pto index 60b3f01da2..07a589298b 100644 --- a/test/lit/pto/get_validshape_emitc.pto +++ b/test/lit/pto/get_validshape_emitc.pto @@ -13,6 +13,7 @@ module attributes {pto.target_arch = "a5"} { %valid_row, %valid_col = pto.get_validshape %tile : !pto.tile_buf + loc(fused["lhs", "rhs"]) pto.set_validshape %tile, %valid_row, %valid_col : !pto.tile_buf @@ -23,6 +24,10 @@ module attributes {pto.target_arch = "a5"} { // CHECK-LABEL: AICORE void get_validshape_emitc() // CHECK: Tile [[TILE:v[0-9]+]] = Tile -// CHECK: int64_t [[ROW:v[0-9]+]] = [[TILE]].GetValidRow(); +// CHECK-NOT: // pto: %lhs +// CHECK-NOT: // pto: %rhs +// CHECK: // pto: %valid_row +// CHECK-NEXT: int64_t [[ROW:v[0-9]+]] = [[TILE]].GetValidRow(); +// CHECK: // pto: %valid_col // CHECK-NEXT: int64_t [[COL:v[0-9]+]] = [[TILE]].GetValidCol(); // CHECK-NEXT: [[TILE]].SetValidShape([[ROW]], [[COL]]); diff --git a/test/lit/pto/issue157_64bit_view_offset_emitc.pto b/test/lit/pto/issue157_64bit_view_offset_emitc.pto index 0ceccc8107..b749376013 100644 --- a/test/lit/pto/issue157_64bit_view_offset_emitc.pto +++ b/test/lit/pto/issue157_64bit_view_offset_emitc.pto @@ -39,17 +39,17 @@ module { } // A3-LABEL: AICORE void issue157_subview_remote_offset_i8 -// A3-SAME: (__gm__ int8_t* [[SUB_SRC:v[0-9]+]], int64_t [[SUB_OFF:v[0-9]+]]) -// A3-DAG: const int64_t [[SUB_ONE:v[0-9]+]] = 1; -// A3-DAG: const int64_t [[SUB_ZERO:v[0-9]+]] = 0; +// A3-SAME: (__gm__ int8_t* [[SUB_SRC:[_A-Za-z][_A-Za-z0-9]*]], int64_t [[SUB_OFF:[_A-Za-z][_A-Za-z0-9]*]]) +// A3-DAG: const int64_t [[SUB_ONE:[_A-Za-z][_A-Za-z0-9]*]] = 1; +// A3-DAG: const int64_t [[SUB_ZERO:[_A-Za-z][_A-Za-z0-9]*]] = 0; // A3-NOT: unsigned // A3: GlobalTensor( +// CHECK-LABEL: AICORE void cube_kernel(__gm__ float* {{[_A-Za-z][_A-Za-z0-9]*}}, __gm__ float* {{[_A-Za-z][_A-Za-z0-9]*}}) +// CHECK: int64_t {{[_A-Za-z][_A-Za-z0-9]*}} = get_block_idx(); +// CHECK: __gm__ float* {{[_A-Za-z][_A-Za-z0-9]*}} = {{[_A-Za-z][_A-Za-z0-9]*}} + {{[_A-Za-z][_A-Za-z0-9]*}}; +// CHECK: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, false>( -// CHECK-LABEL: AICORE void vector_kernel(__gm__ float* {{v[0-9]+}}, __gm__ float* {{v[0-9]+}}) -// CHECK: int64_t {{v[0-9]+}} = get_block_idx(); -// CHECK: __gm__ float* {{v[0-9]+}} = {{v[0-9]+}} + {{v[0-9]+}}; -// CHECK: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, false>( +// CHECK-LABEL: AICORE void vector_kernel(__gm__ float* {{[_A-Za-z][_A-Za-z0-9]*}}, __gm__ float* {{[_A-Za-z][_A-Za-z0-9]*}}) +// CHECK: int64_t {{[_A-Za-z][_A-Za-z0-9]*}} = get_block_idx(); +// CHECK: __gm__ float* {{[_A-Za-z][_A-Za-z0-9]*}} = {{[_A-Za-z][_A-Za-z0-9]*}} + {{[_A-Za-z][_A-Za-z0-9]*}}; +// CHECK: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, false>( // CHECK-NOT: pto.addptr diff --git a/test/lit/pto/issue481_addptr_gm_slot_buffer_gss.pto b/test/lit/pto/issue481_addptr_gm_slot_buffer_gss.pto index f012566068..ffd82412ae 100644 --- a/test/lit/pto/issue481_addptr_gm_slot_buffer_gss.pto +++ b/test/lit/pto/issue481_addptr_gm_slot_buffer_gss.pto @@ -34,13 +34,13 @@ module { } } -// CHECK-LABEL: AICORE void cube_kernel(__gm__ float* {{v[0-9]+}}, __gm__ float* {{v[0-9]+}}) -// CHECK: int64_t {{v[0-9]+}} = get_block_idx(); -// CHECK: __gm__ float* {{v[0-9]+}} = {{v[0-9]+}} + {{v[0-9]+}}; -// CHECK: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, false>( +// CHECK-LABEL: AICORE void cube_kernel(__gm__ float* {{[_A-Za-z][_A-Za-z0-9]*}}, __gm__ float* {{[_A-Za-z][_A-Za-z0-9]*}}) +// CHECK: int64_t {{[_A-Za-z][_A-Za-z0-9]*}} = get_block_idx(); +// CHECK: __gm__ float* {{[_A-Za-z][_A-Za-z0-9]*}} = {{[_A-Za-z][_A-Za-z0-9]*}} + {{[_A-Za-z][_A-Za-z0-9]*}}; +// CHECK: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, false>( -// CHECK-LABEL: AICORE void vector_kernel(__gm__ float* {{v[0-9]+}}, __gm__ float* {{v[0-9]+}}) -// CHECK: int64_t {{v[0-9]+}} = get_block_idx(); -// CHECK: __gm__ float* {{v[0-9]+}} = {{v[0-9]+}} + {{v[0-9]+}}; -// CHECK: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, false>( +// CHECK-LABEL: AICORE void vector_kernel(__gm__ float* {{[_A-Za-z][_A-Za-z0-9]*}}, __gm__ float* {{[_A-Za-z][_A-Za-z0-9]*}}) +// CHECK: int64_t {{[_A-Za-z][_A-Za-z0-9]*}} = get_block_idx(); +// CHECK: __gm__ float* {{[_A-Za-z][_A-Za-z0-9]*}} = {{[_A-Za-z][_A-Za-z0-9]*}} + {{[_A-Za-z][_A-Za-z0-9]*}}; +// CHECK: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, false>( // CHECK-NOT: pto.addptr diff --git a/test/lit/pto/issue556_tpop_live_values_no_alias.pto b/test/lit/pto/issue556_tpop_live_values_no_alias.pto index ddd179e75b..d77e49f27c 100644 --- a/test/lit/pto/issue556_tpop_live_values_no_alias.pto +++ b/test/lit/pto/issue556_tpop_live_values_no_alias.pto @@ -57,11 +57,11 @@ module { } // CHECK-LABEL: AICORE void issue556_tpop_live_values_no_alias( -// CHECK: Tile [[POP0:v[0-9]+]]; -// CHECK: Tile [[POP0_COPY:v[0-9]+]] = [[POP0]]; -// CHECK: TPOP, Tile, TileSplitAxis::TILE_UP_DOWN>({{v[0-9]+}}, [[POP0_COPY]]); -// CHECK: Tile [[POP1:v[0-9]+]]; -// CHECK: Tile [[POP1_COPY:v[0-9]+]] = [[POP1]]; -// CHECK: TPOP, Tile, TileSplitAxis::TILE_UP_DOWN>({{v[0-9]+}}, [[POP1_COPY]]); -// CHECK: TMOV({{v[0-9]+}}, [[POP0_COPY]]); -// CHECK: TMOV({{v[0-9]+}}, [[POP1_COPY]]); +// CHECK: Tile [[POP0:[_A-Za-z][_A-Za-z0-9]*]]; +// CHECK: Tile [[POP0_COPY:[_A-Za-z][_A-Za-z0-9]*]] = [[POP0]]; +// CHECK: TPOP, Tile, TileSplitAxis::TILE_UP_DOWN>({{[_A-Za-z][_A-Za-z0-9]*}}, [[POP0_COPY]]); +// CHECK: Tile [[POP1:[_A-Za-z][_A-Za-z0-9]*]]; +// CHECK: Tile [[POP1_COPY:[_A-Za-z][_A-Za-z0-9]*]] = [[POP1]]; +// CHECK: TPOP, Tile, TileSplitAxis::TILE_UP_DOWN>({{[_A-Za-z][_A-Za-z0-9]*}}, [[POP1_COPY]]); +// CHECK: TMOV({{[_A-Za-z][_A-Za-z0-9]*}}, [[POP0_COPY]]); +// CHECK: TMOV({{[_A-Za-z][_A-Za-z0-9]*}}, [[POP1_COPY]]); diff --git a/test/lit/pto/issue660_trowexpandmul_set_validshape_preserves_alloc_valid.pto b/test/lit/pto/issue660_trowexpandmul_set_validshape_preserves_alloc_valid.pto index a7b5e8c51e..fe6737672c 100644 --- a/test/lit/pto/issue660_trowexpandmul_set_validshape_preserves_alloc_valid.pto +++ b/test/lit/pto/issue660_trowexpandmul_set_validshape_preserves_alloc_valid.pto @@ -39,9 +39,9 @@ module { } } -// CHECK-DAG: const int64_t [[ROW:v[0-9]+]] = 16; -// CHECK-DAG: const int64_t [[FULL_COL:v[0-9]+]] = 256; -// CHECK: Tile [[DST:v[0-9]+]] = Tile([[ROW]], [[FULL_COL]]); +// CHECK-DAG: const int64_t [[ROW:[_A-Za-z][_A-Za-z0-9]*]] = 16; +// CHECK-DAG: const int64_t [[FULL_COL:[_A-Za-z][_A-Za-z0-9]*]] = 256; +// CHECK: Tile [[DST:[_A-Za-z][_A-Za-z0-9]*]] = Tile([[ROW]], [[FULL_COL]]); // CHECK: TROWEXPANDMUL([[DST]], // CHECK: [[DST]].SetValidShape([[ROW]], [[ROW]]); -// CHECK: TFILLPAD({{v[0-9]+}}, [[DST]]); +// CHECK: TFILLPAD({{[_A-Za-z][_A-Za-z0-9]*}}, [[DST]]); diff --git a/test/lit/pto/issue686_a5_tmov_treshape_dynamic_valid_shape.pto b/test/lit/pto/issue686_a5_tmov_treshape_dynamic_valid_shape.pto index a14b12d9a0..33062a44c2 100644 --- a/test/lit/pto/issue686_a5_tmov_treshape_dynamic_valid_shape.pto +++ b/test/lit/pto/issue686_a5_tmov_treshape_dynamic_valid_shape.pto @@ -26,18 +26,18 @@ module attributes {pto.target_arch = "a5"} { } } -// CHECK: Tile [[SRC_ORIG:v[0-9]+]] = Tile -// CHECK: Tile [[DST_ORIG:v[0-9]+]] = Tile -// CHECK: Tile [[SRC_STORAGE:v[0-9]+]]; -// CHECK-NEXT: Tile [[SRC:v[0-9]+]] = [[SRC_STORAGE]]; +// CHECK: Tile [[SRC_ORIG:[_A-Za-z][_A-Za-z0-9]*]] = Tile +// CHECK: Tile [[DST_ORIG:[_A-Za-z][_A-Za-z0-9]*]] = Tile +// CHECK: Tile [[SRC_STORAGE:[_A-Za-z][_A-Za-z0-9]*]]; +// CHECK-NEXT: Tile [[SRC:[_A-Za-z][_A-Za-z0-9]*]] = [[SRC_STORAGE]]; // CHECK-NEXT: TRESHAPE([[SRC]], [[SRC_ORIG]]); -// CHECK: Tile [[DST_STORAGE:v[0-9]+]]; -// CHECK-NEXT: Tile [[DST:v[0-9]+]] = [[DST_STORAGE]]; +// CHECK: Tile [[DST_STORAGE:[_A-Za-z][_A-Za-z0-9]*]]; +// CHECK-NEXT: Tile [[DST:[_A-Za-z][_A-Za-z0-9]*]] = [[DST_STORAGE]]; // CHECK-NEXT: TRESHAPE([[DST]], [[DST_ORIG]]); -// CHECK: int64_t [[SRC_ROW:v[0-9]+]] = [[SRC_ORIG]].GetValidRow(); -// CHECK-NEXT: int64_t [[SRC_COL:v[0-9]+]] = [[SRC_ORIG]].GetValidCol(); +// CHECK: int64_t [[SRC_ROW:[_A-Za-z][_A-Za-z0-9]*]] = [[SRC_ORIG]].GetValidRow(); +// CHECK-NEXT: int64_t [[SRC_COL:[_A-Za-z][_A-Za-z0-9]*]] = [[SRC_ORIG]].GetValidCol(); // CHECK-NEXT: [[SRC]].SetValidShape([[SRC_COL]], [[SRC_ROW]]); -// CHECK-NEXT: int64_t [[DST_ROW:v[0-9]+]] = [[DST_ORIG]].GetValidRow(); -// CHECK-NEXT: int64_t [[DST_COL:v[0-9]+]] = [[DST_ORIG]].GetValidCol(); +// CHECK-NEXT: int64_t [[DST_ROW:[_A-Za-z][_A-Za-z0-9]*]] = [[DST_ORIG]].GetValidRow(); +// CHECK-NEXT: int64_t [[DST_COL:[_A-Za-z][_A-Za-z0-9]*]] = [[DST_ORIG]].GetValidCol(); // CHECK-NEXT: [[DST]].SetValidShape([[DST_COL]], [[DST_ROW]]); // CHECK-NEXT: TMOV([[DST]], [[SRC]]); diff --git a/test/lit/pto/issue686_a5_tmov_treshape_dynamic_valid_shape_level2.pto b/test/lit/pto/issue686_a5_tmov_treshape_dynamic_valid_shape_level2.pto index e3c3864d05..5d4af8ba42 100644 --- a/test/lit/pto/issue686_a5_tmov_treshape_dynamic_valid_shape_level2.pto +++ b/test/lit/pto/issue686_a5_tmov_treshape_dynamic_valid_shape_level2.pto @@ -25,18 +25,18 @@ module attributes {pto.target_arch = "a5"} { } // CHECK-LABEL: AICORE void a5_tmov_treshape_dynamic_valid_shape_level2() -// CHECK: Tile [[SRC_ORIG:v[0-9]+]] -// CHECK: Tile [[DST_ORIG:v[0-9]+]] -// CHECK: Tile [[SRC_STORAGE:v[0-9]+]]; -// CHECK-NEXT: Tile [[SRC:v[0-9]+]] = [[SRC_STORAGE]]; +// CHECK: Tile [[SRC_ORIG:[_A-Za-z][_A-Za-z0-9]*]] +// CHECK: Tile [[DST_ORIG:[_A-Za-z][_A-Za-z0-9]*]] +// CHECK: Tile [[SRC_STORAGE:[_A-Za-z][_A-Za-z0-9]*]]; +// CHECK-NEXT: Tile [[SRC:[_A-Za-z][_A-Za-z0-9]*]] = [[SRC_STORAGE]]; // CHECK-NEXT: TRESHAPE([[SRC]], [[SRC_ORIG]]); -// CHECK: Tile [[DST_STORAGE:v[0-9]+]]; -// CHECK-NEXT: Tile [[DST:v[0-9]+]] = [[DST_STORAGE]]; +// CHECK: Tile [[DST_STORAGE:[_A-Za-z][_A-Za-z0-9]*]]; +// CHECK-NEXT: Tile [[DST:[_A-Za-z][_A-Za-z0-9]*]] = [[DST_STORAGE]]; // CHECK-NEXT: TRESHAPE([[DST]], [[DST_ORIG]]); -// CHECK: int64_t [[SRC_ROW:v[0-9]+]] = [[SRC_ORIG]].GetValidRow(); -// CHECK-NEXT: int64_t [[SRC_COL:v[0-9]+]] = [[SRC_ORIG]].GetValidCol(); +// CHECK: int64_t [[SRC_ROW:[_A-Za-z][_A-Za-z0-9]*]] = [[SRC_ORIG]].GetValidRow(); +// CHECK-NEXT: int64_t [[SRC_COL:[_A-Za-z][_A-Za-z0-9]*]] = [[SRC_ORIG]].GetValidCol(); // CHECK-NEXT: [[SRC]].SetValidShape([[SRC_COL]], [[SRC_ROW]]); -// CHECK-NEXT: int64_t [[DST_ROW:v[0-9]+]] = [[DST_ORIG]].GetValidRow(); -// CHECK-NEXT: int64_t [[DST_COL:v[0-9]+]] = [[DST_ORIG]].GetValidCol(); +// CHECK-NEXT: int64_t [[DST_ROW:[_A-Za-z][_A-Za-z0-9]*]] = [[DST_ORIG]].GetValidRow(); +// CHECK-NEXT: int64_t [[DST_COL:[_A-Za-z][_A-Za-z0-9]*]] = [[DST_ORIG]].GetValidCol(); // CHECK-NEXT: [[DST]].SetValidShape([[DST_COL]], [[DST_ROW]]); // CHECK-NEXT: TMOV([[DST]], [[SRC]]); diff --git a/test/lit/pto/issue696_scalar_gm_store_flush.pto b/test/lit/pto/issue696_scalar_gm_store_flush.pto index 66b92a60db..21fbf8fd53 100644 --- a/test/lit/pto/issue696_scalar_gm_store_flush.pto +++ b/test/lit/pto/issue696_scalar_gm_store_flush.pto @@ -55,7 +55,7 @@ module { } // CPP-LABEL: AICORE void scalar_gm_store_flush -// CPP-SAME: (__gm__ int32_t* [[DST0:v[0-9]+]], __gm__ int32_t* [[DST1:v[0-9]+]], __gm__ int32_t* [[SRC:v[0-9]+]], int64_t [[IDX:v[0-9]+]]) +// CPP-SAME: (__gm__ int32_t* [[DST0:[_A-Za-z][_A-Za-z0-9]*]], __gm__ int32_t* [[DST1:[_A-Za-z][_A-Za-z0-9]*]], __gm__ int32_t* [[SRC:[_A-Za-z][_A-Za-z0-9]*]], int64_t [[IDX:[_A-Za-z][_A-Za-z0-9]*]]) // CPP: [[DST0]][[[IDX]]] = // CPP: [[DST1]][{{.*}}] = // CPP: [[DST0]][{{.*}}] = @@ -66,7 +66,7 @@ module { // CPP: return; // CPP-LABEL: AICORE void scalar_gm_store_multi_exit -// CPP-SAME: (__gm__ int32_t* [[ME_DST:v[0-9]+]], __gm__ int32_t* [[ME_SRC:v[0-9]+]], int64_t [[ME_IDX:v[0-9]+]]) +// CPP-SAME: (__gm__ int32_t* [[ME_DST:[_A-Za-z][_A-Za-z0-9]*]], __gm__ int32_t* [[ME_SRC:[_A-Za-z][_A-Za-z0-9]*]], int64_t [[ME_IDX:[_A-Za-z][_A-Za-z0-9]*]]) // CPP: [[ME_DST]][[[ME_IDX]]] = // CPP: pipe_barrier(PIPE_ALL); // CPP-NEXT: dcci((__gm__ void*)0, ENTIRE_DATA_CACHE, CACHELINE_OUT); diff --git a/test/lit/pto/issue713_local_array_get_snapshot.pto b/test/lit/pto/issue713_local_array_get_snapshot.pto index c549c233d7..43a2c20ce2 100644 --- a/test/lit/pto/issue713_local_array_get_snapshot.pto +++ b/test/lit/pto/issue713_local_array_get_snapshot.pto @@ -37,8 +37,11 @@ module { // CPP: int32_t [[ARR:v[0-9]+]][2]; // CPP: [[ARR]][{{v[0-9]+}}] = {{v[0-9]+}}; // CPP: [[ARR]][{{v[0-9]+}}] = {{v[0-9]+}}; +// CPP: // pto: %cur // CPP: int32_t [[CUR:v[0-9]+]] = [[ARR]][{{v[0-9]+}}]; +// CPP: // pto: %new // CPP-NEXT: [[ARR]][{{v[0-9]+}}] = {{.*}}(uint32_t) [[CUR]]{{.*}}; // CPP-NOT: [[ARR]][ -// CPP: int32_t [[SUM:v[0-9]+]] = {{.*}}(uint32_t) [[CUR]]{{.*}}; +// CPP: // pto: %sum +// CPP-NEXT: int32_t [[SUM:v[0-9]+]] = {{.*}}(uint32_t) [[CUR]]{{.*}}; // CPP-NEXT: [[OUT]][{{v[0-9]+}}] = [[SUM]]; diff --git a/test/lit/pto/local_array_1d_emitc.pto b/test/lit/pto/local_array_1d_emitc.pto index c90943f247..13283ec15b 100644 --- a/test/lit/pto/local_array_1d_emitc.pto +++ b/test/lit/pto/local_array_1d_emitc.pto @@ -23,7 +23,7 @@ module { } // CHECK-LABEL: local_array_1d -// CHECK: int32_t [[A:v[0-9]+]][16]; -// CHECK: [[A]][{{v[0-9]+}}] = {{v[0-9]+}}; -// CHECK: int32_t [[R:v[0-9]+]] = [[A]][{{v[0-9]+}}]; -// CHECK: [[A]][{{v[0-9]+}}] = [[R]]; +// CHECK: int32_t [[A:[_A-Za-z][_A-Za-z0-9]*]][16]; +// CHECK: [[A]][{{[_A-Za-z][_A-Za-z0-9]*}}] = {{[_A-Za-z][_A-Za-z0-9]*}}; +// CHECK: int32_t [[R:[_A-Za-z][_A-Za-z0-9]*]] = [[A]][{{[_A-Za-z][_A-Za-z0-9]*}}]; +// CHECK: [[A]][{{[_A-Za-z][_A-Za-z0-9]*}}] = [[R]]; diff --git a/test/lit/pto/local_array_2d_emitc.pto b/test/lit/pto/local_array_2d_emitc.pto index 584ed6efa7..d09f27fe53 100644 --- a/test/lit/pto/local_array_2d_emitc.pto +++ b/test/lit/pto/local_array_2d_emitc.pto @@ -23,7 +23,7 @@ module { } // CHECK-LABEL: local_array_2d -// CHECK: float [[M:v[0-9]+]][8][8]; -// CHECK: [[M]][{{v[0-9]+}}][{{v[0-9]+}}] = {{v[0-9]+}}; -// CHECK: float [[R:v[0-9]+]] = [[M]][{{v[0-9]+}}][{{v[0-9]+}}]; -// CHECK: [[M]][{{v[0-9]+}}][{{v[0-9]+}}] = [[R]]; +// CHECK: float [[M:[_A-Za-z][_A-Za-z0-9]*]][8][8]; +// CHECK: [[M]][{{[_A-Za-z][_A-Za-z0-9]*}}][{{[_A-Za-z][_A-Za-z0-9]*}}] = {{[_A-Za-z][_A-Za-z0-9]*}}; +// CHECK: float [[R:[_A-Za-z][_A-Za-z0-9]*]] = [[M]][{{[_A-Za-z][_A-Za-z0-9]*}}][{{[_A-Za-z][_A-Za-z0-9]*}}]; +// CHECK: [[M]][{{[_A-Za-z][_A-Za-z0-9]*}}][{{[_A-Za-z][_A-Za-z0-9]*}}] = [[R]]; diff --git a/test/lit/pto/local_array_get_rvalue_emitc.pto b/test/lit/pto/local_array_get_rvalue_emitc.pto index 132e14ca47..e1b2ca7f30 100644 --- a/test/lit/pto/local_array_get_rvalue_emitc.pto +++ b/test/lit/pto/local_array_get_rvalue_emitc.pto @@ -23,6 +23,6 @@ module { } // CHECK-LABEL: local_array_get_rvalue -// CHECK: int32_t [[A:v[0-9]+]][16]; -// CHECK: int32_t [[R:v[0-9]+]] = [[A]][{{v[0-9]+}}]; -// CHECK: [[A]][{{v[0-9]+}}] = {{.*}}(uint32_t) [[R]]{{.*}}; +// CHECK: int32_t [[A:[_A-Za-z][_A-Za-z0-9]*]][16]; +// CHECK: int32_t [[R:[_A-Za-z][_A-Za-z0-9]*]] = [[A]][{{[_A-Za-z][_A-Za-z0-9]*}}]; +// CHECK: [[A]][{{[_A-Za-z][_A-Za-z0-9]*}}] = {{.*}}[[R]]{{.*}}; diff --git a/test/lit/pto/mgather_mscatter_a5_attrs_emitc.pto b/test/lit/pto/mgather_mscatter_a5_attrs_emitc.pto index e15e4c82b0..7c50049378 100644 --- a/test/lit/pto/mgather_mscatter_a5_attrs_emitc.pto +++ b/test/lit/pto/mgather_mscatter_a5_attrs_emitc.pto @@ -87,13 +87,13 @@ module attributes {pto.target_arch = "a5"} { } // CHECK-LABEL: AICORE void mgather_emitc -// CHECK: TLOAD({{v[0-9]+}}, {{v[0-9]+}}); -// CHECK-DAG: __ubuf__ int32_t* [[MGATHER_IDX:v[0-9]+]] = {{.*}}; -// CHECK-DAG: __ubuf__ float* [[MGATHER_DST:v[0-9]+]] = {{.*}}; -// CHECK: MGATHER([[MGATHER_DST]], {{v[0-9]+}}, [[MGATHER_IDX]]); +// CHECK: TLOAD({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); +// CHECK-DAG: __ubuf__ int32_t* [[MGATHER_IDX:[_A-Za-z][_A-Za-z0-9]*]] = {{.*}}; +// CHECK-DAG: __ubuf__ float* [[MGATHER_DST:[_A-Za-z][_A-Za-z0-9]*]] = {{.*}}; +// CHECK: MGATHER([[MGATHER_DST]], {{[_A-Za-z][_A-Za-z0-9]*}}, [[MGATHER_IDX]]); // CHECK-LABEL: AICORE void mscatter_emitc -// CHECK: TLOAD({{v[0-9]+}}, {{v[0-9]+}}); -// CHECK: TLOAD({{v[0-9]+}}, {{v[0-9]+}}); -// CHECK-DAG: __ubuf__ float* [[MSCATTER_SRC:v[0-9]+]] = {{.*}}; -// CHECK-DAG: __ubuf__ int32_t* [[MSCATTER_IDX:v[0-9]+]] = {{.*}}; -// CHECK: MSCATTER({{v[0-9]+}}, [[MSCATTER_SRC]], [[MSCATTER_IDX]]); +// CHECK: TLOAD({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); +// CHECK: TLOAD({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); +// CHECK-DAG: __ubuf__ float* [[MSCATTER_SRC:[_A-Za-z][_A-Za-z0-9]*]] = {{.*}}; +// CHECK-DAG: __ubuf__ int32_t* [[MSCATTER_IDX:[_A-Za-z][_A-Za-z0-9]*]] = {{.*}}; +// CHECK: MSCATTER({{[_A-Za-z][_A-Za-z0-9]*}}, [[MSCATTER_SRC]], [[MSCATTER_IDX]]); diff --git a/test/lit/pto/mgather_mscatter_base_a3_emitc.pto b/test/lit/pto/mgather_mscatter_base_a3_emitc.pto index 281c4aad8d..ac413fa848 100644 --- a/test/lit/pto/mgather_mscatter_base_a3_emitc.pto +++ b/test/lit/pto/mgather_mscatter_base_a3_emitc.pto @@ -63,11 +63,11 @@ module attributes {pto.target_arch = "a3"} { } // CHECK-LABEL: AICORE void mgather_emitc_base( -// CHECK: __ubuf__ int32_t* {{v[0-9]+}} = {{.*}}; -// CHECK: __ubuf__ float* [[MGATHER_BASE_DST:v[0-9]+]] = {{.*}}; -// CHECK: MGATHER([[MGATHER_BASE_DST]], {{v[0-9]+}}, {{v[0-9]+}}); +// CHECK: __ubuf__ int32_t* [[MGATHER_IDX:[_A-Za-z][_A-Za-z0-9]*]] = {{[_A-Za-z][_A-Za-z0-9]*}}.data(); +// CHECK: __ubuf__ float* [[MGATHER_DST:[_A-Za-z][_A-Za-z0-9]*]] = {{[_A-Za-z][_A-Za-z0-9]*}}.data(); +// CHECK: MGATHER([[MGATHER_DST]], {{[_A-Za-z][_A-Za-z0-9]*}}, [[MGATHER_IDX]]); // CHECK-LABEL: AICORE void mscatter_emitc_base( -// CHECK: __ubuf__ float* [[MSCATTER_BASE_SRC:v[0-9]+]] = {{.*}}; -// CHECK: __ubuf__ int32_t* [[MSCATTER_BASE_IDX:v[0-9]+]] = {{.*}}; -// CHECK: MSCATTER({{v[0-9]+}}, [[MSCATTER_BASE_SRC]], [[MSCATTER_BASE_IDX]]); +// CHECK: __ubuf__ float* [[MSCATTER_SRC:[_A-Za-z][_A-Za-z0-9]*]] = {{[_A-Za-z][_A-Za-z0-9]*}}.data(); +// CHECK: __ubuf__ int32_t* [[MSCATTER_IDX:[_A-Za-z][_A-Za-z0-9]*]] = {{[_A-Za-z][_A-Za-z0-9]*}}.data(); +// CHECK: MSCATTER({{[_A-Za-z][_A-Za-z0-9]*}}, [[MSCATTER_SRC]], [[MSCATTER_IDX]]); diff --git a/test/lit/pto/multi_tile_const_preload_dyn_loop_select.pto b/test/lit/pto/multi_tile_const_preload_dyn_loop_select.pto index eab2597534..c2a1b19d2d 100644 --- a/test/lit/pto/multi_tile_const_preload_dyn_loop_select.pto +++ b/test/lit/pto/multi_tile_const_preload_dyn_loop_select.pto @@ -75,8 +75,11 @@ module { // EMITC: AICORE void const_preload_dyn_loop_select // EMITC: for ( -// EMITC-NEXT: Tile [[TILE_STORAGE:v[0-9]+]]; -// EMITC-NEXT: Tile [[TILE:v[0-9]+]] = [[TILE_STORAGE]]; -// EMITC-NEXT: uint64_t [[ADDR:v[0-9]+]] = (uint64_t) +// EMITC: // pto: %slot +// EMITC-NEXT: Tile [[TILE_STORAGE:[_A-Za-z][_A-Za-z0-9]*]]; +// EMITC: // pto: %slot +// EMITC-NEXT: Tile [[TILE:[_A-Za-z][_A-Za-z0-9]*]] = [[TILE_STORAGE]]; +// EMITC: // pto: +// EMITC-NEXT: uint64_t [[ADDR:[_A-Za-z][_A-Za-z0-9]*]] = (uint64_t) // EMITC-NEXT: TASSIGN([[TILE]], [[ADDR]]); // EMITC: TLOAD([[TILE]], diff --git a/test/lit/pto/multi_tile_preload_loop_set_wait.pto b/test/lit/pto/multi_tile_preload_loop_set_wait.pto index 339b3b6466..8a8f4cce15 100644 --- a/test/lit/pto/multi_tile_preload_loop_set_wait.pto +++ b/test/lit/pto/multi_tile_preload_loop_set_wait.pto @@ -49,12 +49,12 @@ module { } // CHECK-LABEL: AICORE void preload_and_loop_set_wait -// CHECK: TLOAD([[PRE:v[0-9]+]], +// CHECK: TLOAD([[PRE:[_A-Za-z][_A-Za-z0-9]*]], // CHECK: set_flag(PIPE_MTE2, PIPE_MTE3, EVENT_ID[[PRE_EID:[0-9]+]]); // CHECK-NEXT: wait_flag(PIPE_MTE2, PIPE_MTE3, EVENT_ID[[PRE_EID]]); -// CHECK: TSTORE({{v[0-9]+}}, [[PRE]]); +// CHECK: TSTORE({{[_A-Za-z][_A-Za-z0-9]*}}, [[PRE]]); // CHECK: for ( -// CHECK: TLOAD([[LOOP:v[0-9]+]], +// CHECK: TLOAD([[LOOP:[_A-Za-z][_A-Za-z0-9]*]], // CHECK: set_flag(PIPE_MTE2, PIPE_MTE3, EVENT_ID[[LOOP_EID:[0-9]+]]); // CHECK-NEXT: wait_flag(PIPE_MTE2, PIPE_MTE3, EVENT_ID[[LOOP_EID]]); -// CHECK: TSTORE({{v[0-9]+}}, [[LOOP]]); +// CHECK: TSTORE({{[_A-Za-z][_A-Za-z0-9]*}}, [[LOOP]]); diff --git a/test/lit/pto/ptr_int_cast.pto b/test/lit/pto/ptr_int_cast.pto index a3fa70087e..bba4f125dd 100644 --- a/test/lit/pto/ptr_int_cast.pto +++ b/test/lit/pto/ptr_int_cast.pto @@ -33,10 +33,10 @@ module { // IR-NOT: !pto.ptr // CPP-LABEL: AICORE void ptr_int_cast_kernel -// CPP-SAME: (__gm__ uint64_t* [[SRC:v[0-9]+]], __gm__ uint32_t* [[DST:v[0-9]+]], int64_t [[IDX:v[0-9]+]]) -// CPP: int64_t [[ADDR:v[0-9]+]] = reinterpret_cast([[SRC]]); -// CPP: __gm__ uint32_t* [[SRC32:v[0-9]+]] = reinterpret_cast<__gm__ uint32_t*>({{.*}}[[ADDR]] -// CPP: uint32_t [[VAL:v[0-9]+]] = [[SRC32]][{{.*}}]; +// CPP-SAME: (__gm__ uint64_t* [[SRC:[_A-Za-z][_A-Za-z0-9]*]], __gm__ uint32_t* [[DST:[_A-Za-z][_A-Za-z0-9]*]], int64_t [[IDX:[_A-Za-z][_A-Za-z0-9]*]]) +// CPP: int64_t [[ADDR:[_A-Za-z][_A-Za-z0-9]*]] = reinterpret_cast([[SRC]]); +// CPP: __gm__ uint32_t* [[SRC32:[_A-Za-z][_A-Za-z0-9]*]] = reinterpret_cast<__gm__ uint32_t*>({{.*}}[[ADDR]] +// CPP: uint32_t [[VAL:[_A-Za-z][_A-Za-z0-9]*]] = [[SRC32]][{{.*}}]; // CPP: [[DST]][[[IDX]]] = [[VAL]]; // IR-LABEL: func.func @ptrtoint_addptr_multi_consumer @@ -49,11 +49,11 @@ module { // IR-NOT: !pto.ptr // CPP-LABEL: AICORE void ptrtoint_addptr_multi_consumer -// CPP-SAME: (__gm__ uint64_t* [[SRC64:v[0-9]+]], __gm__ uint64_t* [[DST64:v[0-9]+]], __gm__ int64_t* [[DSTADDR:v[0-9]+]], int64_t [[IDX2:v[0-9]+]]) -// CPP: const int64_t [[ZERO:v[0-9]+]] = 0; -// CPP: const int64_t [[ELEM_BYTES:v[0-9]+]] = 8; -// CPP: uint64_t [[A:v[0-9]+]] = [[SRC64]][[[IDX2]]]; -// CPP: int64_t [[ADDR2:v[0-9]+]] = reinterpret_cast([[SRC64]]); -// CPP: int64_t [[BYTE_ADDR:v[0-9]+]] = {{.*}}[[ADDR2]]{{.*}}[[IDX2]]{{.*}}[[ELEM_BYTES]] +// CPP-SAME: (__gm__ uint64_t* [[SRC64:[_A-Za-z][_A-Za-z0-9]*]], __gm__ uint64_t* [[DST64:[_A-Za-z][_A-Za-z0-9]*]], __gm__ int64_t* [[DSTADDR:[_A-Za-z][_A-Za-z0-9]*]], int64_t [[IDX2:[_A-Za-z][_A-Za-z0-9]*]]) +// CPP: const int64_t [[ZERO:[_A-Za-z][_A-Za-z0-9]*]] = 0; +// CPP: const int64_t [[ELEM_BYTES:[_A-Za-z][_A-Za-z0-9]*]] = 8; +// CPP: uint64_t [[A:[_A-Za-z][_A-Za-z0-9]*]] = [[SRC64]][[[IDX2]]]; +// CPP: int64_t [[ADDR2:[_A-Za-z][_A-Za-z0-9]*]] = reinterpret_cast([[SRC64]]); +// CPP: int64_t [[BYTE_ADDR:[_A-Za-z][_A-Za-z0-9]*]] = {{.*}}[[ADDR2]]{{.*}}[[IDX2]]{{.*}}[[ELEM_BYTES]] // CPP: [[DST64]][[[ZERO]]] = [[A]]; // CPP: [[DSTADDR]][[[ZERO]]] = [[BYTE_ADDR]]; diff --git a/test/lit/pto/set_validshape_if.pto b/test/lit/pto/set_validshape_if.pto index f9006b1db1..dce9c775b7 100644 --- a/test/lit/pto/set_validshape_if.pto +++ b/test/lit/pto/set_validshape_if.pto @@ -31,12 +31,12 @@ module { } } -// CHECK-DAG: const int64_t [[C32:v[0-9]+]] = 32; -// CHECK: Tile [[TILE:v[0-9]+]] = Tile([[C32]], [[C32]]); +// CHECK-DAG: const int64_t [[C32:[_A-Za-z][_A-Za-z0-9]*]] = 32; +// CHECK: Tile [[TILE:[_A-Za-z][_A-Za-z0-9]*]] = Tile([[C32]], [[C32]]); // CHECK: TASSIGN([[TILE]], // CHECK: if ( -// CHECK: [[TILE]].SetValidShape([[ROW1:v[0-9]+]], [[COL1:v[0-9]+]]) +// CHECK: [[TILE]].SetValidShape([[ROW1:[_A-Za-z][_A-Za-z0-9]*]], [[COL1:[_A-Za-z][_A-Za-z0-9]*]]) // CHECK: } else { -// CHECK: [[TILE]].SetValidShape([[ROW2:v[0-9]+]], [[COL2:v[0-9]+]]) +// CHECK: [[TILE]].SetValidShape([[ROW2:[_A-Za-z][_A-Za-z0-9]*]], [[COL2:[_A-Za-z][_A-Za-z0-9]*]]) // CHECK: } // CHECK: TADD([[TILE]], [[TILE]], [[TILE]]); diff --git a/test/lit/pto/set_validshape_local_lowering.pto b/test/lit/pto/set_validshape_local_lowering.pto index 5d79b33f2d..dacefc01e1 100644 --- a/test/lit/pto/set_validshape_local_lowering.pto +++ b/test/lit/pto/set_validshape_local_lowering.pto @@ -15,7 +15,7 @@ module { } } -// CHECK-DAG: const int64_t [[C32:v[0-9]+]] = 32; -// CHECK: Tile [[TILE:v[0-9]+]] = Tile([[C32]], [[C32]]); -// CHECK: TASSIGN([[TILE]], [[ADDR:v[0-9]+]]); -// CHECK: [[TILE]].SetValidShape([[ROW:v[0-9]+]], [[COL:v[0-9]+]]) +// CHECK-DAG: const int64_t [[C32:[_A-Za-z][_A-Za-z0-9]*]] = 32; +// CHECK: Tile [[TILE:[_A-Za-z][_A-Za-z0-9]*]] = Tile([[C32]], [[C32]]); +// CHECK: TASSIGN([[TILE]], [[ADDR:[_A-Za-z][_A-Za-z0-9]*]]); +// CHECK: [[TILE]].SetValidShape([[ROW:[_A-Za-z][_A-Za-z0-9]*]], [[COL:[_A-Za-z][_A-Za-z0-9]*]]) diff --git a/test/lit/pto/tassign_level3_loop_rebind.pto b/test/lit/pto/tassign_level3_loop_rebind.pto index 4246386de7..7c6f18b462 100644 --- a/test/lit/pto/tassign_level3_loop_rebind.pto +++ b/test/lit/pto/tassign_level3_loop_rebind.pto @@ -35,8 +35,8 @@ module { } // CHECK-LABEL: __global__ AICORE void tassign_loop_rebind() { -// CHECK: Tile [[T_STORAGE:v[0-9]+]]; -// CHECK: Tile [[T:v[0-9]+]] = [[T_STORAGE]]; +// CHECK: Tile [[T_STORAGE:[_A-Za-z][_A-Za-z0-9]*]]; +// CHECK: Tile [[T:[_A-Za-z][_A-Za-z0-9]*]] = [[T_STORAGE]]; // CHECK: for ( // CHECK: TASSIGN([[T]], // CHECK: TPRINT{{(<.*>)?}}([[T]]); diff --git a/test/lit/pto/tassign_level3_loop_rebind_gss.pto b/test/lit/pto/tassign_level3_loop_rebind_gss.pto index 7a5f5affa1..8684bddfab 100644 --- a/test/lit/pto/tassign_level3_loop_rebind_gss.pto +++ b/test/lit/pto/tassign_level3_loop_rebind_gss.pto @@ -35,8 +35,8 @@ module { } // CHECK-LABEL: __global__ AICORE void tassign_loop_rebind() { -// CHECK: Tile [[T_STORAGE:v[0-9]+]]; -// CHECK: Tile [[T:v[0-9]+]] = [[T_STORAGE]]; +// CHECK: Tile [[T_STORAGE:[_A-Za-z][_A-Za-z0-9]*]]; +// CHECK: Tile [[T:[_A-Za-z][_A-Za-z0-9]*]] = [[T_STORAGE]]; // CHECK: for ( // CHECK: TASSIGN([[T]], // CHECK: TPRINT{{(<.*>)?}}([[T]]); diff --git a/test/lit/pto/tcolexpanddiv_precision_emitc.pto b/test/lit/pto/tcolexpanddiv_precision_emitc.pto index ea46d994f3..77d55203fc 100644 --- a/test/lit/pto/tcolexpanddiv_precision_emitc.pto +++ b/test/lit/pto/tcolexpanddiv_precision_emitc.pto @@ -15,5 +15,5 @@ module { } } -// A5: TCOLEXPANDDIV([[VDST:v[0-9]+]], [[VSRC0:v[0-9]+]], [[VSRC1:v[0-9]+]]); +// A5: TCOLEXPANDDIV([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC0:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC1:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TCOLEXPANDDIV([[VDST]], [[VSRC0]], [[VSRC1]]); diff --git a/test/lit/pto/tdiv_precision_emitc.pto b/test/lit/pto/tdiv_precision_emitc.pto index 16aad222d5..fc9faaf8fb 100644 --- a/test/lit/pto/tdiv_precision_emitc.pto +++ b/test/lit/pto/tdiv_precision_emitc.pto @@ -17,5 +17,5 @@ module { } } -// A5: TDIV([[VDST:v[0-9]+]], [[VSRC0:v[0-9]+]], [[VSRC1:v[0-9]+]]); +// A5: TDIV([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC0:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC1:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TDIV([[VDST]], [[VSRC0]], [[VSRC1]]); diff --git a/test/lit/pto/tdivs_dual_order_emitc.pto b/test/lit/pto/tdivs_dual_order_emitc.pto index e036a1be8c..84c70d0d79 100644 --- a/test/lit/pto/tdivs_dual_order_emitc.pto +++ b/test/lit/pto/tdivs_dual_order_emitc.pto @@ -16,5 +16,5 @@ module { } } -// A3: TDIVS([[VDST:v[0-9]+]], [[VSRC:v[0-9]+]], [[VSCALAR:v[0-9]+]]); +// A3: TDIVS([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC:[_A-Za-z][_A-Za-z0-9]*]], [[VSCALAR:[_A-Za-z][_A-Za-z0-9]*]]); // A3: TDIVS([[VDST]], [[VSCALAR]], [[VSRC]]); diff --git a/test/lit/pto/tdivs_dual_order_emitc_gss.pto b/test/lit/pto/tdivs_dual_order_emitc_gss.pto index bba9b3f842..716e075307 100644 --- a/test/lit/pto/tdivs_dual_order_emitc_gss.pto +++ b/test/lit/pto/tdivs_dual_order_emitc_gss.pto @@ -16,5 +16,5 @@ module { } } -// A3: TDIVS([[VDST:v[0-9]+]], [[VSRC:v[0-9]+]], [[VSCALAR:v[0-9]+]]); +// A3: TDIVS([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC:[_A-Za-z][_A-Za-z0-9]*]], [[VSCALAR:[_A-Za-z][_A-Za-z0-9]*]]); // A3: TDIVS([[VDST]], [[VSCALAR]], [[VSRC]]); diff --git a/test/lit/pto/texp_precision_emitc.pto b/test/lit/pto/texp_precision_emitc.pto index 68f3001157..27a4fda783 100644 --- a/test/lit/pto/texp_precision_emitc.pto +++ b/test/lit/pto/texp_precision_emitc.pto @@ -14,5 +14,5 @@ module { } } -// A5: TEXP([[VDST:v[0-9]+]], [[VSRC:v[0-9]+]]); +// A5: TEXP([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TEXP([[VDST]], [[VSRC]]); diff --git a/test/lit/pto/tfmod_precision_emitc.pto b/test/lit/pto/tfmod_precision_emitc.pto index bbd94ebc23..f631d78772 100644 --- a/test/lit/pto/tfmod_precision_emitc.pto +++ b/test/lit/pto/tfmod_precision_emitc.pto @@ -15,5 +15,5 @@ module { } } -// A5: TFMOD([[VDST:v[0-9]+]], [[VSRC0:v[0-9]+]], [[VSRC1:v[0-9]+]]); +// A5: TFMOD([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC0:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC1:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TFMOD([[VDST]], [[VSRC0]], [[VSRC1]]); diff --git a/test/lit/pto/tgather_three_forms_emitc.pto b/test/lit/pto/tgather_three_forms_emitc.pto index 56a6c4128c..d96ff714ec 100644 --- a/test/lit/pto/tgather_three_forms_emitc.pto +++ b/test/lit/pto/tgather_three_forms_emitc.pto @@ -21,9 +21,9 @@ module { } } -// A3: TGATHER({{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}); +// A3: TGATHER({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); // A3: TGATHER, Tile, MaskPattern::P1111>( // A3-NOT: reinterpret_cast< // A3-NOT: TGATHER, Tile, Tile, Tile, CmpMode::EQ, 7>( // A3: TGATHER, Tile, Tile, Tile, CmpMode::EQ>( -// A3-SAME: {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}); +// A3-SAME: {{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); diff --git a/test/lit/pto/tgemv_accphase_emitc.pto b/test/lit/pto/tgemv_accphase_emitc.pto index a7aa3ce882..0123a17c4b 100644 --- a/test/lit/pto/tgemv_accphase_emitc.pto +++ b/test/lit/pto/tgemv_accphase_emitc.pto @@ -23,12 +23,12 @@ module attributes {pto.target_arch = "a5"} { } } -// A5: TGEMV([[D0:v[0-9]+]], [[L0:v[0-9]+]], [[R0:v[0-9]+]]); +// A5: TGEMV([[D0:[_A-Za-z][_A-Za-z0-9]*]], [[L0:[_A-Za-z][_A-Za-z0-9]*]], [[R0:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TGEMV([[D0]], [[L0]], [[R0]]); // A5: TGEMV([[D0]], [[L0]], [[R0]]); -// A5: TGEMV_ACC([[D1:v[0-9]+]], [[CIN:v[0-9]+]], [[L1:v[0-9]+]], [[R1:v[0-9]+]]); +// A5: TGEMV_ACC([[D1:[_A-Za-z][_A-Za-z0-9]*]], [[CIN:[_A-Za-z][_A-Za-z0-9]*]], [[L1:[_A-Za-z][_A-Za-z0-9]*]], [[R1:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TGEMV_ACC([[D1]], [[CIN]], [[L1]], [[R1]]); // A5: TGEMV_ACC([[D1]], [[CIN]], [[L1]], [[R1]]); -// A5: TGEMV_BIAS([[D2:v[0-9]+]], [[L2:v[0-9]+]], [[R2:v[0-9]+]], [[B2:v[0-9]+]]); +// A5: TGEMV_BIAS([[D2:[_A-Za-z][_A-Za-z0-9]*]], [[L2:[_A-Za-z][_A-Za-z0-9]*]], [[R2:[_A-Za-z][_A-Za-z0-9]*]], [[B2:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TGEMV_BIAS([[D2]], [[L2]], [[R2]], [[B2]]); // A5: TGEMV_BIAS([[D2]], [[L2]], [[R2]], [[B2]]); diff --git a/test/lit/pto/tgemv_mx_accphase_emitc.pto b/test/lit/pto/tgemv_mx_accphase_emitc.pto index 9f181a4535..9e77ca5690 100644 --- a/test/lit/pto/tgemv_mx_accphase_emitc.pto +++ b/test/lit/pto/tgemv_mx_accphase_emitc.pto @@ -27,10 +27,10 @@ module attributes {pto.target_arch = "a5"} { } } -// A5: TGEMV_MX([[D0:v[0-9]+]], [[A0:v[0-9]+]], [[AS0:v[0-9]+]], [[B0:v[0-9]+]], [[BS0:v[0-9]+]]); -// A5: TGEMV_MX([[D1:v[0-9]+]], [[A1:v[0-9]+]], [[AS1:v[0-9]+]], [[B1:v[0-9]+]], [[BS1:v[0-9]+]]); -// A5: TGEMV_MX([[D2:v[0-9]+]], [[A2:v[0-9]+]], [[AS2:v[0-9]+]], [[B2:v[0-9]+]], [[BS2:v[0-9]+]]); -// A5: TGEMV_MX([[D3:v[0-9]+]], [[C3:v[0-9]+]], [[A3:v[0-9]+]], [[AS3:v[0-9]+]], [[B3:v[0-9]+]], [[BS3:v[0-9]+]]); -// A5: TGEMV_MX([[D4:v[0-9]+]], [[C4:v[0-9]+]], [[A4:v[0-9]+]], [[AS4:v[0-9]+]], [[B4:v[0-9]+]], [[BS4:v[0-9]+]]); -// A5: TGEMV_MX([[D5:v[0-9]+]], [[C5:v[0-9]+]], [[A5:v[0-9]+]], [[AS5:v[0-9]+]], [[B5:v[0-9]+]], [[BS5:v[0-9]+]]); -// A5: TGEMV_MX([[D6:v[0-9]+]], [[A6:v[0-9]+]], [[AS6:v[0-9]+]], [[B6:v[0-9]+]], [[BS6:v[0-9]+]], [[BIAS6:v[0-9]+]]); +// A5: TGEMV_MX([[D0:[_A-Za-z][_A-Za-z0-9]*]], [[A0:[_A-Za-z][_A-Za-z0-9]*]], [[AS0:[_A-Za-z][_A-Za-z0-9]*]], [[B0:[_A-Za-z][_A-Za-z0-9]*]], [[BS0:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TGEMV_MX([[D1:[_A-Za-z][_A-Za-z0-9]*]], [[A1:[_A-Za-z][_A-Za-z0-9]*]], [[AS1:[_A-Za-z][_A-Za-z0-9]*]], [[B1:[_A-Za-z][_A-Za-z0-9]*]], [[BS1:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TGEMV_MX([[D2:[_A-Za-z][_A-Za-z0-9]*]], [[A2:[_A-Za-z][_A-Za-z0-9]*]], [[AS2:[_A-Za-z][_A-Za-z0-9]*]], [[B2:[_A-Za-z][_A-Za-z0-9]*]], [[BS2:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TGEMV_MX([[D3:[_A-Za-z][_A-Za-z0-9]*]], [[C3:[_A-Za-z][_A-Za-z0-9]*]], [[A3:[_A-Za-z][_A-Za-z0-9]*]], [[AS3:[_A-Za-z][_A-Za-z0-9]*]], [[B3:[_A-Za-z][_A-Za-z0-9]*]], [[BS3:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TGEMV_MX([[D4:[_A-Za-z][_A-Za-z0-9]*]], [[C4:[_A-Za-z][_A-Za-z0-9]*]], [[A4:[_A-Za-z][_A-Za-z0-9]*]], [[AS4:[_A-Za-z][_A-Za-z0-9]*]], [[B4:[_A-Za-z][_A-Za-z0-9]*]], [[BS4:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TGEMV_MX([[D5:[_A-Za-z][_A-Za-z0-9]*]], [[C5:[_A-Za-z][_A-Za-z0-9]*]], [[A5:[_A-Za-z][_A-Za-z0-9]*]], [[AS5:[_A-Za-z][_A-Za-z0-9]*]], [[B5:[_A-Za-z][_A-Za-z0-9]*]], [[BS5:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TGEMV_MX([[D6:[_A-Za-z][_A-Za-z0-9]*]], [[A6:[_A-Za-z][_A-Za-z0-9]*]], [[AS6:[_A-Za-z][_A-Za-z0-9]*]], [[B6:[_A-Za-z][_A-Za-z0-9]*]], [[BS6:[_A-Za-z][_A-Za-z0-9]*]], [[BIAS6:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/tgemv_mx_f8e5m2_emitc.pto b/test/lit/pto/tgemv_mx_f8e5m2_emitc.pto index 7a70f17c8c..bbc7bce964 100644 --- a/test/lit/pto/tgemv_mx_f8e5m2_emitc.pto +++ b/test/lit/pto/tgemv_mx_f8e5m2_emitc.pto @@ -13,4 +13,4 @@ module attributes {pto.target_arch = "a5"} { } } -// A5: TGEMV_MX([[D0:v[0-9]+]], [[A0:v[0-9]+]], [[AS0:v[0-9]+]], [[B0:v[0-9]+]], [[BS0:v[0-9]+]]); +// A5: TGEMV_MX([[D0:[_A-Za-z][_A-Za-z0-9]*]], [[A0:[_A-Za-z][_A-Za-z0-9]*]], [[AS0:[_A-Za-z][_A-Za-z0-9]*]], [[B0:[_A-Za-z][_A-Za-z0-9]*]], [[BS0:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/tgetval_level3_index_cast_regression.pto b/test/lit/pto/tgetval_level3_index_cast_regression.pto index 7ae49373ab..c6145c29cd 100644 --- a/test/lit/pto/tgetval_level3_index_cast_regression.pto +++ b/test/lit/pto/tgetval_level3_index_cast_regression.pto @@ -45,5 +45,5 @@ module { } // CHECK-LABEL: __global__ AICORE void tgetval_level3_index_cast_regression() { -// CHECK: int32_t [[ROW:v[0-9]+]] = [[IDX:v[0-9]+]].GetValue( +// CHECK: int32_t [[ROW:[_A-Za-z][_A-Za-z0-9]*]] = [[IDX:[_A-Za-z][_A-Za-z0-9]*]].GetValue( // CHECK: TINSERT( diff --git a/test/lit/pto/tile_compact_mode_emitc.pto b/test/lit/pto/tile_compact_mode_emitc.pto index b1ba469a6d..f03d81b79f 100644 --- a/test/lit/pto/tile_compact_mode_emitc.pto +++ b/test/lit/pto/tile_compact_mode_emitc.pto @@ -28,6 +28,6 @@ module { // A3-DAG: memref.alloc() : memref<1x16xf16, strided<[16, 1]>, #pto.address_space> // A3-DAG: memref.alloc() : memref<1x16xf16, strided<[16, 1]>, #pto.address_space> // A3-DAG: memref.alloc() : memref<1x16xf16, strided<[17, 1]>, #pto.address_space> -// A3: Tile [[DEFAULT:v[0-9]+]]; -// A3: Tile [[COMPACT:v[0-9]+]]; -// A3: Tile [[ROWP1:v[0-9]+]]; +// A3: Tile [[DEFAULT:[_A-Za-z][_A-Za-z0-9]*]]; +// A3: Tile [[COMPACT:[_A-Za-z][_A-Za-z0-9]*]]; +// A3: Tile [[ROWP1:[_A-Za-z][_A-Za-z0-9]*]]; diff --git a/test/lit/pto/tlog_precision_emitc.pto b/test/lit/pto/tlog_precision_emitc.pto index f50d1aefb6..22feab289c 100644 --- a/test/lit/pto/tlog_precision_emitc.pto +++ b/test/lit/pto/tlog_precision_emitc.pto @@ -14,5 +14,5 @@ module { } } -// A5: TLOG([[VDST:v[0-9]+]], [[VSRC:v[0-9]+]]); +// A5: TLOG([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TLOG([[VDST]], [[VSRC]]); diff --git a/test/lit/pto/tmatmul_acc_phase_emitc.pto b/test/lit/pto/tmatmul_acc_phase_emitc.pto index 41dad00490..ea66c663e3 100644 --- a/test/lit/pto/tmatmul_acc_phase_emitc.pto +++ b/test/lit/pto/tmatmul_acc_phase_emitc.pto @@ -48,7 +48,7 @@ module { } } -// A3: TMATMUL([[C:v[0-9]+]], [[A:v[0-9]+]], [[B:v[0-9]+]]); +// A3: TMATMUL([[C:[_A-Za-z][_A-Za-z0-9]*]], [[A:[_A-Za-z][_A-Za-z0-9]*]], [[B:[_A-Za-z][_A-Za-z0-9]*]]); // A3: TMATMUL([[C]], [[A]], [[B]]); // A3: TMATMUL([[C]], [[A]], [[B]]); // A3: TMATMUL_ACC([[C]], [[C]], [[A]], [[B]]); diff --git a/test/lit/pto/tmatmul_accphase_emitc.pto b/test/lit/pto/tmatmul_accphase_emitc.pto index ee02bb197b..ac1939a4ac 100644 --- a/test/lit/pto/tmatmul_accphase_emitc.pto +++ b/test/lit/pto/tmatmul_accphase_emitc.pto @@ -23,12 +23,12 @@ module attributes {pto.target_arch = "a5"} { } } -// A5: TMATMUL([[D0:v[0-9]+]], [[L0:v[0-9]+]], [[R0:v[0-9]+]]); +// A5: TMATMUL([[D0:[_A-Za-z][_A-Za-z0-9]*]], [[L0:[_A-Za-z][_A-Za-z0-9]*]], [[R0:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TMATMUL([[D0]], [[L0]], [[R0]]); // A5: TMATMUL([[D0]], [[L0]], [[R0]]); -// A5: TMATMUL_ACC([[D1:v[0-9]+]], [[CIN:v[0-9]+]], [[L1:v[0-9]+]], [[R1:v[0-9]+]]); +// A5: TMATMUL_ACC([[D1:[_A-Za-z][_A-Za-z0-9]*]], [[CIN:[_A-Za-z][_A-Za-z0-9]*]], [[L1:[_A-Za-z][_A-Za-z0-9]*]], [[R1:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TMATMUL_ACC([[D1]], [[CIN]], [[L1]], [[R1]]); // A5: TMATMUL_ACC([[D1]], [[CIN]], [[L1]], [[R1]]); -// A5: TMATMUL_BIAS([[D2:v[0-9]+]], [[L2:v[0-9]+]], [[R2:v[0-9]+]], [[B2:v[0-9]+]]); +// A5: TMATMUL_BIAS([[D2:[_A-Za-z][_A-Za-z0-9]*]], [[L2:[_A-Za-z][_A-Za-z0-9]*]], [[R2:[_A-Za-z][_A-Za-z0-9]*]], [[B2:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TMATMUL_BIAS([[D2]], [[L2]], [[R2]], [[B2]]); // A5: TMATMUL_BIAS([[D2]], [[L2]], [[R2]], [[B2]]); diff --git a/test/lit/pto/tmatmul_low_precision_emitc.pto b/test/lit/pto/tmatmul_low_precision_emitc.pto index fd754c3441..1a89b8e908 100644 --- a/test/lit/pto/tmatmul_low_precision_emitc.pto +++ b/test/lit/pto/tmatmul_low_precision_emitc.pto @@ -25,7 +25,7 @@ module attributes {pto.target_arch = "a5"} { } } -// A5: TMATMUL([[D0:v[0-9]+]], [[L0:v[0-9]+]], [[R0:v[0-9]+]]); -// A5: TMATMUL_ACC([[D1:v[0-9]+]], [[C1:v[0-9]+]], [[L1:v[0-9]+]], [[R1:v[0-9]+]]); -// A5: TMATMUL_BIAS([[D2:v[0-9]+]], [[L2:v[0-9]+]], [[R2:v[0-9]+]], [[B2:v[0-9]+]]); -// A5: TMATMUL([[D3:v[0-9]+]], [[L3:v[0-9]+]], [[R3:v[0-9]+]]); +// A5: TMATMUL([[D0:[_A-Za-z][_A-Za-z0-9]*]], [[L0:[_A-Za-z][_A-Za-z0-9]*]], [[R0:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TMATMUL_ACC([[D1:[_A-Za-z][_A-Za-z0-9]*]], [[C1:[_A-Za-z][_A-Za-z0-9]*]], [[L1:[_A-Za-z][_A-Za-z0-9]*]], [[R1:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TMATMUL_BIAS([[D2:[_A-Za-z][_A-Za-z0-9]*]], [[L2:[_A-Za-z][_A-Za-z0-9]*]], [[R2:[_A-Za-z][_A-Za-z0-9]*]], [[B2:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TMATMUL([[D3:[_A-Za-z][_A-Za-z0-9]*]], [[L3:[_A-Za-z][_A-Za-z0-9]*]], [[R3:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/tmatmul_mx_accphase_emitc.pto b/test/lit/pto/tmatmul_mx_accphase_emitc.pto index c1d4163567..baa7ca72eb 100644 --- a/test/lit/pto/tmatmul_mx_accphase_emitc.pto +++ b/test/lit/pto/tmatmul_mx_accphase_emitc.pto @@ -23,9 +23,9 @@ module attributes {pto.target_arch = "a5"} { } } -// A5: TMATMUL_MX([[D0:v[0-9]+]], [[A0:v[0-9]+]], [[AS0:v[0-9]+]], [[B0:v[0-9]+]], [[BS0:v[0-9]+]]); -// A5: TMATMUL_MX([[D1:v[0-9]+]], [[A1:v[0-9]+]], [[AS1:v[0-9]+]], [[B1:v[0-9]+]], [[BS1:v[0-9]+]]); -// A5: TMATMUL_MX([[D2:v[0-9]+]], [[A2:v[0-9]+]], [[AS2:v[0-9]+]], [[B2:v[0-9]+]], [[BS2:v[0-9]+]]); -// A5: TMATMUL_MX([[D3:v[0-9]+]], [[C3:v[0-9]+]], [[A3:v[0-9]+]], [[AS3:v[0-9]+]], [[B3:v[0-9]+]], [[BS3:v[0-9]+]]); -// A5: TMATMUL_MX([[D4:v[0-9]+]], [[C4:v[0-9]+]], [[A4:v[0-9]+]], [[AS4:v[0-9]+]], [[B4:v[0-9]+]], [[BS4:v[0-9]+]]); -// A5: TMATMUL_MX([[D5:v[0-9]+]], [[C5:v[0-9]+]], [[A5:v[0-9]+]], [[AS5:v[0-9]+]], [[B5:v[0-9]+]], [[BS5:v[0-9]+]]); +// A5: TMATMUL_MX([[D0:[_A-Za-z][_A-Za-z0-9]*]], [[A0:[_A-Za-z][_A-Za-z0-9]*]], [[AS0:[_A-Za-z][_A-Za-z0-9]*]], [[B0:[_A-Za-z][_A-Za-z0-9]*]], [[BS0:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TMATMUL_MX([[D1:[_A-Za-z][_A-Za-z0-9]*]], [[A1:[_A-Za-z][_A-Za-z0-9]*]], [[AS1:[_A-Za-z][_A-Za-z0-9]*]], [[B1:[_A-Za-z][_A-Za-z0-9]*]], [[BS1:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TMATMUL_MX([[D2:[_A-Za-z][_A-Za-z0-9]*]], [[A2:[_A-Za-z][_A-Za-z0-9]*]], [[AS2:[_A-Za-z][_A-Za-z0-9]*]], [[B2:[_A-Za-z][_A-Za-z0-9]*]], [[BS2:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TMATMUL_MX([[D3:[_A-Za-z][_A-Za-z0-9]*]], [[C3:[_A-Za-z][_A-Za-z0-9]*]], [[A3:[_A-Za-z][_A-Za-z0-9]*]], [[AS3:[_A-Za-z][_A-Za-z0-9]*]], [[B3:[_A-Za-z][_A-Za-z0-9]*]], [[BS3:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TMATMUL_MX([[D4:[_A-Za-z][_A-Za-z0-9]*]], [[C4:[_A-Za-z][_A-Za-z0-9]*]], [[A4:[_A-Za-z][_A-Za-z0-9]*]], [[AS4:[_A-Za-z][_A-Za-z0-9]*]], [[B4:[_A-Za-z][_A-Za-z0-9]*]], [[BS4:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TMATMUL_MX([[D5:[_A-Za-z][_A-Za-z0-9]*]], [[C5:[_A-Za-z][_A-Za-z0-9]*]], [[A5:[_A-Za-z][_A-Za-z0-9]*]], [[AS5:[_A-Za-z][_A-Za-z0-9]*]], [[B5:[_A-Za-z][_A-Za-z0-9]*]], [[BS5:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/tmatmul_mx_emitc.pto b/test/lit/pto/tmatmul_mx_emitc.pto index 78b016e9ce..487a629787 100644 --- a/test/lit/pto/tmatmul_mx_emitc.pto +++ b/test/lit/pto/tmatmul_mx_emitc.pto @@ -39,6 +39,6 @@ module { } } -// A5: TMATMUL_MX([[D0:v[0-9]+]], [[A0:v[0-9]+]], [[AS0:v[0-9]+]], [[B0:v[0-9]+]], [[BS0:v[0-9]+]]); -// A5: TMATMUL_MX([[D1:v[0-9]+]], [[C1:v[0-9]+]], [[A1:v[0-9]+]], [[AS1:v[0-9]+]], [[B1:v[0-9]+]], [[BS1:v[0-9]+]]); -// A5: TMATMUL_MX([[D2:v[0-9]+]], [[A2:v[0-9]+]], [[AS2:v[0-9]+]], [[B2:v[0-9]+]], [[BS2:v[0-9]+]], [[BIAS2:v[0-9]+]]); +// A5: TMATMUL_MX([[D0:[_A-Za-z][_A-Za-z0-9]*]], [[A0:[_A-Za-z][_A-Za-z0-9]*]], [[AS0:[_A-Za-z][_A-Za-z0-9]*]], [[B0:[_A-Za-z][_A-Za-z0-9]*]], [[BS0:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TMATMUL_MX([[D1:[_A-Za-z][_A-Za-z0-9]*]], [[C1:[_A-Za-z][_A-Za-z0-9]*]], [[A1:[_A-Za-z][_A-Za-z0-9]*]], [[AS1:[_A-Za-z][_A-Za-z0-9]*]], [[B1:[_A-Za-z][_A-Za-z0-9]*]], [[BS1:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TMATMUL_MX([[D2:[_A-Za-z][_A-Za-z0-9]*]], [[A2:[_A-Za-z][_A-Za-z0-9]*]], [[AS2:[_A-Za-z][_A-Za-z0-9]*]], [[B2:[_A-Za-z][_A-Za-z0-9]*]], [[BS2:[_A-Za-z][_A-Za-z0-9]*]], [[BIAS2:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/tmatmul_mx_f8e5m2_emitc.pto b/test/lit/pto/tmatmul_mx_f8e5m2_emitc.pto index e70247a558..95c9068ab1 100644 --- a/test/lit/pto/tmatmul_mx_f8e5m2_emitc.pto +++ b/test/lit/pto/tmatmul_mx_f8e5m2_emitc.pto @@ -12,4 +12,4 @@ module attributes {pto.target_arch = "a5"} { } } -// A5: TMATMUL_MX([[D0:v[0-9]+]], [[A0:v[0-9]+]], [[AS0:v[0-9]+]], [[B0:v[0-9]+]], [[BS0:v[0-9]+]]); +// A5: TMATMUL_MX([[D0:[_A-Za-z][_A-Za-z0-9]*]], [[A0:[_A-Za-z][_A-Za-z0-9]*]], [[AS0:[_A-Za-z][_A-Za-z0-9]*]], [[B0:[_A-Za-z][_A-Za-z0-9]*]], [[BS0:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/tmatmul_mx_low_precision_variants_emitc.pto b/test/lit/pto/tmatmul_mx_low_precision_variants_emitc.pto index 35e833adcf..e74114b48e 100644 --- a/test/lit/pto/tmatmul_mx_low_precision_variants_emitc.pto +++ b/test/lit/pto/tmatmul_mx_low_precision_variants_emitc.pto @@ -30,6 +30,6 @@ module attributes {pto.target_arch = "a5"} { } } -// A5: TMATMUL_MX([[D0:v[0-9]+]], [[A0:v[0-9]+]], [[AS0:v[0-9]+]], [[B0:v[0-9]+]], [[BS0:v[0-9]+]]); -// A5: TMATMUL_MX([[D1:v[0-9]+]], [[A1:v[0-9]+]], [[AS1:v[0-9]+]], [[B1:v[0-9]+]], [[BS1:v[0-9]+]]); -// A5: TMATMUL_MX([[D2:v[0-9]+]], [[A2:v[0-9]+]], [[AS2:v[0-9]+]], [[B2:v[0-9]+]], [[BS2:v[0-9]+]], [[BIAS2:v[0-9]+]]); +// A5: TMATMUL_MX([[D0:[_A-Za-z][_A-Za-z0-9]*]], [[A0:[_A-Za-z][_A-Za-z0-9]*]], [[AS0:[_A-Za-z][_A-Za-z0-9]*]], [[B0:[_A-Za-z][_A-Za-z0-9]*]], [[BS0:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TMATMUL_MX([[D1:[_A-Za-z][_A-Za-z0-9]*]], [[A1:[_A-Za-z][_A-Za-z0-9]*]], [[AS1:[_A-Za-z][_A-Za-z0-9]*]], [[B1:[_A-Za-z][_A-Za-z0-9]*]], [[BS1:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TMATMUL_MX([[D2:[_A-Za-z][_A-Za-z0-9]*]], [[A2:[_A-Za-z][_A-Za-z0-9]*]], [[AS2:[_A-Za-z][_A-Za-z0-9]*]], [[B2:[_A-Za-z][_A-Za-z0-9]*]], [[BS2:[_A-Za-z][_A-Za-z0-9]*]], [[BIAS2:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/tpow_emitc.pto b/test/lit/pto/tpow_emitc.pto index 51c25e0206..186ce1c57c 100644 --- a/test/lit/pto/tpow_emitc.pto +++ b/test/lit/pto/tpow_emitc.pto @@ -21,4 +21,4 @@ module { // A3: pto.tpow ins([[BASE:%[0-9]+]], [[EXP:%[0-9]+]], [[TMP:%[0-9]+]] : memref<1x16xf32 // A3: outs([[DST:%[0-9]+]] : memref<1x16xf32 -// A3: TPOW([[VDST:v[0-9]+]], [[VBASE:v[0-9]+]], [[VEXP:v[0-9]+]], [[VTMP:v[0-9]+]]); +// A3: TPOW([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VBASE:[_A-Za-z][_A-Za-z0-9]*]], [[VEXP:[_A-Za-z][_A-Za-z0-9]*]], [[VTMP:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/tpow_int_no_tmp.pto b/test/lit/pto/tpow_int_no_tmp.pto index d5741b0927..a1ad9b88d9 100644 --- a/test/lit/pto/tpow_int_no_tmp.pto +++ b/test/lit/pto/tpow_int_no_tmp.pto @@ -21,4 +21,4 @@ module { // A3: pto.tpow ins({{.*}}, {{.*}} : memref<1x16xi32 // A3-NOT: , {{.*}} : memref<1x16xi32{{[^,]*}}, memref<1x16xi32{{[^,]*}}, memref<1x16xi32 -// A3: TPOW([[VDST:v[0-9]+]], [[VBASE:v[0-9]+]], [[VEXP:v[0-9]+]]); +// A3: TPOW([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VBASE:[_A-Za-z][_A-Za-z0-9]*]], [[VEXP:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/tpow_precision_emitc.pto b/test/lit/pto/tpow_precision_emitc.pto index 905da4489a..7c484bbacc 100644 --- a/test/lit/pto/tpow_precision_emitc.pto +++ b/test/lit/pto/tpow_precision_emitc.pto @@ -24,5 +24,5 @@ module { } } -// A5: TPOW([[VDST:v[0-9]+]], [[VBASE:v[0-9]+]], [[VEXP:v[0-9]+]], [[VTMP:v[0-9]+]]); +// A5: TPOW([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VBASE:[_A-Za-z][_A-Za-z0-9]*]], [[VEXP:[_A-Za-z][_A-Za-z0-9]*]], [[VTMP:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TPOW([[VDST]], [[VBASE]], [[VEXP]], [[VTMP]]); diff --git a/test/lit/pto/tprefetch_async_emitc.pto b/test/lit/pto/tprefetch_async_emitc.pto index 156fba5b85..020aa96bf5 100644 --- a/test/lit/pto/tprefetch_async_emitc.pto +++ b/test/lit/pto/tprefetch_async_emitc.pto @@ -12,7 +12,7 @@ module { } // CHECK-LABEL: AICORE void tprefetch_async_emitc( -// CHECK: pto::PrefetchAsyncContext [[CTX:v[0-9]+]] = pto::PrefetchAsyncContext( -// CHECK: pto::comm::AsyncEvent [[EVT:v[0-9]+]] = TPREFETCH_ASYNC( -// CHECK: pto::comm::AsyncSession [[SESSION:v[0-9]+]] = [[CTX]].session; -// CHECK: bool [[DONE:v[0-9]+]] = [[EVT]].Wait([[SESSION]]); +// CHECK: pto::PrefetchAsyncContext [[CTX:[_A-Za-z][_A-Za-z0-9]*]] = pto::PrefetchAsyncContext( +// CHECK: pto::comm::AsyncEvent [[EVT:[_A-Za-z][_A-Za-z0-9]*]] = TPREFETCH_ASYNC( +// CHECK: pto::comm::AsyncSession [[SESSION:[_A-Za-z][_A-Za-z0-9]*]] = [[CTX]].session; +// CHECK: bool [[DONE:[_A-Za-z][_A-Za-z0-9]*]] = [[EVT]].Wait([[SESSION]]); diff --git a/test/lit/pto/tprint_alloc_tile_no_rebind.pto b/test/lit/pto/tprint_alloc_tile_no_rebind.pto index 0ebf9332ed..f219123e55 100644 --- a/test/lit/pto/tprint_alloc_tile_no_rebind.pto +++ b/test/lit/pto/tprint_alloc_tile_no_rebind.pto @@ -13,10 +13,10 @@ module { } // CHECK-LABEL: __global__ AICORE void print_kernel() { -// CHECK: Tile [[TILE:v[0-9]+]]; -// CHECK: Tile [[TILE_COPY:v[0-9]+]] = [[TILE]]; -// CHECK: TASSIGN([[TILE_COPY]], [[ADDR:v[0-9]+]]); +// CHECK: Tile [[TILE_STORAGE:[_A-Za-z][_A-Za-z0-9]*]] +// CHECK: Tile [[TILE:[_A-Za-z][_A-Za-z0-9]*]] = [[TILE_STORAGE]]; +// CHECK: TASSIGN([[TILE]], {{[_A-Za-z][_A-Za-z0-9]*}}); // CHECK-NOT: TASSIGN( // CHECK-NOT: .data() // CHECK-NOT: reinterpret_cast -// CHECK: TPRINT{{(<.*>)?}}([[TILE_COPY]]); +// CHECK: TPRINT{{(<.*>)?}}([[TILE]]); diff --git a/test/lit/pto/tprint_tmp_format_emitc.pto b/test/lit/pto/tprint_tmp_format_emitc.pto index 7c62c08038..9c45843aa3 100644 --- a/test/lit/pto/tprint_tmp_format_emitc.pto +++ b/test/lit/pto/tprint_tmp_format_emitc.pto @@ -38,5 +38,4 @@ module attributes {pto.target_arch = "a5"} { } } -// CHECK: TPRINT( -// CHECK-SAME: {{v[0-9]+}}, {{v[0-9]+}} +// CHECK: TPRINT({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); diff --git a/test/lit/pto/tpush_tpop_dynamic_validshape_a5.pto b/test/lit/pto/tpush_tpop_dynamic_validshape_a5.pto index a39b43af6c..812ef476d4 100644 --- a/test/lit/pto/tpush_tpop_dynamic_validshape_a5.pto +++ b/test/lit/pto/tpush_tpop_dynamic_validshape_a5.pto @@ -47,13 +47,13 @@ module { } // A5-LABEL: AICORE void cube_kernel( -// A5: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 2, false>( -// A5: Tile [[CUBE_TILE:v[0-9]+]] = Tile({{v[0-9]+}}, {{v[0-9]+}}); -// A5: [[CUBE_TILE]].SetValidShape({{v[0-9]+}}, {{v[0-9]+}}); -// A5: TPOP, Tile, TileSplitAxis::TILE_LEFT_RIGHT>({{v[0-9]+}}, [[CUBE_TILE]]); +// A5: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 2, false>( +// A5: Tile [[CUBE_TILE:[_A-Za-z][_A-Za-z0-9]*]] = Tile({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); +// A5: [[CUBE_TILE]].SetValidShape({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); +// A5: TPOP, Tile, TileSplitAxis::TILE_LEFT_RIGHT>({{[_A-Za-z][_A-Za-z0-9]*}}, [[CUBE_TILE]]); // A5-LABEL: AICORE void vector_kernel( -// A5: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 2, false>( -// A5: Tile [[VEC_TILE:v[0-9]+]] = Tile({{v[0-9]+}}, {{v[0-9]+}}); -// A5: [[VEC_TILE]].SetValidShape({{v[0-9]+}}, {{v[0-9]+}}); -// A5: TPOP, Tile, TileSplitAxis::TILE_LEFT_RIGHT>({{v[0-9]+}}, [[VEC_TILE]]); +// A5: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 2, false>( +// A5: Tile [[VEC_TILE:[_A-Za-z][_A-Za-z0-9]*]] = Tile({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); +// A5: [[VEC_TILE]].SetValidShape({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); +// A5: TPOP, Tile, TileSplitAxis::TILE_LEFT_RIGHT>({{[_A-Za-z][_A-Za-z0-9]*}}, [[VEC_TILE]]); diff --git a/test/lit/pto/tpush_tpop_dynamic_validshape_default_a5.pto b/test/lit/pto/tpush_tpop_dynamic_validshape_default_a5.pto index 0b96de6043..f1c64f6432 100644 --- a/test/lit/pto/tpush_tpop_dynamic_validshape_default_a5.pto +++ b/test/lit/pto/tpush_tpop_dynamic_validshape_default_a5.pto @@ -45,11 +45,11 @@ module { } // A5-LABEL: AICORE void cube_kernel( -// A5: Tile {{v[0-9]+}} = Tile({{v[0-9]+}}, {{v[0-9]+}}); +// A5: Tile {{[_A-Za-z][_A-Za-z0-9]*}} = Tile({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); // A5-NOT: SetValidShape // A5: TPOP, Tile, TileSplitAxis::TILE_LEFT_RIGHT>( // A5-LABEL: AICORE void vector_kernel( -// A5: Tile {{v[0-9]+}} = Tile({{v[0-9]+}}, {{v[0-9]+}}); +// A5: Tile {{[_A-Za-z][_A-Za-z0-9]*}} = Tile({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); // A5-NOT: SetValidShape // A5: TPOP, Tile, TileSplitAxis::TILE_LEFT_RIGHT>( diff --git a/test/lit/pto/tpush_tpop_emitc.pto b/test/lit/pto/tpush_tpop_emitc.pto index f976c8d677..523d8bedfa 100644 --- a/test/lit/pto/tpush_tpop_emitc.pto +++ b/test/lit/pto/tpush_tpop_emitc.pto @@ -32,20 +32,20 @@ module { } // A3-LABEL: AICORE void cube_push_gm( -// A3: const int32_t {{v[0-9]+}} = 0; -// A3: const int64_t {{v[0-9]+}} = 0; +// A3: const int32_t {{[_A-Za-z][_A-Za-z0-9]*}} = 0; +// A3: const int64_t {{[_A-Za-z][_A-Za-z0-9]*}} = 0; // A3: #if defined(__DAV_CUBE__) -// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, true>( +// A3: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, true>( // A3: TPUSH, Tile, TileSplitAxis::TILE_NO_SPLIT>( // A3: #endif // __DAV_CUBE__ // A3-LABEL: AICORE void vector_pop_gm( -// A3: const int32_t {{v[0-9]+}} = 0; +// A3: const int32_t {{[_A-Za-z][_A-Za-z0-9]*}} = 0; // A3: #if defined(__DAV_VEC__) // A3: set_mask_norm(); // A3: set_vector_mask(-1, -1); -// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, false>( -// A3: Tile {{v[0-9]+}}; +// A3: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_C2V, 1024, 8, 8, false>( +// A3: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A3: TPOP, Tile, TileSplitAxis::TILE_UP_DOWN>( // A3: TFREE, TileSplitAxis::TILE_LEFT_RIGHT>( // A3: #endif // __DAV_VEC__ diff --git a/test/lit/pto/tpush_tpop_frontend_lowering_a3.pto b/test/lit/pto/tpush_tpop_frontend_lowering_a3.pto index 0bee9339fc..7aaa06dfd9 100644 --- a/test/lit/pto/tpush_tpop_frontend_lowering_a3.pto +++ b/test/lit/pto/tpush_tpop_frontend_lowering_a3.pto @@ -61,20 +61,20 @@ module { } // A3-LABEL: AICORE void cube_kernel(__gm__ float* -// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>( +// A3: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>( // A3: TPUSH -// A3: Tile {{v[0-9]+}}; +// A3: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A3: TPOP, Tile, TileSplitAxis::TILE_NO_SPLIT>( -// A3: Tile {{v[0-9]+}}; +// A3: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A3: TMOV( // A3: TFREE, TileSplitAxis::TILE_NO_SPLIT>( // A3-LABEL: AICORE void vector_kernel(__gm__ float* -// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>( -// A3: Tile {{v[0-9]+}}; +// A3: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>( +// A3: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A3: TPUSH, Tile, TileSplitAxis::TILE_NO_SPLIT>( // A3: TPOP, Tile, TileSplitAxis::TILE_NO_SPLIT>( -// A3: Tile {{v[0-9]+}}; +// A3: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A3: TNEG( // A3: TFREE, TileSplitAxis::TILE_NO_SPLIT>( diff --git a/test/lit/pto/tpush_tpop_frontend_lowering_a3_gss.pto b/test/lit/pto/tpush_tpop_frontend_lowering_a3_gss.pto index 979975bb65..9d45c628e1 100644 --- a/test/lit/pto/tpush_tpop_frontend_lowering_a3_gss.pto +++ b/test/lit/pto/tpush_tpop_frontend_lowering_a3_gss.pto @@ -61,20 +61,20 @@ module { } // A3-LABEL: AICORE void cube_kernel(__gm__ float* -// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>( +// A3: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>( // A3: TPUSH -// A3: Tile {{v[0-9]+}}; +// A3: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A3: TPOP, Tile, TileSplitAxis::TILE_NO_SPLIT>( -// A3: Tile {{v[0-9]+}}; +// A3: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A3: TMOV( // A3: TFREE, TileSplitAxis::TILE_NO_SPLIT>( // A3-LABEL: AICORE void vector_kernel(__gm__ float* -// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>( -// A3: Tile {{v[0-9]+}}; +// A3: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 4, true>( +// A3: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A3: TPUSH, Tile, TileSplitAxis::TILE_NO_SPLIT>( // A3: TPOP, Tile, TileSplitAxis::TILE_NO_SPLIT>( -// A3: Tile {{v[0-9]+}}; +// A3: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A3: TNEG( // A3: TFREE, TileSplitAxis::TILE_NO_SPLIT>( diff --git a/test/lit/pto/tpush_tpop_frontend_lowering_a5.pto b/test/lit/pto/tpush_tpop_frontend_lowering_a5.pto index dc4f0625a0..413abb9029 100644 --- a/test/lit/pto/tpush_tpop_frontend_lowering_a5.pto +++ b/test/lit/pto/tpush_tpop_frontend_lowering_a5.pto @@ -57,23 +57,23 @@ module { } // A5-LABEL: AICORE void cube_kernel( -// A5: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 2, true>( +// A5: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 2, true>( // A5: TPUSH -// A5: Tile {{v[0-9]+}}; +// A5: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A5: TPOP, Tile, TileSplitAxis::TILE_NO_SPLIT>( -// A5: Tile {{v[0-9]+}}; +// A5: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A5: TMOV( // A5: TFREE, TileSplitAxis::TILE_NO_SPLIT>( // A5-LABEL: AICORE void vector_kernel( // A5: if (get_subblockid() == 0) { -// A5: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 2, true>( -// A5: Tile {{v[0-9]+}}; -// A5: Tile {{v[0-9]+}}; +// A5: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_BOTH, 1024, 4, 2, true>( +// A5: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; +// A5: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A5: TMOV( // A5: TPUSH, Tile, TileSplitAxis::TILE_NO_SPLIT>( // A5: TPOP, Tile, TileSplitAxis::TILE_NO_SPLIT>( -// A5: Tile {{v[0-9]+}}; +// A5: Tile {{[_A-Za-z][_A-Za-z0-9]*}}; // A5: TNEG( // A5: TFREE, TileSplitAxis::TILE_NO_SPLIT>( // A5: } diff --git a/test/lit/pto/tpush_tpop_globaltensor_frontend_a3.pto b/test/lit/pto/tpush_tpop_globaltensor_frontend_a3.pto index cb315e14f8..d8973881e6 100644 --- a/test/lit/pto/tpush_tpop_globaltensor_frontend_a3.pto +++ b/test/lit/pto/tpush_tpop_globaltensor_frontend_a3.pto @@ -64,18 +64,16 @@ module { } // CHECK-LABEL: AICORE void cube_kernel -// CHECK-SAME: (__gm__ float* [[CUBE_GM:v[0-9]+]], +// CHECK-SAME: (__gm__ float* [[CUBE_GM:[_A-Za-z][_A-Za-z0-9]*]], // CHECK: TPipe<0, Direction::DIR_C2V, 1024, 8, 8, true>([[CUBE_GM]], {{.*}}, {{.*}}); -// CHECK: GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND> [[CUBE_ENTRY:v[0-9]+]](nullptr); -// CHECK: GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND> [[CUBE_ENTRY_VAL:v[0-9]+]] = [[CUBE_ENTRY]]; -// CHECK: TALLOC, GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND>, TileSplitAxis::TILE_NO_SPLIT>({{.*}}, [[CUBE_ENTRY_VAL]]); +// CHECK: GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND> {{[_A-Za-z][_A-Za-z0-9]*}}(nullptr); +// CHECK: TALLOC, GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND>, TileSplitAxis::TILE_NO_SPLIT>({{.*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); // CHECK: TSTORE -// CHECK: TPUSH, GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND>, TileSplitAxis::TILE_NO_SPLIT>({{.*}}, [[CUBE_ENTRY_VAL]]); +// CHECK: TPUSH, GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND>, TileSplitAxis::TILE_NO_SPLIT>({{.*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); // CHECK-LABEL: AICORE void vector_kernel -// CHECK-SAME: (__gm__ float* [[VEC_GM:v[0-9]+]], +// CHECK-SAME: (__gm__ float* [[VEC_GM:[_A-Za-z][_A-Za-z0-9]*]], // CHECK: TPipe<0, Direction::DIR_C2V, 1024, 8, 8, true>([[VEC_GM]], {{.*}}, {{.*}}); -// CHECK: GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND> [[VEC_ENTRY:v[0-9]+]](nullptr); -// CHECK: GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND> [[VEC_ENTRY_VAL:v[0-9]+]] = [[VEC_ENTRY]]; +// CHECK: GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND> {{[_A-Za-z][_A-Za-z0-9]*}}(nullptr); // CHECK: TPOP, GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND>, TileSplitAxis::TILE_NO_SPLIT>( // CHECK: TLOAD // CHECK: TFREE, GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND>, TileSplitAxis::TILE_NO_SPLIT>( diff --git a/test/lit/pto/tpush_tpop_globaltensor_internal_a3.pto b/test/lit/pto/tpush_tpop_globaltensor_internal_a3.pto index 0b80720028..c56c19ed82 100644 --- a/test/lit/pto/tpush_tpop_globaltensor_internal_a3.pto +++ b/test/lit/pto/tpush_tpop_globaltensor_internal_a3.pto @@ -20,7 +20,7 @@ module { } // CHECK-LABEL: AICORE void cube_globaltensor_internal( -// CHECK: __gm__ float* [[GM_DATA:v[0-9]+]] = PTOAS__GLOBAL_TENSOR_DATA( +// CHECK: __gm__ float* [[GM_DATA:[_A-Za-z][_A-Za-z0-9]*]] = PTOAS__GLOBAL_TENSOR_DATA( // CHECK: TPipe<0, Direction::DIR_C2V, 1024, 8, 8, true>([[GM_DATA]], {{.*}}, {{.*}}); // CHECK: TALLOC, GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND>, TileSplitAxis::TILE_NO_SPLIT>( // CHECK: TPUSH, GlobalTensor, pto::Stride<256, 256, 256, 16, 1>, pto::Layout::ND>, TileSplitAxis::TILE_NO_SPLIT>( diff --git a/test/lit/pto/tpush_tpop_internal_slot_num_a3.pto b/test/lit/pto/tpush_tpop_internal_slot_num_a3.pto index 2d314ad3f0..0ec0ef23a2 100644 --- a/test/lit/pto/tpush_tpop_internal_slot_num_a3.pto +++ b/test/lit/pto/tpush_tpop_internal_slot_num_a3.pto @@ -17,4 +17,4 @@ module { } // A3-LABEL: AICORE void cube_kernel( -// A3: auto {{v[0-9]+}} = TPipe<0, Direction::DIR_C2V, 1024, 2, 1, false>( +// A3: auto {{[_A-Za-z][_A-Za-z0-9]*}} = TPipe<0, Direction::DIR_C2V, 1024, 2, 1, false>( diff --git a/test/lit/pto/trecip_precision_emitc.pto b/test/lit/pto/trecip_precision_emitc.pto index 830d03d0ea..73311a0b79 100644 --- a/test/lit/pto/trecip_precision_emitc.pto +++ b/test/lit/pto/trecip_precision_emitc.pto @@ -14,5 +14,5 @@ module { } } -// A5: TRECIP([[VDST:v[0-9]+]], [[VSRC:v[0-9]+]]); +// A5: TRECIP([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TRECIP([[VDST]], [[VSRC]]); diff --git a/test/lit/pto/trem_emitc.pto b/test/lit/pto/trem_emitc.pto index 2c8760b809..a5aa39f7ee 100644 --- a/test/lit/pto/trem_emitc.pto +++ b/test/lit/pto/trem_emitc.pto @@ -15,4 +15,4 @@ module { // A3-SAME: memref<1x16xf32 // A3-SAME: memref<2x16xf32 // A3: outs([[DST:%[0-9]+]] : memref<1x16xf32 -// A3: TREM([[VDST:v[0-9]+]], [[VSRC0:v[0-9]+]], [[VSRC1:v[0-9]+]], [[VTMP:v[0-9]+]]); +// A3: TREM([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC0:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC1:[_A-Za-z][_A-Za-z0-9]*]], [[VTMP:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/trem_precision_emitc.pto b/test/lit/pto/trem_precision_emitc.pto index 462867bd48..765c9c0ae0 100644 --- a/test/lit/pto/trem_precision_emitc.pto +++ b/test/lit/pto/trem_precision_emitc.pto @@ -16,5 +16,5 @@ module { } } -// A5: TREM([[VDST:v[0-9]+]], [[VSRC0:v[0-9]+]], [[VSRC1:v[0-9]+]], [[VTMP:v[0-9]+]]); +// A5: TREM([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC0:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC1:[_A-Za-z][_A-Za-z0-9]*]], [[VTMP:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TREM([[VDST]], [[VSRC0]], [[VSRC1]], [[VTMP]]); diff --git a/test/lit/pto/treshape_static_valid_shape_emitc.pto b/test/lit/pto/treshape_static_valid_shape_emitc.pto index 701874e5f0..f11bbcf90b 100644 --- a/test/lit/pto/treshape_static_valid_shape_emitc.pto +++ b/test/lit/pto/treshape_static_valid_shape_emitc.pto @@ -35,7 +35,9 @@ module attributes {pto.target_arch = "a5"} { // CHECK: Tile [[SRC_STORAGE:v[0-9]+]]; // CHECK: Tile [[SRC:v[0-9]+]] = [[SRC_STORAGE]]; // CHECK: TASSIGN([[SRC]], -// CHECK: Tile [[RESHAPED_STORAGE:v[0-9]+]]; +// CHECK: // pto: %reshaped +// CHECK-NEXT: Tile [[RESHAPED_STORAGE:v[0-9]+]]; +// CHECK: // pto: %reshaped // CHECK-NEXT: Tile [[RESHAPED:v[0-9]+]] = [[RESHAPED_STORAGE]]; // CHECK-NEXT: TRESHAPE([[RESHAPED]], [[SRC]]); // CHECK-NOT: SetValidShape diff --git a/test/lit/pto/trowexpandadd_tmp_emitc.pto b/test/lit/pto/trowexpandadd_tmp_emitc.pto index b3652e776d..979650a87c 100644 --- a/test/lit/pto/trowexpandadd_tmp_emitc.pto +++ b/test/lit/pto/trowexpandadd_tmp_emitc.pto @@ -23,5 +23,5 @@ module { } } -// A5: TROWEXPANDADD([[VDST0:v[0-9]+]], [[VSRC00:v[0-9]+]], [[VSRC01:v[0-9]+]]); -// A5: TROWEXPANDADD([[VDST1:v[0-9]+]], [[VSRC10:v[0-9]+]], [[VSRC11:v[0-9]+]], [[VTMP:v[0-9]+]]); +// A5: TROWEXPANDADD({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); +// A5: TROWEXPANDADD({{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}, {{[_A-Za-z][_A-Za-z0-9]*}}); diff --git a/test/lit/pto/trowexpanddiv_precision_emitc.pto b/test/lit/pto/trowexpanddiv_precision_emitc.pto index 6056f708f1..af9e1241bb 100644 --- a/test/lit/pto/trowexpanddiv_precision_emitc.pto +++ b/test/lit/pto/trowexpanddiv_precision_emitc.pto @@ -16,5 +16,5 @@ module { } } -// A5: TROWEXPANDDIV([[VDST0:v[0-9]+]], [[VSRC00:v[0-9]+]], [[VSRC01:v[0-9]+]]); -// A5: TROWEXPANDDIV([[VDST1:v[0-9]+]], [[VSRC10:v[0-9]+]], [[VSRC11:v[0-9]+]], [[VTMP:v[0-9]+]]); +// A5: TROWEXPANDDIV([[VDST0:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC00:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC01:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TROWEXPANDDIV([[VDST1:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC10:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC11:[_A-Za-z][_A-Za-z0-9]*]], [[VTMP:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/trsqrt_emitc.pto b/test/lit/pto/trsqrt_emitc.pto index 2287bfe29f..b9b1b55bf3 100644 --- a/test/lit/pto/trsqrt_emitc.pto +++ b/test/lit/pto/trsqrt_emitc.pto @@ -17,5 +17,5 @@ module { // A3: outs([[DST0:%[0-9]+]] : memref<1x16xf16 // A3: pto.trsqrt ins([[SRC1:%[0-9]+]], [[TMP:%[0-9]+]] : memref<1x16xf16 // A3: outs([[DST1:%[0-9]+]] : memref<1x16xf16 -// A3: TRSQRT([[VDST0:v[0-9]+]], [[VSRC0:v[0-9]+]]); -// A3: TRSQRT([[VDST1:v[0-9]+]], [[VSRC1:v[0-9]+]], [[VTMP:v[0-9]+]]); +// A3: TRSQRT([[VDST0:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC0:[_A-Za-z][_A-Za-z0-9]*]]); +// A3: TRSQRT([[VDST1:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC1:[_A-Za-z][_A-Za-z0-9]*]], [[VTMP:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/trsqrt_precision_emitc.pto b/test/lit/pto/trsqrt_precision_emitc.pto index ef6f00a826..5a293a6ee6 100644 --- a/test/lit/pto/trsqrt_precision_emitc.pto +++ b/test/lit/pto/trsqrt_precision_emitc.pto @@ -23,5 +23,5 @@ module { } } -// A5: TRSQRT([[VDST0:v[0-9]+]], [[VSRC0:v[0-9]+]]); -// A5: TRSQRT([[VDST1:v[0-9]+]], [[VSRC1:v[0-9]+]], [[VTMP:v[0-9]+]]); +// A5: TRSQRT([[VDST0:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC0:[_A-Za-z][_A-Za-z0-9]*]]); +// A5: TRSQRT([[VDST1:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC1:[_A-Za-z][_A-Za-z0-9]*]], [[VTMP:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/tsort32_emitc.pto b/test/lit/pto/tsort32_emitc.pto index a8ad2cefe8..354195cf6e 100644 --- a/test/lit/pto/tsort32_emitc.pto +++ b/test/lit/pto/tsort32_emitc.pto @@ -26,5 +26,5 @@ module { // A3: outs([[DST0:%.*]] : memref<1x64xf16 // A3: pto.tsort32 ins([[SRC]], [[IDX]], [[TMP:%.*]] : memref<1x32xf16{{.*}}memref<1x32xui32{{.*}}memref<1x64xf16 // A3: outs([[DST1:%.*]] : memref<1x64xf16 -// A3: TSORT32([[VDST0:v[0-9]+]], [[VSRC:v[0-9]+]], [[VIDX:v[0-9]+]]); -// A3: TSORT32([[VDST1:v[0-9]+]], [[VSRC]], [[VIDX]], [[VTMP:v[0-9]+]]); +// A3: TSORT32([[VDST0:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC:[_A-Za-z][_A-Za-z0-9]*]], [[VIDX:[_A-Za-z][_A-Za-z0-9]*]]); +// A3: TSORT32([[VDST1:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC]], [[VIDX]], [[VTMP:[_A-Za-z][_A-Za-z0-9]*]]); diff --git a/test/lit/pto/tsqrt_precision_emitc.pto b/test/lit/pto/tsqrt_precision_emitc.pto index 92a30a2ad7..ad21c3943b 100644 --- a/test/lit/pto/tsqrt_precision_emitc.pto +++ b/test/lit/pto/tsqrt_precision_emitc.pto @@ -14,5 +14,5 @@ module { } } -// A5: TSQRT([[VDST:v[0-9]+]], [[VSRC:v[0-9]+]]); +// A5: TSQRT([[VDST:[_A-Za-z][_A-Za-z0-9]*]], [[VSRC:[_A-Za-z][_A-Za-z0-9]*]]); // A5: TSQRT([[VDST]], [[VSRC]]); diff --git a/test/lit/pto/tstore_forms_emitc.pto b/test/lit/pto/tstore_forms_emitc.pto index e425ab57b4..5c7dbeaa1f 100644 --- a/test/lit/pto/tstore_forms_emitc.pto +++ b/test/lit/pto/tstore_forms_emitc.pto @@ -51,11 +51,11 @@ module { } } -// A3: TSTORE([[DST0:v[0-9]+]], [[VEC:v[0-9]+]]); +// A3: TSTORE([[DST0:[_A-Za-z][_A-Za-z0-9]*]], [[VEC:[_A-Za-z][_A-Za-z0-9]*]]); // A3: TSTORE([[DST0]], [[VEC]]); // A3: TSTORE<{{.*}}AtomicType::AtomicAdd>([[DST0]], [[VEC]]); // A3: TSTORE([[DST0]], [[VEC]]); -// A3: TSTORE<{{.*}}AtomicType::AtomicAdd, ReluPreMode::NormalRelu>([[DST1:v[0-9]+]], [[ACC:v[0-9]+]]); +// A3: TSTORE<{{.*}}AtomicType::AtomicAdd, ReluPreMode::NormalRelu>([[DST1:[_A-Za-z][_A-Za-z0-9]*]], [[ACC:[_A-Za-z][_A-Za-z0-9]*]]); // A3: TSTORE([[DST1]], [[ACC]]); -// A3: TSTORE<{{.*}}AtomicType::AtomicAdd, ReluPreMode::NormalRelu>([[DST1]], [[ACC]], [[PRE:v[0-9]+]]); +// A3: TSTORE<{{.*}}AtomicType::AtomicAdd, ReluPreMode::NormalRelu>([[DST1]], [[ACC]], [[PRE:[_A-Za-z][_A-Za-z0-9]*]]); // A3: TSTORE([[DST1]], [[ACC]], [[PRE]]); diff --git a/test/samples/DeepseekV4DecodeA3/deepseek_v4_decode_golden_lib.py b/test/samples/DeepseekV4DecodeA3/deepseek_v4_decode_golden_lib.py index 008d217c93..8654d441ae 100644 --- a/test/samples/DeepseekV4DecodeA3/deepseek_v4_decode_golden_lib.py +++ b/test/samples/DeepseekV4DecodeA3/deepseek_v4_decode_golden_lib.py @@ -10,10 +10,12 @@ import numpy as np from validation_runtime import ( + default_buffers, float32_to_bf16, load_case_meta, load_int32_assignments, rng, + single_output, write_buffers, write_golden, ) @@ -58,10 +60,10 @@ def _make_fp32_input(meta, name: str, generator, expected: int) -> np.ndarray: def build_case(meta, generator, ints): - if meta.outputs != ["v1"]: - raise ValueError(f"unexpected outputs: {meta.outputs}") - if meta.read_order != ["v1", "v2", "v3"]: - raise ValueError(f"unexpected read order: {meta.read_order}") + out_name = single_output(meta) + if len(meta.inputs) != 2: + raise ValueError(f"expected 2 non-output buffers, got {meta.inputs}") + rope_even_name, rope_odd_name = meta.inputs if len(ints) < 2: raise ValueError(f"expected block_idx/block_num int32 params, got {ints}") @@ -73,15 +75,14 @@ def build_case(meta, generator, ints): output_elems = OUTPUT_ROWS * OUTPUT_COLS input_elems = INPUT_ROWS * INPUT_COLS - buffers = { - "v1": _make_bf16_zeros(meta, "v1", output_elems), - "v2": _make_fp32_input(meta, "v2", generator, input_elems), - "v3": _make_fp32_input(meta, "v3", generator, input_elems), - } + buffers = default_buffers(meta) + buffers[out_name] = _make_bf16_zeros(meta, out_name, output_elems) + buffers[rope_even_name] = _make_fp32_input(meta, rope_even_name, generator, input_elems) + buffers[rope_odd_name] = _make_fp32_input(meta, rope_odd_name, generator, input_elems) - out = np.array(buffers["v1"], copy=True).reshape(OUTPUT_ROWS, OUTPUT_COLS) - rope_even = np.asarray(buffers["v2"], dtype=np.float32).reshape(INPUT_ROWS, INPUT_COLS) - rope_odd = np.asarray(buffers["v3"], dtype=np.float32).reshape(INPUT_ROWS, INPUT_COLS) + out = np.array(buffers[out_name], copy=True).reshape(OUTPUT_ROWS, OUTPUT_COLS) + rope_even = np.asarray(buffers[rope_even_name], dtype=np.float32).reshape(INPUT_ROWS, INPUT_COLS) + rope_odd = np.asarray(buffers[rope_odd_name], dtype=np.float32).reshape(INPUT_ROWS, INPUT_COLS) group_idx = block_idx // BLOCK_GROUP lane_idx = block_idx % BLOCK_GROUP @@ -99,7 +100,7 @@ def build_case(meta, generator, ints): col0 = OUTPUT_COL_BASE + hh * OUTPUT_COL_STRIDE out[dst_row, col0:col0 + INPUT_COLS] = tile_bf16[hh] - return buffers, {"v1": out.reshape(-1)} + return buffers, {out_name: out.reshape(-1)} def run_case(case_name: str): diff --git a/test/samples/DeepseekV4DecodeA5/deepseek_v4_decode_golden_lib.py b/test/samples/DeepseekV4DecodeA5/deepseek_v4_decode_golden_lib.py index 008d217c93..8654d441ae 100644 --- a/test/samples/DeepseekV4DecodeA5/deepseek_v4_decode_golden_lib.py +++ b/test/samples/DeepseekV4DecodeA5/deepseek_v4_decode_golden_lib.py @@ -10,10 +10,12 @@ import numpy as np from validation_runtime import ( + default_buffers, float32_to_bf16, load_case_meta, load_int32_assignments, rng, + single_output, write_buffers, write_golden, ) @@ -58,10 +60,10 @@ def _make_fp32_input(meta, name: str, generator, expected: int) -> np.ndarray: def build_case(meta, generator, ints): - if meta.outputs != ["v1"]: - raise ValueError(f"unexpected outputs: {meta.outputs}") - if meta.read_order != ["v1", "v2", "v3"]: - raise ValueError(f"unexpected read order: {meta.read_order}") + out_name = single_output(meta) + if len(meta.inputs) != 2: + raise ValueError(f"expected 2 non-output buffers, got {meta.inputs}") + rope_even_name, rope_odd_name = meta.inputs if len(ints) < 2: raise ValueError(f"expected block_idx/block_num int32 params, got {ints}") @@ -73,15 +75,14 @@ def build_case(meta, generator, ints): output_elems = OUTPUT_ROWS * OUTPUT_COLS input_elems = INPUT_ROWS * INPUT_COLS - buffers = { - "v1": _make_bf16_zeros(meta, "v1", output_elems), - "v2": _make_fp32_input(meta, "v2", generator, input_elems), - "v3": _make_fp32_input(meta, "v3", generator, input_elems), - } + buffers = default_buffers(meta) + buffers[out_name] = _make_bf16_zeros(meta, out_name, output_elems) + buffers[rope_even_name] = _make_fp32_input(meta, rope_even_name, generator, input_elems) + buffers[rope_odd_name] = _make_fp32_input(meta, rope_odd_name, generator, input_elems) - out = np.array(buffers["v1"], copy=True).reshape(OUTPUT_ROWS, OUTPUT_COLS) - rope_even = np.asarray(buffers["v2"], dtype=np.float32).reshape(INPUT_ROWS, INPUT_COLS) - rope_odd = np.asarray(buffers["v3"], dtype=np.float32).reshape(INPUT_ROWS, INPUT_COLS) + out = np.array(buffers[out_name], copy=True).reshape(OUTPUT_ROWS, OUTPUT_COLS) + rope_even = np.asarray(buffers[rope_even_name], dtype=np.float32).reshape(INPUT_ROWS, INPUT_COLS) + rope_odd = np.asarray(buffers[rope_odd_name], dtype=np.float32).reshape(INPUT_ROWS, INPUT_COLS) group_idx = block_idx // BLOCK_GROUP lane_idx = block_idx % BLOCK_GROUP @@ -99,7 +100,7 @@ def build_case(meta, generator, ints): col0 = OUTPUT_COL_BASE + hh * OUTPUT_COL_STRIDE out[dst_row, col0:col0 + INPUT_COLS] = tile_bf16[hh] - return buffers, {"v1": out.reshape(-1)} + return buffers, {out_name: out.reshape(-1)} def run_case(case_name: str): diff --git a/test/samples/Gemvmx/gemvmx_golden.py b/test/samples/Gemvmx/gemvmx_golden.py index 0c8e2d52dd..6df1b43a83 100644 --- a/test/samples/Gemvmx/gemvmx_golden.py +++ b/test/samples/Gemvmx/gemvmx_golden.py @@ -68,6 +68,9 @@ def convert_scale_b_format(scale, block_size=16, c0_size_mx=2): def main(): meta = load_case_meta() out_name = single_output(meta) + if len(meta.inputs) != 4: + raise ValueError(f"expected 4 input buffers, got {meta.inputs}") + a_name, b_name, a_scale_name, b_scale_name = meta.inputs generator = rng() a_bits = generator.choice(E4M3_BITS, size=128).astype(np.uint8) @@ -76,15 +79,15 @@ def main(): b_scale = generator.integers(127, 130, size=(4, 16), dtype=np.uint8) buffers = default_buffers(meta) - buffers["v1"] = a_bits - buffers["v2"] = b_bits - buffers["v3"] = a_scale.reshape(-1) + buffers[a_name] = a_bits + buffers[b_name] = b_bits + buffers[a_scale_name] = a_scale.reshape(-1) packed_b_scale = convert_scale_b_format(b_scale).astype(np.uint8).reshape(-1) - v4 = np.zeros(meta.elem_counts["v4"], dtype=np.uint8) - v4[: packed_b_scale.size] = packed_b_scale - buffers["v4"] = v4 - buffers[out_name] = np.zeros(meta.elem_counts[out_name], dtype=np.float32) + b_scale_buffer = np.zeros(meta.elem_counts[b_scale_name], dtype=meta.np_types[b_scale_name]) + b_scale_buffer[: packed_b_scale.size] = packed_b_scale + buffers[b_scale_name] = b_scale_buffer + buffers[out_name] = np.zeros(meta.elem_counts[out_name], dtype=meta.np_types[out_name]) write_buffers(meta, buffers) a = decode_e4m3fn(a_bits).reshape(1, 128) diff --git a/test/samples/MatmulMxLowPrecision/matmul_mx_low_precision_golden.py b/test/samples/MatmulMxLowPrecision/matmul_mx_low_precision_golden.py index 33beab7996..1b681c99c7 100644 --- a/test/samples/MatmulMxLowPrecision/matmul_mx_low_precision_golden.py +++ b/test/samples/MatmulMxLowPrecision/matmul_mx_low_precision_golden.py @@ -83,6 +83,9 @@ def convert_scale_b_format(scale, block_size=16, c0_size_mx=2): def main(): meta = load_case_meta() out_name = single_output(meta) + if len(meta.inputs) != 4: + raise ValueError(f"expected 4 input buffers, got {meta.inputs}") + a_name, b_name, a_scale_name, b_scale_name = meta.inputs generator = rng() m = 128 @@ -96,20 +99,20 @@ def main(): b_scale = generator.integers(127, 130, size=(k_aligned // 32, n), dtype=np.uint8) buffers = default_buffers(meta) - buffers["v1"] = a_bits - buffers["v2"] = b_bits + buffers[a_name] = a_bits + buffers[b_name] = b_bits packed_a_scale = convert_scale_a_format(a_scale).astype(np.uint8).reshape(-1) - v3 = np.zeros(meta.elem_counts["v3"], dtype=np.uint8) - v3[: packed_a_scale.size] = packed_a_scale - buffers["v3"] = v3 + a_scale_buffer = np.zeros(meta.elem_counts[a_scale_name], dtype=meta.np_types[a_scale_name]) + a_scale_buffer[: packed_a_scale.size] = packed_a_scale + buffers[a_scale_name] = a_scale_buffer packed_b_scale = convert_scale_b_format(b_scale).astype(np.uint8).reshape(-1) - v4 = np.zeros(meta.elem_counts["v4"], dtype=np.uint8) - v4[: packed_b_scale.size] = packed_b_scale - buffers["v4"] = v4 + b_scale_buffer = np.zeros(meta.elem_counts[b_scale_name], dtype=meta.np_types[b_scale_name]) + b_scale_buffer[: packed_b_scale.size] = packed_b_scale + buffers[b_scale_name] = b_scale_buffer - buffers[out_name] = np.zeros(meta.elem_counts[out_name], dtype=np.float32) + buffers[out_name] = np.zeros(meta.elem_counts[out_name], dtype=meta.np_types[out_name]) write_buffers(meta, buffers) a = decode_e4m3fn(a_bits).reshape(m, k) diff --git a/test/samples/Qwen3DecodeA3/qwen3_decode_golden_lib.py b/test/samples/Qwen3DecodeA3/qwen3_decode_golden_lib.py index 99cf23690f..4c6a9d6599 100644 --- a/test/samples/Qwen3DecodeA3/qwen3_decode_golden_lib.py +++ b/test/samples/Qwen3DecodeA3/qwen3_decode_golden_lib.py @@ -8,6 +8,7 @@ # See LICENSE in the root of the software repository for the full text of the License. import numpy as np +from dataclasses import replace from validation_runtime import ( bf16_to_float32, @@ -567,10 +568,39 @@ def build_down_proj_residual(meta, generator, ints): } +def _build_legacy_name_map(meta): + ordered = list(meta.read_order) + return {f'v{idx}': name for idx, name in enumerate(ordered, start=1)} + + +def _with_legacy_meta_aliases(meta, legacy_to_actual): + elem_counts = dict(meta.elem_counts) + np_types = dict(meta.np_types) + for legacy, actual in legacy_to_actual.items(): + if legacy in elem_counts: + continue + if actual not in elem_counts or actual not in np_types: + continue + elem_counts[legacy] = elem_counts[actual] + np_types[legacy] = np_types[actual] + return replace(meta, elem_counts=elem_counts, np_types=np_types) + + +def _rewrite_legacy_buffer_names(entries, legacy_to_actual): + rewritten = {} + for name, value in entries.items(): + rewritten[legacy_to_actual.get(name, name)] = value + return rewritten + + def run_case(case_name: str): meta = load_case_meta() + legacy_to_actual = _build_legacy_name_map(meta) + meta = _with_legacy_meta_aliases(meta, legacy_to_actual) generator = rng() ints = load_integer_assignments() buffers, golden = BUILDERS[case_name](meta, generator, ints) + buffers = _rewrite_legacy_buffer_names(buffers, legacy_to_actual) + golden = _rewrite_legacy_buffer_names(golden, legacy_to_actual) write_buffers(meta, buffers) write_golden(meta, golden) diff --git a/test/samples/Qwen3DecodeA5/qwen3_decode_golden_lib.py b/test/samples/Qwen3DecodeA5/qwen3_decode_golden_lib.py index 5e756cc425..9107687411 100644 --- a/test/samples/Qwen3DecodeA5/qwen3_decode_golden_lib.py +++ b/test/samples/Qwen3DecodeA5/qwen3_decode_golden_lib.py @@ -8,6 +8,7 @@ # See LICENSE in the root of the software repository for the full text of the License. import numpy as np +from dataclasses import replace from validation_runtime import ( bf16_to_float32, @@ -565,10 +566,39 @@ def build_down_proj_residual(meta, generator, ints): } +def _build_legacy_name_map(meta): + ordered = list(meta.read_order) + return {f'v{idx}': name for idx, name in enumerate(ordered, start=1)} + + +def _with_legacy_meta_aliases(meta, legacy_to_actual): + elem_counts = dict(meta.elem_counts) + np_types = dict(meta.np_types) + for legacy, actual in legacy_to_actual.items(): + if legacy in elem_counts: + continue + if actual not in elem_counts or actual not in np_types: + continue + elem_counts[legacy] = elem_counts[actual] + np_types[legacy] = np_types[actual] + return replace(meta, elem_counts=elem_counts, np_types=np_types) + + +def _rewrite_legacy_buffer_names(entries, legacy_to_actual): + rewritten = {} + for name, value in entries.items(): + rewritten[legacy_to_actual.get(name, name)] = value + return rewritten + + def run_case(case_name: str): meta = load_case_meta() + legacy_to_actual = _build_legacy_name_map(meta) + meta = _with_legacy_meta_aliases(meta, legacy_to_actual) generator = rng() ints = load_integer_assignments() buffers, golden = BUILDERS[case_name](meta, generator, ints) + buffers = _rewrite_legacy_buffer_names(buffers, legacy_to_actual) + golden = _rewrite_legacy_buffer_names(golden, legacy_to_actual) write_buffers(meta, buffers) write_golden(meta, golden) diff --git a/tools/ptoas/driver.cpp b/tools/ptoas/driver.cpp index b8de95aa2c..6cd1add149 100644 --- a/tools/ptoas/driver.cpp +++ b/tools/ptoas/driver.cpp @@ -12,8 +12,11 @@ #include "PTO/IR/PTO.h" #include "PTO/Transforms/Passes.h" #include "VPTOHostStubEmission.h" +#include "mlir/AsmParser/AsmParser.h" +#include "mlir/AsmParser/AsmParserState.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/AsmState.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Builders.h" #include "mlir/IR/SymbolTable.h" @@ -175,9 +178,31 @@ parseTextualModule(std::unique_ptr inputBuffer, mlir::pto::ScopedPTOParserTargetArch scopedParserArch( &context, arch == "a5" ? mlir::pto::PTOParserTargetArch::A5 : mlir::pto::PTOParserTargetArch::A3); - OwningOpRef module = parseSourceFile(sourceMgr, &context); - if (!module) + ParserConfig parserConfig(&context); + Block parsedBlock; + LocationAttr sourceFileLoc = UnknownLoc::get(&context); + if (const auto *sourceBuf = sourceMgr.getMemoryBuffer(sourceMgr.getMainFileID())) { + sourceFileLoc = FileLineColLoc::get(&context, sourceBuf->getBufferIdentifier(), + /*line=*/0, /*column=*/0); + } + AsmParserState parserState; + if (failed(parseAsmSourceFile(sourceMgr, &parsedBlock, parserConfig, + &parserState))) { llvm::errs() << "Error: Failed to parse MLIR.\n"; + return OwningOpRef(); + } + // `parseSourceFile` internally uses the same helper to wrap the + // parsed top-level block. We spell it out here because the public wrapper + // does not expose `AsmParserState`, which we need for textual SSA-name + // recovery. + OwningOpRef module = + mlir::detail::constructContainerOpForParserIfNecessary( + &parsedBlock, &context, sourceFileLoc); + if (!module) { + llvm::errs() << "Error: Failed to build parsed module.\n"; + return module; + } + mlir::pto::applyTextualNameHintsToModule(*module, parserState); return module; } diff --git a/tools/ptoas/ptoas.cpp b/tools/ptoas/ptoas.cpp index d5ba2b1319..be40674078 100644 --- a/tools/ptoas/ptoas.cpp +++ b/tools/ptoas/ptoas.cpp @@ -14,6 +14,7 @@ #include "VPTOHostStubEmission.h" #include "TilelangDaemon.h" #include "PTO/Transforms/CppPostprocess.h" +#include "mlir/AsmParser/AsmParserState.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Diagnostics.h" #include "mlir/IR/BuiltinOps.h" @@ -53,9 +54,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringMap.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Program.h" +#include #include +#include +#include #include #include #include @@ -603,6 +608,607 @@ static bool hasUnexpandedTileOps(ModuleOp module) { return found; } +using FunctionBlockArgHintMap = + llvm::StringMap, 4>>; + +static bool isGeneratedValueName(llvm::StringRef name); +static SmallVector getValueNameHints(Value value); + +static bool isCppIdentifierStart(char c) { + return std::isalpha(static_cast(c)) || c == '_'; +} + +static bool isCppIdentifierChar(char c) { + return std::isalnum(static_cast(c)) || c == '_'; +} + +static std::optional getTextualNameFromSMRange(llvm::SMRange range) { + if (!range.Start.isValid() || !range.End.isValid()) + return std::nullopt; + const char *begin = range.Start.getPointer(); + const char *end = range.End.getPointer(); + if (!begin || !end || end < begin) + return std::nullopt; + llvm::StringRef name(begin, static_cast(end - begin)); + if (name.empty()) + return std::nullopt; + name = name.trim(); + if (name.consume_front("%") && name.empty()) + return std::nullopt; + return name.str(); +} + +static SmallVector +expandTextualResultGroupHints(const AsmParserState::OperationDefinition &opDef, + unsigned groupIndex) { + SmallVector hints; + if (groupIndex >= opDef.resultGroups.size()) + return hints; + const auto &group = opDef.resultGroups[groupIndex]; + std::optional baseName = + getTextualNameFromSMRange(group.definition.loc); + if (!baseName) + return hints; + + unsigned resultStart = group.startIndex; + unsigned resultEnd = groupIndex + 1 == opDef.resultGroups.size() + ? opDef.op->getNumResults() + : opDef.resultGroups[groupIndex + 1].startIndex; + if (resultStart >= resultEnd) + return hints; + if (resultEnd - resultStart == 1) { + hints.push_back(*baseName); + return hints; + } + for (unsigned idx = resultStart; idx < resultEnd; ++idx) + hints.push_back(*baseName + "#" + std::to_string(idx - resultStart)); + return hints; +} + +static std::string sanitizeCppIdentifier(llvm::StringRef name) { + std::string sanitized; + sanitized.reserve(name.size() + 4); + + auto appendUnderscore = [&]() { + if (sanitized.empty() || sanitized.back() != '_') + sanitized.push_back('_'); + }; + + for (char c : name) { + if (isCppIdentifierChar(c)) + sanitized.push_back(c); + else + appendUnderscore(); + } + + while (!sanitized.empty() && sanitized.back() == '_') + sanitized.pop_back(); + + if (sanitized.empty()) + return {}; + if (!isCppIdentifierStart(sanitized.front())) + sanitized.insert(sanitized.begin(), '_'); + return sanitized; +} + +static void appendLocationNameHints(Location loc, + SmallVectorImpl &hints) { + if (auto nameLoc = dyn_cast(loc)) { + std::string sanitized = sanitizeCppIdentifier(nameLoc.getName().getValue()); + if (!sanitized.empty()) + hints.push_back(std::move(sanitized)); + return; + } + + if (auto fusedLoc = dyn_cast(loc)) { + if (Attribute metadata = fusedLoc.getMetadata()) { + if (auto strAttr = dyn_cast(metadata)) { + std::string sanitized = sanitizeCppIdentifier(strAttr.getValue()); + if (!sanitized.empty()) + hints.push_back(std::move(sanitized)); + return; + } + if (auto arrayAttr = dyn_cast(metadata)) { + for (Attribute attr : arrayAttr) { + auto strAttr = dyn_cast(attr); + if (!strAttr) + continue; + std::string sanitized = sanitizeCppIdentifier(strAttr.getValue()); + if (!sanitized.empty()) + hints.push_back(std::move(sanitized)); + } + if (!hints.empty()) + return; + } + } + + // Only metadata explicitly attached by PTOAS name-hint recovery carries an + // ordered result-name list. Ordinary fused child locations are debug + // provenance, not result-indexed name hints. + return; + } + + if (auto callSiteLoc = dyn_cast(loc)) { + appendLocationNameHints(callSiteLoc.getCallee(), hints); + if (hints.empty()) + appendLocationNameHints(callSiteLoc.getCaller(), hints); + } +} + +static bool hasLocationNameHints(Location loc) { + SmallVector hints; + appendLocationNameHints(loc, hints); + return !hints.empty(); +} + +// Read the *raw* (unsanitized) source SSA name hints carried in the Location +// metadata. Unlike appendLocationNameHints, this preserves the original textual +// form (e.g. "0", "24", "query_tile") so that issue #337's "pto: %N" provenance +// comments can map a generated C++ variable back to its input .pto SSA name, +// even for pure-digit names that would otherwise be sanitized to "_0". +static void appendRawLocationProvenance(Location loc, + SmallVectorImpl &hints) { + if (auto nameLoc = dyn_cast(loc)) { + std::string raw = nameLoc.getName().getValue().str(); + if (!raw.empty()) + hints.push_back(std::move(raw)); + return; + } + + if (auto fusedLoc = dyn_cast(loc)) { + if (Attribute metadata = fusedLoc.getMetadata()) { + if (auto strAttr = dyn_cast(metadata)) { + std::string raw = strAttr.getValue().str(); + if (!raw.empty()) + hints.push_back(std::move(raw)); + return; + } + if (auto arrayAttr = dyn_cast(metadata)) { + for (Attribute attr : arrayAttr) { + auto strAttr = dyn_cast(attr); + if (!strAttr) + continue; + std::string raw = strAttr.getValue().str(); + if (!raw.empty()) + hints.push_back(std::move(raw)); + } + if (!hints.empty()) + return; + } + } + + // Only metadata explicitly attached by PTOAS name-hint recovery carries an + // ordered result-name list. Ordinary fused child locations are debug + // provenance, not result-indexed name hints. + return; + } + + if (auto callSiteLoc = dyn_cast(loc)) { + appendRawLocationProvenance(callSiteLoc.getCallee(), hints); + if (hints.empty()) + appendRawLocationProvenance(callSiteLoc.getCaller(), hints); + } +} + +// Recover the raw provenance (input .pto SSA name) for an op's results. +// Returns one raw name per result when available, mirroring getResultNameHints +// but without sanitization. +static SmallVector getRawResultProvenance(Operation *op) { + SmallVector hints; + if (!op || op->getNumResults() == 0) + return hints; + appendRawLocationProvenance(op->getLoc(), hints); + if (hints.empty()) + return hints; + hints.erase(std::remove_if(hints.begin(), hints.end(), + [](const std::string &name) { + return name.empty(); + }), + hints.end()); + if (hints.empty()) + return hints; + if (op->getNumResults() == 1) { + if (hints.size() > 1) + hints.resize(1); + return hints; + } + if (hints.size() > op->getNumResults()) + hints.resize(op->getNumResults()); + return hints; +} + +static SmallVector getRawLocationProvenance(Location loc) { + SmallVector hints; + appendRawLocationProvenance(loc, hints); + hints.erase(std::remove_if(hints.begin(), hints.end(), + [](const std::string &hint) { + return hint.empty(); + }), + hints.end()); + return hints; +} + +static Location getIndexedRawProvenanceLoc(Location fallbackLoc, unsigned index) { + SmallVector hints = getRawLocationProvenance(fallbackLoc); + if (index >= hints.size()) + return fallbackLoc; + return NameLoc::get(StringAttr::get(fallbackLoc.getContext(), hints[index]), + fallbackLoc); +} + +static Location attachLocationNameHints(Location baseLoc, + llvm::ArrayRef hints, + MLIRContext *context) { + SmallVector attrs; + attrs.reserve(hints.size()); + for (llvm::StringRef hint : hints) { + if (!hint.empty()) + attrs.push_back(StringAttr::get(context, hint)); + } + if (attrs.empty()) + return baseLoc; + if (attrs.size() == 1) + return NameLoc::get(cast(attrs.front()), baseLoc); + return FusedLoc::get(ArrayRef{baseLoc}, ArrayAttr::get(context, attrs), + context); +} + +static void applyValueNameHints(Value value, llvm::ArrayRef hints) { + if (!value || hints.empty() || hasLocationNameHints(value.getLoc())) + return; + value.setLoc(attachLocationNameHints(value.getLoc(), hints, value.getContext())); +} + +static void applyOperationResultNameHints(Operation *op, + llvm::ArrayRef hints) { + if (!op || op->getNumResults() == 0 || hints.empty() || + hasLocationNameHints(op->getLoc())) + return; + + SmallVector limitedHints; + limitedHints.reserve(std::min(op->getNumResults(), hints.size())); + for (size_t i = 0, e = std::min(op->getNumResults(), hints.size()); + i < e; ++i) + limitedHints.push_back(hints[i]); + if (limitedHints.empty()) + return; + + op->setLoc(attachLocationNameHints(op->getLoc(), limitedHints, op->getContext())); +} + +static void splitDerivedSingleResultProvenanceLocsInRegion(Region ®ion); + +static void splitDerivedSingleResultProvenanceLocsInBlock(Block &block) { + SmallVector ops; + ops.reserve(block.getOperations().size()); + for (Operation &op : block) + ops.push_back(&op); + + for (size_t i = 0; i < ops.size();) { + Operation *op = ops[i]; + if (op->getNumResults() != 1) { + ++i; + continue; + } + + SmallVector hints = getRawLocationProvenance(op->getLoc()); + if (hints.size() <= 1) { + ++i; + continue; + } + + size_t runEnd = i + 1; + while (runEnd < ops.size() && ops[runEnd]->getNumResults() == 1 && + ops[runEnd]->getLoc() == op->getLoc()) { + ++runEnd; + } + + size_t runSize = runEnd - i; + if (runSize == hints.size()) { + Location sharedLoc = op->getLoc(); + for (size_t j = 0; j < runSize; ++j) + ops[i + j]->setLoc(getIndexedRawProvenanceLoc(sharedLoc, j)); + } + + i = runEnd; + } + + for (Operation &op : block) { + for (Region ®ion : op.getRegions()) + splitDerivedSingleResultProvenanceLocsInRegion(region); + } +} + +static void splitDerivedSingleResultProvenanceLocsInRegion(Region ®ion) { + for (Block &block : region) + splitDerivedSingleResultProvenanceLocsInBlock(block); +} + +static void splitDerivedSingleResultProvenanceLocs(Operation *root) { + if (!root) + return; + for (Region ®ion : root->getRegions()) + splitDerivedSingleResultProvenanceLocsInRegion(region); +} + +static void narrowUnusedMultiResultProvenanceLocs(Operation *root) { + if (!root) + return; + + root->walk([&](Operation *op) { + if (op->getNumResults() <= 1) + return; + + SmallVector hints = getRawLocationProvenance(op->getLoc()); + if (hints.size() != op->getNumResults()) + return; + + SmallVector liveHints; + liveHints.reserve(hints.size()); + for (auto [index, result] : llvm::enumerate(op->getResults())) { + if (!result.use_empty()) + liveHints.push_back(hints[index]); + } + + if (liveHints.empty() || liveHints.size() == hints.size()) + return; + + op->setLoc(attachLocationNameHints(op->getLoc(), liveHints, + op->getContext())); + }); +} + +namespace { +struct NarrowUnusedMultiResultProvenancePass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID( + NarrowUnusedMultiResultProvenancePass) + + void runOnOperation() override { + narrowUnusedMultiResultProvenanceLocs(getOperation()); + } +}; +} // namespace + +static std::unique_ptr createNarrowUnusedMultiResultProvenancePass() { + return std::make_unique(); +} + +static void collectNonEntryBlocksInSourceOrder( + Operation *op, SmallVectorImpl &blocks) { + for (Region ®ion : op->getRegions()) { + bool isEntryBlock = true; + for (Block &block : region) { + if (!isEntryBlock && block.getNumArguments() != 0) + blocks.push_back(&block); + isEntryBlock = false; + for (Operation &nestedOp : block) + collectNonEntryBlocksInSourceOrder(&nestedOp, blocks); + } + } +} + +void mlir::pto::applyTextualNameHintsToModule(ModuleOp module, + const AsmParserState &parserState) { + if (!module) + return; + + for (const AsmParserState::BlockDefinition &blockDef : parserState.getBlockDefs()) { + if (!blockDef.block) + continue; + for (auto [argIndex, argDef] : llvm::enumerate(blockDef.arguments)) { + if (argIndex >= blockDef.block->getNumArguments()) + break; + std::optional hint = getTextualNameFromSMRange(argDef.loc); + if (!hint) + continue; + applyValueNameHints(blockDef.block->getArgument(argIndex), + llvm::ArrayRef{*hint}); + } + } + + for (const AsmParserState::OperationDefinition &opDef : parserState.getOpDefs()) { + if (!opDef.op || opDef.op->getNumResults() == 0) + continue; + + SmallVector hints; + hints.reserve(opDef.op->getNumResults()); + for (unsigned groupIndex = 0, e = opDef.resultGroups.size(); groupIndex < e; + ++groupIndex) { + SmallVector groupHints = + expandTextualResultGroupHints(opDef, groupIndex); + hints.append(groupHints.begin(), groupHints.end()); + } + if (hints.empty()) + continue; + applyOperationResultNameHints(opDef.op, hints); + } +} + +static FunctionBlockArgHintMap collectFunctionBlockArgNameHints(ModuleOp module) { + FunctionBlockArgHintMap hintsByFunction; + for (func::FuncOp func : module.getOps()) { + SmallVector nonEntryBlocks; + collectNonEntryBlocksInSourceOrder(func.getOperation(), nonEntryBlocks); + if (nonEntryBlocks.empty()) + continue; + + SmallVector, 4> blockHints; + blockHints.reserve(nonEntryBlocks.size()); + for (Block *block : nonEntryBlocks) { + SmallVector argHints; + bool hasAllHints = block->getNumArguments() != 0; + for (BlockArgument arg : block->getArguments()) { + SmallVector hints = getValueNameHints(arg); + if (hints.empty()) { + hasAllHints = false; + break; + } + argHints.push_back(std::move(hints.front())); + } + if (hasAllHints) + blockHints.push_back(std::move(argHints)); + } + + if (!blockHints.empty()) + hintsByFunction[func.getSymNameAttr()] = std::move(blockHints); + } + return hintsByFunction; +} + +static void applyFunctionBlockArgNameHintsToEmitC( + ModuleOp module, const FunctionBlockArgHintMap &blockArgHints) { + for (emitc::FuncOp func : module.getOps()) { + auto it = blockArgHints.find(func.getSymNameAttr()); + if (it == blockArgHints.end() || it->second.empty()) + continue; + + SmallVector nonEntryBlocks; + collectNonEntryBlocksInSourceOrder(func.getOperation(), nonEntryBlocks); + if (nonEntryBlocks.size() != it->second.size()) + continue; + + bool shapeMatches = true; + for (auto [blockIndex, block] : llvm::enumerate(nonEntryBlocks)) { + if (block->getNumArguments() != it->second[blockIndex].size()) { + shapeMatches = false; + break; + } + } + if (!shapeMatches) + continue; + + for (auto [blockIndex, block] : llvm::enumerate(nonEntryBlocks)) { + const auto &argHints = it->second[blockIndex]; + for (auto [argIndex, arg] : llvm::enumerate(block->getArguments())) + applyValueNameHints(arg, llvm::ArrayRef{argHints[argIndex]}); + } + } +} + +static SmallVector getValueNameHints(Value value) { + SmallVector hints; + if (!value) + return hints; + appendLocationNameHints(value.getLoc(), hints); + if (hints.size() > 1) + hints.resize(1); + return hints; +} + +static std::string buildHintMarker(llvm::StringRef prefix, + llvm::ArrayRef hints) { + auto encodeHintMarkerToken = [](llvm::StringRef token) { + auto hexDigit = [](unsigned value) -> char { + return value < 10 ? static_cast('0' + value) + : static_cast('A' + (value - 10)); + }; + + auto isSafeMarkerChar = [](unsigned char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || c == '_' || c == '.' || c == '-'; + }; + + std::string encoded; + encoded.reserve(token.size()); + for (unsigned char c : token.bytes()) { + if (isSafeMarkerChar(c)) { + encoded.push_back(static_cast(c)); + continue; + } + encoded.push_back('%'); + encoded.push_back(hexDigit((c >> 4) & 0xF)); + encoded.push_back(hexDigit(c & 0xF)); + } + return encoded; + }; + + std::string marker = ("/* " + prefix + ":").str(); + for (size_t i = 0; i < hints.size(); ++i) { + if (i != 0) + marker.push_back(','); + marker.append(encodeHintMarkerToken(hints[i])); + } + marker.append(" */\n"); + return marker; +} + +static SmallVector +collectExpressionProvenance(emitc::ExpressionOp expr) { + SmallVector provenance; + auto appendUnique = [&](llvm::ArrayRef names) { + for (const std::string &name : names) { + if (name.empty()) + continue; + if (std::find(provenance.begin(), provenance.end(), name) != + provenance.end()) + continue; + provenance.push_back(name); + } + }; + + expr.walk([&](Operation *nested) { + if (nested == expr.getOperation()) + return WalkResult::advance(); + if (nested->getNumResults() == 0 || isa(nested)) + return WalkResult::advance(); + appendUnique(getRawResultProvenance(nested)); + return WalkResult::advance(); + }); + appendUnique(getRawResultProvenance(expr.getOperation())); + return provenance; +} + +static void annotateEmitCProvenanceHints(ModuleOp module) { + struct ProvenanceMarker { + Operation *op = nullptr; + SmallVector names; + }; + + llvm::SmallVector opsToAnnotate; + module.walk([&](Operation *op) { + if (op->getNumResults() == 0 || isa(op)) + return WalkResult::advance(); + + if (auto expr = dyn_cast(op)) { + SmallVector provenance = collectExpressionProvenance(expr); + if (provenance.empty()) + return WalkResult::skip(); + opsToAnnotate.push_back( + ProvenanceMarker{op, SmallVector(provenance)}); + return WalkResult::skip(); + } + + if (op->getParentOfType()) + return WalkResult::advance(); + // Only carry raw provenance into the C++ post-pass. Semantic renaming is + // intentionally deferred until naming can happen inside the emitter's own + // symbol table instead of via post-hoc C++ text rewriting. + SmallVector provenance = getRawResultProvenance(op); + if (provenance.empty()) + return WalkResult::advance(); + opsToAnnotate.push_back(ProvenanceMarker{ + op, SmallVector(provenance.begin(), provenance.end())}); + return WalkResult::advance(); + }); + + OpBuilder builder(module.getContext()); + for (const ProvenanceMarker &marker : opsToAnnotate) { + // Emit a provenance marker carrying the raw input SSA name. This is + // consumed by the C++ post-processor to emit `// pto: %N` comments so a + // reader can map a generated variable back to its .pto source (issue #337 + // point 1: locatability without strict number alignment). + if (!marker.names.empty()) { + builder.setInsertionPoint(marker.op); + builder.create( + marker.op->getLoc(), + builder.getStringAttr( + buildHintMarker("PTOAS_PROVENANCE", marker.names))); + } + } +} + // -------------------------------------------------------------------------- // Post-process C++ output: rewrite marker calls into Tile member calls. // We emit marker calls in EmitC IR because EmitC currently does not provide a @@ -1509,6 +2115,174 @@ static void rewriteHoistedGlobalTensorDecls(std::string &cpp) { cpp.swap(out); } +static std::optional> +parseNameHintMarker(llvm::StringRef markerBody) { + auto decodeHintMarkerToken = [](llvm::StringRef token) { + auto hexValue = [](char c) -> int { + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + return -1; + }; + + std::string decoded; + decoded.reserve(token.size()); + for (size_t i = 0; i < token.size();) { + if (token[i] == '%' && i + 2 < token.size()) { + int hi = hexValue(token[i + 1]); + int lo = hexValue(token[i + 2]); + if (hi >= 0 && lo >= 0) { + decoded.push_back( + static_cast((static_cast(hi) << 4) | lo)); + i += 3; + continue; + } + } + decoded.push_back(token[i]); + ++i; + } + return decoded; + }; + + llvm::SmallVector hints; + markerBody = markerBody.trim(); + if (markerBody.empty()) + return std::nullopt; + + size_t start = 0; + while (start <= markerBody.size()) { + size_t comma = markerBody.find(',', start); + llvm::StringRef token = markerBody.slice( + start, comma == llvm::StringRef::npos ? markerBody.size() : comma); + token = token.trim(); + if (!token.empty()) + hints.push_back(decodeHintMarkerToken(token)); + if (comma == llvm::StringRef::npos) + break; + start = comma + 1; + } + + if (hints.empty()) + return std::nullopt; + return hints; +} + +static void stripHintMarkersWithPrefix(std::string &cpp, + llvm::StringRef markerPrefix) { + std::string out; + out.reserve(cpp.size()); + size_t searchPos = 0; + while (searchPos < cpp.size()) { + size_t markerPos = cpp.find(markerPrefix.str(), searchPos); + if (markerPos == std::string::npos) { + out.append(cpp, searchPos, std::string::npos); + break; + } + + out.append(cpp, searchPos, markerPos - searchPos); + size_t markerEnd = cpp.find("*/", markerPos + markerPrefix.size()); + if (markerEnd == std::string::npos) { + out.append(cpp, markerPos, std::string::npos); + break; + } + markerEnd += 2; + while (markerEnd < cpp.size() && + (cpp[markerEnd] == '\r' || cpp[markerEnd] == '\n')) + ++markerEnd; + searchPos = markerEnd; + } + cpp.swap(out); +} + +static void stripAllHintMarkers(std::string &cpp) { + stripHintMarkersWithPrefix(cpp, "/* PTOAS_PROVENANCE:"); +} + +static std::string sanitizeCommentText(llvm::StringRef text) { + auto hexDigit = [](unsigned value) -> char { + return value < 10 ? static_cast('0' + value) + : static_cast('A' + (value - 10)); + }; + + std::string sanitized; + sanitized.reserve(text.size()); + for (unsigned char c : text.bytes()) { + switch (c) { + case '\n': + sanitized.append("\\n"); + break; + case '\r': + sanitized.append("\\r"); + break; + case '\t': + sanitized.append("\\t"); + break; + default: + if (std::iscntrl(c)) { + sanitized.push_back('\\'); + sanitized.push_back('x'); + sanitized.push_back(hexDigit((c >> 4) & 0xF)); + sanitized.push_back(hexDigit(c & 0xF)); + } else { + sanitized.push_back(static_cast(c)); + } + break; + } + } + return sanitized; +} + +// Convert `/* PTOAS_PROVENANCE:rawname,... */` markers into standalone +// `// pto: %rawname` comment lines in-place. This avoids guessing which later +// generated declaration a marker should attach to after EmitC/Cpp emission, +// hoisting, or inlining. The marker is consumed (removed) here. +static void emitProvenanceComments(std::string &segment) { + static constexpr llvm::StringLiteral kProvenancePrefix = + "/* PTOAS_PROVENANCE:"; + std::string out; + out.reserve(segment.size() + 128); + size_t i = 0; + while (i < segment.size()) { + size_t mp = segment.find(kProvenancePrefix.str(), i); + if (mp == std::string::npos) { + out.append(segment, i, std::string::npos); + break; + } + out.append(segment, i, mp - i); + size_t me = segment.find("*/", mp + kProvenancePrefix.size()); + if (me == std::string::npos) { + out.append(segment, i, std::string::npos); + break; + } + auto names = parseNameHintMarker( + llvm::StringRef(segment).slice(mp + kProvenancePrefix.size(), me)); + if (names && !names->empty()) { + out.append("// pto: "); + for (size_t idx = 0; idx < names->size(); ++idx) { + if (idx != 0) + out.append(", "); + out.push_back('%'); + out.append(sanitizeCommentText((*names)[idx])); + } + out.push_back('\n'); + } + me += 2; + while (me < segment.size() && + (segment[me] == '\r' || segment[me] == '\n')) + ++me; + i = me; + } + segment.swap(out); +} + +static void rewriteNameHintMarkers(std::string &cpp) { + emitProvenanceComments(cpp); + stripAllHintMarkers(cpp); +} + namespace { struct ConstantDeclCandidate { size_t declLine = 0; @@ -1873,6 +2647,15 @@ int mlir::pto::compilePTOASModule( int argc = context.getArgc(); char **argv = context.getArgv(); + // Name-hint provenance: textual .pto inputs had their SSA/arg/block-arg names + // attached to op Locations by the driver right after parsing. Collect the + // block-arg hint map now, before lowering, so it can be reattached on the + // EmitC CFG side before final C++ emission. + FunctionBlockArgHintMap functionBlockArgHints; + if (module) { + functionBlockArgHints = collectFunctionBlockArgNameHints(*module); + } + if (effectiveBackend != PTOBackend::VPTO && (emitVPTO || emitVPTOLLVMDialect || ptoPrintSeamIR || !ptoSeamIRFile.empty())) { @@ -2136,6 +2919,8 @@ int mlir::pto::compilePTOASModule( // or an `arith.select` chain (dynamic slot). The multi-address cast // produced by PlanMemory survives as the alloc anchor. pm.addPass(pto::createPTOResolveBufferSelectPass()); + if (effectiveBackend == PTOBackend::EmitC) + pm.addPass(createNarrowUnusedMultiResultProvenancePass()); if (emitMlirIR) { if (failed(pm.run(*module))) { @@ -2157,6 +2942,8 @@ int mlir::pto::compilePTOASModule( // materialized tile-native handles, so helper arguments are restored to the // tile_buf ABI before qk.as_ptr()-style bridges are cloned into callers. pm.addPass(pto::createPTOInlineBackendHelpersPass()); + if (effectiveBackend == PTOBackend::EmitC) + pm.addPass(createNarrowUnusedMultiResultProvenancePass()); pm.addPass(createCanonicalizerPass()); pm.addPass(createCSEPass()); if (failed(applyConfiguredPassManagerCLOptions(pm, "main PTOAS pipeline"))) @@ -2168,8 +2955,10 @@ int mlir::pto::compilePTOASModule( return 1; } - if (ptoPrintSeamIR) - printSharedPreBackendSeamIR(*module); + if (ptoPrintSeamIR) { + module->print(llvm::errs()); + llvm::errs() << "\n"; + } if (failed(emitSharedPreBackendSeamIR(*module, ptoSeamIRFile))) return 1; @@ -2189,6 +2978,9 @@ int mlir::pto::compilePTOASModule( if (failed(emitSharedPreBackendSeamIR(*module, ptoSeamIRFile))) return 1; + narrowUnusedMultiResultProvenanceLocs(module.get()); + splitDerivedSingleResultProvenanceLocs(module.get()); + PassManager emitcPM(module->getContext()); emitcPM.enableVerifier(); if (arch == "a3") { @@ -2207,6 +2999,8 @@ int mlir::pto::compilePTOASModule( return 1; } + applyFunctionBlockArgNameHintsToEmitC(*module, functionBlockArgHints); + splitDerivedSingleResultProvenanceLocs(module.get()); dropEmptyEmitCExpressions(module.get()); materializeControlFlowOperands(module.get()); normalizeEmitCIntegerAttrsForCppEmission(module.get()); @@ -2214,6 +3008,7 @@ int mlir::pto::compilePTOASModule( llvm::errs() << "Error: Failed to order emitted functions for C++ emission.\n"; return 1; } + annotateEmitCProvenanceHints(*module); // Emit C++ to string, then post-process, then write to output file. std::string cppOutput; @@ -2238,6 +3033,7 @@ int mlir::pto::compilePTOASModule( rewriteMalformedVerbatimSemicolons(cppOutput); rewriteScalarConstantDecls(cppOutput); rewriteHoistedGlobalTensorDecls(cppOutput); + rewriteNameHintMarkers(cppOutput); result.kind = PTOASCompileResultKind::Text; result.textOutput = std::move(cppOutput); diff --git a/tools/ptoas/ptoas.h b/tools/ptoas/ptoas.h index 9578a5d4f1..6aedbd51c6 100644 --- a/tools/ptoas/ptoas.h +++ b/tools/ptoas/ptoas.h @@ -21,6 +21,7 @@ #include namespace mlir { +class AsmParserState; class DialectRegistry; class MLIRContext; } // namespace mlir @@ -124,6 +125,13 @@ void registerPTOASDialects(DialectRegistry ®istry); void registerPTOASPassesAndCLOptions(); void loadPTOASDialects(MLIRContext &context); +// Attach textual-.pto SSA name hints (function args, block args, op results) +// to the parsed module's Locations as debug metadata. Called by the driver +// right after parsing a textual .pto input so the names survive lowering. +// No-op for non-textual (PTOBC) inputs or modules without recoverable names. +void applyTextualNameHintsToModule(ModuleOp module, + const AsmParserState &parserState); + } // namespace mlir::pto #endif