From 5637310557a3f577d12dfe4783703092f425c5cf Mon Sep 17 00:00:00 2001 From: RuoyuZhou Date: Wed, 1 Jul 2026 12:57:09 +0800 Subject: [PATCH 1/4] Encode PTO v0.57 tile blocks in the Linx compiler The compiler now exposes the v0.57 split tile descriptor model, updated PTO intrinsics, BSTART class handling, and lowering/printing paths needed by SuperNPUBench and the runtime probes. Constraint: v0.57 uses B.ITP for sources and B.OTA for destinations instead of combined B.IOT descriptors Rejected: Keep TSTORE_FP and similar forms as independent opcode ids | equivalent forms are selected by form bits under one encoding row Confidence: high Scope-risk: broad Directive: Do not reintroduce encoding for sync, communication, pipe lifecycle, or PTO IR-only operations in the hardware block map Tested: ninja -C compiler/llvm/build-linxisa-clang clang llc -j8 Tested: llc/FileCheck compiler/llvm/llvm/test/CodeGen/LinxISA/v057-fixp-tinsert-ttrans.ll Tested: SuperNPUBench compile_all.sh all generated 49 LinxISA and 56 PTO ISA ELFs --- clang/include/clang/Basic/BuiltinsLinxISA.td | 5 + clang/include/clang/Options/Options.td | 4 + clang/lib/Basic/Targets/LinxISA.cpp | 9 + clang/lib/CodeGen/TargetBuiltins/LinxISA.cpp | 82 ++ clang/lib/Driver/Driver.cpp | 4 + clang/lib/Driver/ToolChains/Clang.cpp | 1 + clang/test/CodeGen/linxisa-builtins.c | 15 + clang/test/Driver/linx-mlxbc.c | 16 + llvm/include/llvm/IR/IntrinsicsLinx.td | 91 +- .../LinxISA/AsmParser/LinxISAAsmParser.cpp | 607 ++++++++----- .../Disassembler/LinxISADisassembler.cpp | 6 + llvm/lib/Target/LinxISA/LinxISABlockify.cpp | 811 +++++++++--------- .../Target/LinxISA/LinxISAISelDAGToDAG.cpp | 459 ++++++++-- .../Target/LinxISA/LinxISAISelLowering.cpp | 3 + llvm/lib/Target/LinxISA/LinxISAInstrInfo.td | 81 +- .../lib/Target/LinxISA/LinxISAMCInstLower.cpp | 69 +- .../Target/LinxISA/LinxISARegisterInfo.cpp | 4 +- .../Target/LinxISA/LinxISATileSSABalance.cpp | 65 +- .../MCTargetDesc/LinxISAInstPrinter.cpp | 410 +++++---- 
.../LinxISA/MCTargetDesc/linxisa_opcodes.c | 39 +- .../LinxISA/v057-fixp-tinsert-ttrans.ll | 55 ++ .../CodeGen/LinxISA/vpar-tile-binop-body.ll | 42 + 22 files changed, 1899 insertions(+), 979 deletions(-) create mode 100644 clang/test/Driver/linx-mlxbc.c create mode 100644 llvm/test/CodeGen/LinxISA/v057-fixp-tinsert-ttrans.ll create mode 100644 llvm/test/CodeGen/LinxISA/vpar-tile-binop-body.ll diff --git a/clang/include/clang/Basic/BuiltinsLinxISA.td b/clang/include/clang/Basic/BuiltinsLinxISA.td index 99aacbde599e0..9ed85de9106b7 100644 --- a/clang/include/clang/Basic/BuiltinsLinxISA.td +++ b/clang/include/clang/Basic/BuiltinsLinxISA.td @@ -23,11 +23,16 @@ let Attributes = [NoThrow] in { // PR4/PR5 strict tile surfaces. def tile_tload : LinxISABuiltin<"_Vector<1024, int>(void const *, _Constant unsigned int, _Constant unsigned int, _Constant long long int, _Constant long long int, _Constant long long int, _Constant long long int)">; def tile_tstore : LinxISABuiltin<"void(void *, _Vector<1024, int>, _Constant unsigned int, _Constant unsigned int, _Constant long long int, _Constant long long int, _Constant long long int, _Constant long long int)">; + def tile_mgather : LinxISABuiltin<"_Vector<1024, int>(void const *, _Vector<1024, int>, _Constant unsigned int, _Constant unsigned int, _Constant long long int, _Constant long long int, _Constant long long int, _Constant long long int)">; + def tile_mscatter : LinxISABuiltin<"void(void *, _Vector<1024, int>, _Vector<1024, int>, _Constant unsigned int, _Constant unsigned int, _Constant long long int, _Constant long long int, _Constant long long int, _Constant long long int)">; def tile_tmov : LinxISABuiltin<"_Vector<1024, int>(_Vector<1024, int>, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int, _Constant long long int, _Constant unsigned int)">; + def tile_tinsert : LinxISABuiltin<"_Vector<1024, int>(_Vector<1024, int>, _Vector<1024, int>, _Constant unsigned int, _Constant unsigned int, _Constant 
unsigned int, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int, long long int)">; + def tile_ttrans : LinxISABuiltin<"_Vector<1024, int>(_Vector<1024, int>, _Vector<1024, int>, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int)">; def cube_acccvt : LinxISABuiltin<"_Vector<1024, int>(_Vector<1024, int>, _Constant unsigned int, _Constant unsigned int, _Constant long long int, _Constant long long int)">; def tepl_unary : LinxISABuiltin<"_Vector<1024, int>(_Vector<1024, int>, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int)">; def tepl_binary : LinxISABuiltin<"_Vector<1024, int>(_Vector<1024, int>, _Vector<1024, int>, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int)">; + def tepl_ternary : LinxISABuiltin<"_Vector<1024, int>(_Vector<1024, int>, _Vector<1024, int>, _Vector<1024, int>, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int)">; def tepl_binary_scalar : LinxISABuiltin<"_Vector<1024, int>(_Vector<1024, int>, long long int, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int)">; def tepl_splat : LinxISABuiltin<"_Vector<1024, int>(long long int, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int, _Constant unsigned int)">; diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index a08100ef9d0cd..69e2e047107f5 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -5095,6 +5095,10 @@ def mqdsp6_compat : Flag<["-"], "mqdsp6-compat">, Group, MarshallingInfoFlag>; def m64 : Flag<["-"], "m64">, Group, Flags<[NoXarchOption]>, Visibility<[ClangOption, CLOption, DXCOption, FlangOption]>; +def mlxbc : Flag<["-"], "mlxbc">, Group, + Flags<[NoXarchOption, TargetSpecific]>, + Visibility<[ClangOption, CLOption]>, + HelpText<"Select the Linx block ISA 
bare-metal target">; def maix64 : Flag<["-"], "maix64">, Group, Flags<[NoXarchOption]>, Visibility<[FlangOption]>; def mx32 : Flag<["-"], "mx32">, Group, Flags<[NoXarchOption]>, diff --git a/clang/lib/Basic/Targets/LinxISA.cpp b/clang/lib/Basic/Targets/LinxISA.cpp index eb45d52055721..3f612c4dba033 100644 --- a/clang/lib/Basic/Targets/LinxISA.cpp +++ b/clang/lib/Basic/Targets/LinxISA.cpp @@ -73,8 +73,17 @@ bool LinxISATargetInfo::validateAsmConstraint( // e - even register (for paired registers) // z - zero register (r0) // Z - first special register (ra/r10) + // Tr - tile register switch (Name[0]) { + case 'T': { + if (Name[1] == 'r') { + Info.setAllowsRegister(); + Name++; + return true; + } + return false; + } case 'r': { // General purpose register Info.setAllowsRegister(); diff --git a/clang/lib/CodeGen/TargetBuiltins/LinxISA.cpp b/clang/lib/CodeGen/TargetBuiltins/LinxISA.cpp index aaf482fe30638..1637dc70793aa 100644 --- a/clang/lib/CodeGen/TargetBuiltins/LinxISA.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/LinxISA.cpp @@ -57,6 +57,42 @@ llvm::Value *CodeGenFunction::EmitLinxISABuiltinExpr(unsigned BuiltinID, {Base, Tile, DType, Layout, LB0, LB1, Size, Stride}); } + case LinxISA::BI__builtin_linx_tile_mgather: { + llvm::Value *Base = EmitScalarExpr(E->getArg(0)); + llvm::Value *Index = EmitScalarExpr(E->getArg(1)); + llvm::Value *Size = castToI32(EmitScalarExpr(E->getArg(2))); + llvm::Value *DType = castToI32(EmitScalarExpr(E->getArg(3))); + llvm::Value *Layout = castToI32(EmitScalarExpr(E->getArg(4))); + llvm::Value *LB0 = castToI32(EmitScalarExpr(E->getArg(5))); + llvm::Value *LB1 = castToI32(EmitScalarExpr(E->getArg(6))); + llvm::Value *Stride = castToI64(EmitScalarExpr(E->getArg(7))); + + llvm::Type *TileTy = ConvertType(E->getType()); + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::linx_tma_mgather_desc, {TileTy}); + return Builder.CreateCall(F, + {Base, Index, DType, Layout, LB0, LB1, Size, + Stride}, + "linx.tile.mgather"); + } + case 
LinxISA::BI__builtin_linx_tile_mscatter: { + llvm::Value *Base = EmitScalarExpr(E->getArg(0)); + llvm::Value *Tile = EmitScalarExpr(E->getArg(1)); + llvm::Value *Index = EmitScalarExpr(E->getArg(2)); + llvm::Value *Size = castToI32(EmitScalarExpr(E->getArg(3))); + llvm::Value *DType = castToI32(EmitScalarExpr(E->getArg(4))); + llvm::Value *Layout = castToI32(EmitScalarExpr(E->getArg(5))); + llvm::Value *LB0 = castToI32(EmitScalarExpr(E->getArg(6))); + llvm::Value *LB1 = castToI32(EmitScalarExpr(E->getArg(7))); + llvm::Value *Stride = castToI64(EmitScalarExpr(E->getArg(8))); + + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::linx_tma_mscatter_desc, + {Tile->getType()}); + return Builder.CreateCall(F, + {Base, Tile, Index, DType, Layout, LB0, LB1, + Size, Stride}); + } case LinxISA::BI__builtin_linx_tile_tmov: { llvm::Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Value *Mode = castToI32(EmitScalarExpr(E->getArg(1))); @@ -73,6 +109,39 @@ llvm::Value *CodeGenFunction::EmitLinxISABuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Src, Mode, Size, DType, Layout, HasLayout}, "linx.tile.tmov"); } + case LinxISA::BI__builtin_linx_tile_tinsert: { + llvm::Value *Dst = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src = EmitScalarExpr(E->getArg(1)); + llvm::Value *Size = castToI32(EmitScalarExpr(E->getArg(2))); + llvm::Value *DType = castToI32(EmitScalarExpr(E->getArg(3))); + llvm::Value *DstRows = castToI32(EmitScalarExpr(E->getArg(4))); + llvm::Value *DstCols = castToI32(EmitScalarExpr(E->getArg(5))); + llvm::Value *SrcRows = castToI32(EmitScalarExpr(E->getArg(6))); + llvm::Value *SrcCols = castToI32(EmitScalarExpr(E->getArg(7))); + llvm::Value *Meta = castToI64(EmitScalarExpr(E->getArg(8))); + + llvm::Function *F = CGM.getIntrinsic( + llvm::Intrinsic::linx_tile_tinsert_legacy, {Dst->getType()}); + return Builder.CreateCall(F, {Dst, Src, Size, DType, DstRows, DstCols, + SrcRows, SrcCols, Meta}, + "linx.tile.tinsert"); + } + case 
LinxISA::BI__builtin_linx_tile_ttrans: { + llvm::Value *Src = EmitScalarExpr(E->getArg(0)); + llvm::Value *Tmp = EmitScalarExpr(E->getArg(1)); + llvm::Value *Size = castToI32(EmitScalarExpr(E->getArg(2))); + llvm::Value *DType = castToI32(EmitScalarExpr(E->getArg(3))); + llvm::Value *DstRows = castToI32(EmitScalarExpr(E->getArg(4))); + llvm::Value *DstCols = castToI32(EmitScalarExpr(E->getArg(5))); + llvm::Value *SrcRows = castToI32(EmitScalarExpr(E->getArg(6))); + llvm::Value *SrcCols = castToI32(EmitScalarExpr(E->getArg(7))); + + llvm::Function *F = CGM.getIntrinsic( + llvm::Intrinsic::linx_tile_ttrans_legacy, {Src->getType()}); + return Builder.CreateCall(F, {Src, Tmp, Size, DType, DstRows, DstCols, + SrcRows, SrcCols}, + "linx.tile.ttrans"); + } case LinxISA::BI__builtin_linx_cube_mamulb: { llvm::Value *A = EmitScalarExpr(E->getArg(0)); llvm::Value *B = EmitScalarExpr(E->getArg(1)); @@ -130,6 +199,19 @@ llvm::Value *CodeGenFunction::EmitLinxISABuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {A, B, TileOp, Size, DType}, "linx.tepl.b"); } + case LinxISA::BI__builtin_linx_tepl_ternary: { + llvm::Value *A = EmitScalarExpr(E->getArg(0)); + llvm::Value *B = EmitScalarExpr(E->getArg(1)); + llvm::Value *C = EmitScalarExpr(E->getArg(2)); + llvm::Value *TileOp = castToI32(EmitScalarExpr(E->getArg(3))); + llvm::Value *Size = castToI32(EmitScalarExpr(E->getArg(4))); + llvm::Value *DType = castToI32(EmitScalarExpr(E->getArg(5))); + + llvm::Function *F = CGM.getIntrinsic( + llvm::Intrinsic::linx_tepl_ternary_legacy, {A->getType()}); + return Builder.CreateCall(F, {A, B, C, TileOp, Size, DType}, + "linx.tepl.t"); + } case LinxISA::BI__builtin_linx_tepl_binary_scalar: { llvm::Value *A = EmitScalarExpr(E->getArg(0)); llvm::Value *Scalar = castToI64(EmitScalarExpr(E->getArg(1))); diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index eb3f9cbea2845..a9c95eef480a1 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ 
-615,6 +615,10 @@ static llvm::Triple computeTargetTriple(const Driver &D, // FIXME: Already done in Compilation *Driver::BuildCompilation if (const Arg *A = Args.getLastArg(options::OPT_target)) TargetTriple = A->getValue(); + else if (Arg *A = Args.getLastArg(options::OPT_mlxbc)) { + TargetTriple = "linx64-linx-none-elf"; + A->claim(); + } llvm::Triple Target(llvm::Triple::normalize(TargetTriple)); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 5147d95506a71..5787784f3388e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1781,6 +1781,7 @@ void Clang::AddLinxISATargetArgs(const ArgList &Args, // ToolChains/Arch/LinxISA.cpp. (void)Args.getLastArg(options::OPT_mcpu_EQ); (void)Args.getLastArg(options::OPT_march_EQ); + (void)Args.getLastArg(options::OPT_mlxbc); if (const Arg *A = Args.getLastArg(options::OPT_mtune_EQ)) { CmdArgs.push_back("-tune-cpu"); diff --git a/clang/test/CodeGen/linxisa-builtins.c b/clang/test/CodeGen/linxisa-builtins.c index b284c5e6ec426..ea5be2d783aed 100644 --- a/clang/test/CodeGen/linxisa-builtins.c +++ b/clang/test/CodeGen/linxisa-builtins.c @@ -30,6 +30,21 @@ tile_i32 test_tile_tmov(tile_i32 src) { // CHECK-LABEL: define{{.*}} <1024 x i32> @test_tile_tmov // CHECK: call <1024 x i32> @llvm.linx.tile.tmov.legacy +tile_i32 test_tile_tinsert(tile_i32 dst, tile_i32 src, long long meta) { + return __builtin_linx_tile_tinsert(dst, src, 8u, 1u, 16u, 32u, 16u, 8u, + meta); +} + +// CHECK-LABEL: define{{.*}} <1024 x i32> @test_tile_tinsert +// CHECK: call <1024 x i32> @llvm.linx.tile.tinsert.legacy + +tile_i32 test_tile_ttrans(tile_i32 src, tile_i32 tmp) { + return __builtin_linx_tile_ttrans(src, tmp, 8u, 1u, 32u, 16u, 16u, 32u); +} + +// CHECK-LABEL: define{{.*}} <1024 x i32> @test_tile_ttrans +// CHECK: call <1024 x i32> @llvm.linx.tile.ttrans.legacy + tile_i32 test_cube_mamulb(tile_i32 a, tile_i32 b) { return __builtin_linx_cube_mamulb(a, b, 4u, 
4u, 4u); } diff --git a/clang/test/Driver/linx-mlxbc.c b/clang/test/Driver/linx-mlxbc.c new file mode 100644 index 0000000000000..8719e72fa57c3 --- /dev/null +++ b/clang/test/Driver/linx-mlxbc.c @@ -0,0 +1,16 @@ +// Verify the legacy Linx benchmark driver flag selects the v0.57 Linx target. +// +// RUN: %clang -mlxbc -### -c %s 2>&1 | FileCheck %s --check-prefix=MLXBC +// RUN: %clang -mlxbc --target=linx32-unknown-linux-gnu -### -c %s 2>&1 | FileCheck %s --check-prefix=EXPLICIT + +// MLXBC: Target: linx64-linx-none-elf +// MLXBC: "-triple" "linx64-linx-none-elf" +// MLXBC: "-target-feature" "+lnx-s32" +// MLXBC: "-target-feature" "+lnx-s64" + +// EXPLICIT: Target: linx32-unknown-linux-gnu +// EXPLICIT: "-triple" "linx32-unknown-linux-gnu" +// EXPLICIT: "-target-feature" "+lnx-s32" +// EXPLICIT-NOT: "-target-feature" "+lnx-s64" + +int linx_mlxbc_driver_test(void) { return 0; } diff --git a/llvm/include/llvm/IR/IntrinsicsLinx.td b/llvm/include/llvm/IR/IntrinsicsLinx.td index 3944ce03f6d4a..c71ac69966147 100644 --- a/llvm/include/llvm/IR/IntrinsicsLinx.td +++ b/llvm/include/llvm/IR/IntrinsicsLinx.td @@ -15,7 +15,7 @@ let TargetPrefix = "linx" in { // ------------------------------------------------------------------------- // Tile block intrinsics (TAU) - PR4 surface // - // Preferred strict-v0.3 frontend surface: + // Preferred LinxISA v0.57 frontend surface: // llvm.linx.tile.tload/tstore/tmov // // Type contract: @@ -71,6 +71,35 @@ let TargetPrefix = "linx" in { ImmArg>, ImmArg>, ImmArg>]>; + // %linx.tile @llvm.linx.tile.tinsert(%linx.tile dst, %linx.tile src, + // i32 immSizeCode, i32 immDType, + // i32 immDstRows, i32 immDstCols, + // i32 immSrcRows, i32 immSrcCols, + // i64 meta) + def int_linx_tile_tinsert : + Intrinsic<[llvm_linx_tile_ty], + [llvm_linx_tile_ty, llvm_linx_tile_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], + [IntrNoMem, IntrHasSideEffects, + ImmArg>, ImmArg>, + ImmArg>, ImmArg>, + ImmArg>, 
ImmArg>]>; + + // %linx.tile @llvm.linx.tile.ttrans(%linx.tile src, %linx.tile tmp, + // i32 immSizeCode, i32 immDType, + // i32 immDstRows, i32 immDstCols, + // i32 immSrcRows, i32 immSrcCols) + def int_linx_tile_ttrans : + Intrinsic<[llvm_linx_tile_ty], + [llvm_linx_tile_ty, llvm_linx_tile_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, + ImmArg>, ImmArg>, + ImmArg>, ImmArg>, + ImmArg>, ImmArg>]>; + // %linx.tile @llvm.linx.cube.mamulb(%linx.tile a, %linx.tile b, // i32 immM, i32 immN, i32 immK) def int_linx_cube_mamulb : @@ -139,11 +168,22 @@ let TargetPrefix = "linx" in { ImmArg>, ImmArg>, ImmArg>]>; + // %linx.tile @llvm.linx.tepl.ternary(%linx.tile a, %linx.tile b, + // %linx.tile c, i32 immTileOp10, + // i32 immSizeCode, i32 immDType) + def int_linx_tepl_ternary : + Intrinsic<[llvm_linx_tile_ty], + [llvm_linx_tile_ty, llvm_linx_tile_ty, llvm_linx_tile_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, + ImmArg>, ImmArg>, + ImmArg>]>; + // %linx.tile @llvm.linx.tepl.binary.scalar(%linx.tile a, i64 scalar, // i32 immTileOp10, i32 immSizeCode, // i32 immDType, i32 immMode) // - // Scalar TEPL mode extension for strict-v0.3: + // Scalar TEPL mode extension for LinxISA v0.57: // - mode 1 = VS (tile,scalar) def int_linx_tepl_binary_scalar : Intrinsic<[llvm_linx_tile_ty], @@ -157,7 +197,7 @@ let TargetPrefix = "linx" in { // i32 immSizeCode, i32 immDType, // i32 immMode) // - // Scalar TEPL mode extension for strict-v0.3: + // Scalar TEPL mode extension for LinxISA v0.57: // - mode 2 = SV (scalar->tile splat) def int_linx_tepl_splat : Intrinsic<[llvm_linx_tile_ty], @@ -242,7 +282,7 @@ let TargetPrefix = "linx" in { // Bring-up contract: // - Executed as a decoupled BSTART.VPAR header that replays a single-lane // vector body over LB0/LB1 dimensions. - // - Tile operands are bound via B.IOTI descriptors and accessed through TA/TB/TO. 
+ // - Tile operands are bound via B.ITP/B.OTA descriptors and accessed through TA/TB/TO. // - immSizeCode uses the same 2^(SizeCode+4) convention as TLOAD/TSTORE. // ------------------------------------------------------------------------- @@ -284,6 +324,25 @@ let TargetPrefix = "linx" in { ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; + def int_linx_tma_mgather_desc : + Intrinsic<[llvm_anyvector_ty], + [llvm_ptr_ty, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i64_ty], + [IntrReadMem, IntrArgMemOnly, + NoCapture>, ImmArg>, + ImmArg>, ImmArg>, + ImmArg>, ImmArg>, + ImmArg>]>; + def int_linx_tma_mscatter_desc : + Intrinsic<[], + [llvm_ptr_ty, llvm_anyvector_ty, LLVMMatchType<0>, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i64_ty], + [IntrWriteMem, IntrArgMemOnly, + NoCapture>, ImmArg>, + ImmArg>, ImmArg>, + ImmArg>, ImmArg>, + ImmArg>]>; // Legacy vector TMOV bridge. def int_linx_tile_tmov_legacy : @@ -294,6 +353,23 @@ let TargetPrefix = "linx" in { ImmArg>, ImmArg>, ImmArg>, ImmArg>, ImmArg>]>; + def int_linx_tile_tinsert_legacy : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_i64_ty], + [IntrNoMem, IntrHasSideEffects, + ImmArg>, ImmArg>, + ImmArg>, ImmArg>, + ImmArg>, ImmArg>]>; + def int_linx_tile_ttrans_legacy : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, + ImmArg>, ImmArg>, + ImmArg>, ImmArg>, + ImmArg>, ImmArg>]>; // Legacy CUBE forms. 
def int_linx_cube_mamulb_legacy : @@ -332,6 +408,13 @@ let TargetPrefix = "linx" in { [IntrNoMem, IntrHasSideEffects, ImmArg>, ImmArg>, ImmArg>]>; + def int_linx_tepl_ternary_legacy : + Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrHasSideEffects, + ImmArg>, ImmArg>, + ImmArg>]>; def int_linx_tepl_binary_scalar_legacy : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty, diff --git a/llvm/lib/Target/LinxISA/AsmParser/LinxISAAsmParser.cpp b/llvm/lib/Target/LinxISA/AsmParser/LinxISAAsmParser.cpp index 126e15f46bafe..dd9608f47e31a 100644 --- a/llvm/lib/Target/LinxISA/AsmParser/LinxISAAsmParser.cpp +++ b/llvm/lib/Target/LinxISA/AsmParser/LinxISAAsmParser.cpp @@ -185,7 +185,8 @@ static std::optional parseRegCode(StringRef Name) { .Default(std::nullopt); } -static std::optional parseTileRef(StringRef Name, bool &Reuse) { +static std::optional parseTileRef(StringRef Name, bool &Reuse, + bool V057Namespace = false) { Reuse = false; StringRef N = Name.trim(); StringRef Base = N; @@ -204,6 +205,16 @@ static std::optional parseTileRef(StringRef Name, bool &Reuse) { } } + if (Base.equals_insensitive("TZERO")) + return 0u; + + if (Base.consume_front_insensitive("tile#")) { + unsigned Tile = 0; + if (Base.getAsInteger(10, Tile) || Tile > 255u) + return std::nullopt; + return Tile; + } + if (Base.size() < 3) return std::nullopt; const char HandChar = @@ -233,7 +244,8 @@ static std::optional parseTileRef(StringRef Name, bool &Reuse) { StringRef Tail = Base.drop_front(2); if (Tail.getAsInteger(10, Depth) || Depth == 0 || Depth > 8) return std::nullopt; - return (Hand << 3) | ((Depth - 1u) & 0x7u); + const unsigned Compact = (Hand << 3) | ((Depth - 1u) & 0x7u); + return V057Namespace ? 
Compact + 1u : Compact; } static std::optional parseDataTypeKeyword(StringRef Name) { @@ -273,71 +285,151 @@ static std::optional parseDataTypeKeyword(StringRef Name) { static std::optional parseTMAFunctionKeyword(StringRef Name) { const std::string Up = toUpperStr(Name.trim()); return StringSwitch>(Up) - .Case("TLOAD", 0u) - .Case("TSTORE", 1u) - .Case("TMOV", 2u) + .Case("MGATHER", 0u) + .Case("MSCATTER", 1u) + .Case("TLOAD", 2u) + .Case("TPREFETCH", 3u) + .Case("TPREFETCH_ASYNC", 3u) + .Case("TSTORE", 4u) + .Case("TSTORE_FP", 4u) .Default(std::nullopt); } static std::optional parseCubeFunctionKeyword(StringRef Name) { const std::string Up = toUpperStr(Name.trim()); return StringSwitch>(Up) - .Case("MAMULB", 0u) - .Case("TMATMUL", 0u) - .Case("MAMULB.ACC", 2u) - .Case("TMATMUL.ACC", 2u) - .Case("ACCCVT", 8u) + .Case("TGEMV", 0u) + .Case("TGEMV.ACC", 0u) + .Case("TGEMV.BIAS", 0u) + .Case("TGEMV.MX", 0u) + .Case("TGEMV.MX.ACC", 0u) + .Case("TGEMV.MX.BIAS", 0u) + .Case("TMATMUL", 1u) + .Case("TMATMUL.ACC", 1u) + .Case("TMATMUL.BIAS", 1u) + .Case("TMATMUL.MX", 1u) + .Case("TMATMUL.MX.ACC", 1u) + .Case("TMATMUL.MX.BIAS", 1u) + .Default(std::nullopt); +} + +static std::optional parseFIXPFunctionKeyword(StringRef Name) { + const std::string Up = toUpperStr(Name.trim()); + return StringSwitch>(Up) + .Case("TDEQUANT", 0u) + .Case("TEXTRACT", 1u) + .Case("TEXTRACT_FP", 1u) + .Case("TINSERT", 2u) + .Case("TINSERT_FP", 2u) + .Case("TMOV", 3u) + .Case("TMOV.FP", 3u) + .Case("TMOV_FP", 3u) + .Case("TCONCAT", 4u) + .Case("TCONCATIDX", 4u) + .Case("TFILLPAD", 5u) + .Case("TFILLPAD_EXPAND", 5u) + .Case("TFILLPAD_INPLACE", 5u) + .Case("TQUANT", 6u) + .Case("TTRANS", 7u) .Default(std::nullopt); } static std::optional parseTEPLTileOpcodeKeyword(StringRef Name) { const std::string Up = toUpperStr(Name.trim()); return StringSwitch>(Up) - // Mode 0: base TEPL function set. 
- .Case("TADD", 0x000u) - .Case("TSUB", 0x001u) - .Case("TMUL", 0x002u) - .Case("TDIV", 0x003u) - .Case("TMAX", 0x004u) - .Case("TMIN", 0x005u) - .Case("TAND", 0x006u) - .Case("TOR", 0x007u) - .Case("TXOR", 0x008u) - .Case("TSHL", 0x009u) - .Case("TSHR", 0x00au) - .Case("TRELU", 0x00bu) - .Case("TPRELU", 0x00cu) - .Case("TCVT", 0x00du) - .Case("TEXP", 0x00eu) - .Case("TLOG", 0x00fu) - .Case("TSQRT", 0x010u) - .Case("TRSQRT", 0x011u) - .Case("TROWMAX", 0x012u) - .Case("TROWMIN", 0x013u) - .Case("TROWSUM", 0x014u) + // LinxISA v0.57 TEPL tile-op allocation. + .Case("TABS", 0x000u) + .Case("TADD", 0x001u) + .Case("TADDC", 0x002u) + .Case("TADDS", 0x003u) + .Case("TADDSC", 0x004u) + .Case("TAND", 0x005u) + .Case("TANDS", 0x006u) + .Case("TAXPY", 0x007u) + .Case("TCI", 0x008u) + .Case("TCMP", 0x009u) + .Case("TCMPS", 0x00au) + .Case("TCOLARGMAX", 0x00bu) + .Case("TCOLARGMIN", 0x00cu) + .Case("TCOLEXPAND", 0x00du) + .Case("TCOLEXPANDADD", 0x00eu) + .Case("TCOLEXPANDDIV", 0x00fu) + .Case("TCOLEXPANDEXPDIF", 0x010u) + .Case("TCOLEXPANDMAX", 0x011u) + .Case("TCOLEXPANDMIN", 0x012u) + .Case("TCOLEXPANDMUL", 0x013u) + .Case("TCOLEXPANDSUB", 0x014u) .Case("TCOLMAX", 0x015u) .Case("TCOLMIN", 0x016u) - .Case("TCOLSUM", 0x017u) - .Case("TRECIP", 0x018u) - .Case("TEXPANDS", 0x019u) - .Case("TGATHER", 0x01au) - .Case("TSCATTER", 0x01bu) - .Case("TRESHAPE", 0x01cu) - .Case("TTRANSPOSE", 0x01du) - .Case("TCOLEXPAND", 0x01eu) - .Case("TROWEXPAND", 0x01fu) - // Mode 1: scalar-RHS extension window. 
- .Case("TADDS", 0x020u) - .Case("TSUBS", 0x021u) - .Case("TMULS", 0x022u) - .Case("TDIVS", 0x023u) - .Case("TMAXS", 0x024u) - .Case("TMINS", 0x025u) - .Case("TANDS", 0x026u) - .Case("TORS", 0x027u) - .Case("TXORS", 0x028u) - .Case("TSHLS", 0x029u) - .Case("TSHRS", 0x02au) + .Case("TCOLPROD", 0x017u) + .Case("TCOLSUM", 0x018u) + .Case("TCVT", 0x019u) + .Case("TDIV", 0x01au) + .Case("TDIVS", 0x01bu) + .Case("TEXP", 0x01cu) + .Case("TEXPANDS", 0x01du) + .Case("TFMOD", 0x01eu) + .Case("TFMODS", 0x01fu) + .Case("TGATHER", 0x020u) + .Case("TGATHERB", 0x021u) + .Case("THISTOGRAM", 0x022u) + .Case("TLOG", 0x023u) + .Case("TLRELU", 0x024u) + .Case("TMAX", 0x025u) + .Case("TMAXS", 0x026u) + .Case("TMIN", 0x027u) + .Case("TMINS", 0x028u) + .Case("TMRGSORT", 0x029u) + .Case("TMUL", 0x02au) + .Case("TMULS", 0x02bu) + .Case("TNEG", 0x02cu) + .Case("TNOT", 0x02du) + .Case("TOR", 0x02eu) + .Case("TORS", 0x02fu) + .Case("TPARTADD", 0x030u) + .Case("TPARTARGMAX", 0x031u) + .Case("TPARTARGMIN", 0x032u) + .Case("TPARTMAX", 0x033u) + .Case("TPARTMIN", 0x034u) + .Case("TPARTMUL", 0x035u) + .Case("TPOW", 0x036u) + .Case("TPRELU", 0x037u) + .Case("TRANDOM", 0x038u) + .Case("TRECIP", 0x039u) + .Case("TRELU", 0x03au) + .Case("TREM", 0x03bu) + .Case("TREMS", 0x03cu) + .Case("TROWARGMAX", 0x03du) + .Case("TROWARGMIN", 0x03eu) + .Case("TROWEXPAND", 0x03fu) + .Case("TROWEXPANDADD", 0x040u) + .Case("TROWEXPANDDIV", 0x041u) + .Case("TROWEXPANDEXPDIF", 0x042u) + .Case("TROWEXPANDMAX", 0x043u) + .Case("TROWEXPANDMIN", 0x044u) + .Case("TROWEXPANDMUL", 0x045u) + .Case("TROWEXPANDSUB", 0x046u) + .Case("TROWMAX", 0x047u) + .Case("TROWMIN", 0x048u) + .Case("TROWPROD", 0x049u) + .Case("TROWSUM", 0x04au) + .Case("TRSQRT", 0x04bu) + .Case("TSCATTER", 0x04cu) + .Case("TSEL", 0x04du) + .Case("TSELS", 0x04eu) + .Case("TSHL", 0x04fu) + .Case("TSHLS", 0x050u) + .Case("TSHR", 0x051u) + .Case("TSHRS", 0x052u) + .Case("TSORT32", 0x053u) + .Case("TSQRT", 0x054u) + .Case("TSUB", 0x055u) + .Case("TSUBC", 0x056u) + 
.Case("TSUBS", 0x057u) + .Case("TSUBSC", 0x058u) + .Case("TTRI", 0x059u) + .Case("TXOR", 0x05au) + .Case("TXORS", 0x05bu) .Default(std::nullopt); } @@ -348,59 +440,94 @@ struct TileBlockAlias { static std::optional parseTileBlockAliasMnemonic(StringRef Name) { const std::string Up = toUpperStr(Name.trim()); - return StringSwitch>(Up) - .Case("BSTART.TLOAD", TileBlockAlias{"BSTART.TMA", 0u}) - .Case("BSTART.TSTORE", TileBlockAlias{"BSTART.TMA", 1u}) - .Case("BSTART.TMOV", TileBlockAlias{"BSTART.TMA", 2u}) - .Case("BSTART.MAMULB", TileBlockAlias{"BSTART.CUBE", 0u}) - .Case("BSTART.TMATMUL", TileBlockAlias{"BSTART.CUBE", 0u}) - .Case("BSTART.MAMULB.ACC", TileBlockAlias{"BSTART.CUBE", 2u}) - .Case("BSTART.TMATMUL.ACC", TileBlockAlias{"BSTART.CUBE", 2u}) - .Case("BSTART.ACCCVT", TileBlockAlias{"BSTART.CUBE", 8u}) - .Case("BSTART.TADD", TileBlockAlias{"BSTART.TEPL", 0x000u}) - .Case("BSTART.TSUB", TileBlockAlias{"BSTART.TEPL", 0x001u}) - .Case("BSTART.TMUL", TileBlockAlias{"BSTART.TEPL", 0x002u}) - .Case("BSTART.TDIV", TileBlockAlias{"BSTART.TEPL", 0x003u}) - .Case("BSTART.TMAX", TileBlockAlias{"BSTART.TEPL", 0x004u}) - .Case("BSTART.TMIN", TileBlockAlias{"BSTART.TEPL", 0x005u}) - .Case("BSTART.TAND", TileBlockAlias{"BSTART.TEPL", 0x006u}) - .Case("BSTART.TOR", TileBlockAlias{"BSTART.TEPL", 0x007u}) - .Case("BSTART.TXOR", TileBlockAlias{"BSTART.TEPL", 0x008u}) - .Case("BSTART.TSHL", TileBlockAlias{"BSTART.TEPL", 0x009u}) - .Case("BSTART.TSHR", TileBlockAlias{"BSTART.TEPL", 0x00au}) - .Case("BSTART.TRELU", TileBlockAlias{"BSTART.TEPL", 0x00bu}) - .Case("BSTART.TPRELU", TileBlockAlias{"BSTART.TEPL", 0x00cu}) - .Case("BSTART.TCVT", TileBlockAlias{"BSTART.TEPL", 0x00du}) - .Case("BSTART.TEXP", TileBlockAlias{"BSTART.TEPL", 0x00eu}) - .Case("BSTART.TLOG", TileBlockAlias{"BSTART.TEPL", 0x00fu}) - .Case("BSTART.TSQRT", TileBlockAlias{"BSTART.TEPL", 0x010u}) - .Case("BSTART.TRSQRT", TileBlockAlias{"BSTART.TEPL", 0x011u}) - .Case("BSTART.TROWMAX", TileBlockAlias{"BSTART.TEPL", 
0x012u}) - .Case("BSTART.TROWMIN", TileBlockAlias{"BSTART.TEPL", 0x013u}) - .Case("BSTART.TROWSUM", TileBlockAlias{"BSTART.TEPL", 0x014u}) + auto Alias = StringSwitch>(Up) + .Case("BSTART.MGATHER", TileBlockAlias{"BSTART.TMA", 0u}) + .Case("BSTART.MSCATTER", TileBlockAlias{"BSTART.TMA", 1u}) + .Case("BSTART.TLOAD", TileBlockAlias{"BSTART.TMA", 2u}) + .Case("BSTART.TPREFETCH", TileBlockAlias{"BSTART.TMA", 3u}) + .Case("BSTART.TSTORE", TileBlockAlias{"BSTART.TMA", 4u}) + .Case("BSTART.TSTORE_FP", TileBlockAlias{"BSTART.TMA", 4u}) + .Case("BSTART.TGEMV", TileBlockAlias{"BSTART.CUBE", 0u}) + .Case("BSTART.TGEMV.ACC", TileBlockAlias{"BSTART.CUBE", 0u}) + .Case("BSTART.TGEMV.BIAS", TileBlockAlias{"BSTART.CUBE", 0u}) + .Case("BSTART.TGEMV.MX", TileBlockAlias{"BSTART.CUBE", 0u}) + .Case("BSTART.TGEMV.MX.ACC", TileBlockAlias{"BSTART.CUBE", 0u}) + .Case("BSTART.TGEMV.MX.BIAS", TileBlockAlias{"BSTART.CUBE", 0u}) + .Case("BSTART.TMATMUL", TileBlockAlias{"BSTART.CUBE", 1u}) + .Case("BSTART.TMATMUL.ACC", TileBlockAlias{"BSTART.CUBE", 1u}) + .Case("BSTART.TMATMUL.BIAS", TileBlockAlias{"BSTART.CUBE", 1u}) + .Case("BSTART.TMATMUL.MX", TileBlockAlias{"BSTART.CUBE", 1u}) + .Case("BSTART.TMATMUL.MX.ACC", TileBlockAlias{"BSTART.CUBE", 1u}) + .Case("BSTART.TMATMUL.MX.BIAS", TileBlockAlias{"BSTART.CUBE", 1u}) + .Case("BSTART.TDEQUANT", TileBlockAlias{"BSTART.FIXP", 0u}) + .Case("BSTART.TEXTRACT", TileBlockAlias{"BSTART.FIXP", 1u}) + .Case("BSTART.TEXTRACT_FP", TileBlockAlias{"BSTART.FIXP", 1u}) + .Case("BSTART.TINSERT", TileBlockAlias{"BSTART.FIXP", 2u}) + .Case("BSTART.TINSERT_FP", TileBlockAlias{"BSTART.FIXP", 2u}) + .Case("BSTART.TMOV", TileBlockAlias{"BSTART.FIXP", 3u}) + .Case("BSTART.TMOV.FP", TileBlockAlias{"BSTART.FIXP", 3u}) + .Case("BSTART.TMOV_FP", TileBlockAlias{"BSTART.FIXP", 3u}) + .Case("BSTART.TCONCAT", TileBlockAlias{"BSTART.FIXP", 4u}) + .Case("BSTART.TCONCATIDX", TileBlockAlias{"BSTART.FIXP", 4u}) + .Case("BSTART.TFILLPAD", TileBlockAlias{"BSTART.FIXP", 5u}) + 
.Case("BSTART.TFILLPAD_EXPAND", TileBlockAlias{"BSTART.FIXP", 5u}) + .Case("BSTART.TFILLPAD_INPLACE", TileBlockAlias{"BSTART.FIXP", 5u}) + .Case("BSTART.TQUANT", TileBlockAlias{"BSTART.FIXP", 6u}) + .Case("BSTART.TTRANS", TileBlockAlias{"BSTART.FIXP", 7u}) + .Case("BSTART.TADD", TileBlockAlias{"BSTART.TEPL", 0x001u}) + .Case("BSTART.TSUB", TileBlockAlias{"BSTART.TEPL", 0x055u}) + .Case("BSTART.TMUL", TileBlockAlias{"BSTART.TEPL", 0x02au}) + .Case("BSTART.TDIV", TileBlockAlias{"BSTART.TEPL", 0x01au}) + .Case("BSTART.TMAX", TileBlockAlias{"BSTART.TEPL", 0x025u}) + .Case("BSTART.TMIN", TileBlockAlias{"BSTART.TEPL", 0x027u}) + .Case("BSTART.TAND", TileBlockAlias{"BSTART.TEPL", 0x005u}) + .Case("BSTART.TOR", TileBlockAlias{"BSTART.TEPL", 0x02eu}) + .Case("BSTART.TXOR", TileBlockAlias{"BSTART.TEPL", 0x05au}) + .Case("BSTART.TSHL", TileBlockAlias{"BSTART.TEPL", 0x04fu}) + .Case("BSTART.TSHR", TileBlockAlias{"BSTART.TEPL", 0x051u}) + .Case("BSTART.TRELU", TileBlockAlias{"BSTART.TEPL", 0x03au}) + .Case("BSTART.TPRELU", TileBlockAlias{"BSTART.TEPL", 0x037u}) + .Case("BSTART.TCVT", TileBlockAlias{"BSTART.TEPL", 0x019u}) + .Case("BSTART.TEXP", TileBlockAlias{"BSTART.TEPL", 0x01cu}) + .Case("BSTART.TLOG", TileBlockAlias{"BSTART.TEPL", 0x023u}) + .Case("BSTART.TSQRT", TileBlockAlias{"BSTART.TEPL", 0x054u}) + .Case("BSTART.TRSQRT", TileBlockAlias{"BSTART.TEPL", 0x04bu}) + .Case("BSTART.TROWMAX", TileBlockAlias{"BSTART.TEPL", 0x047u}) + .Case("BSTART.TROWMIN", TileBlockAlias{"BSTART.TEPL", 0x048u}) + .Case("BSTART.TROWSUM", TileBlockAlias{"BSTART.TEPL", 0x04au}) .Case("BSTART.TCOLMAX", TileBlockAlias{"BSTART.TEPL", 0x015u}) .Case("BSTART.TCOLMIN", TileBlockAlias{"BSTART.TEPL", 0x016u}) - .Case("BSTART.TCOLSUM", TileBlockAlias{"BSTART.TEPL", 0x017u}) - .Case("BSTART.TRECIP", TileBlockAlias{"BSTART.TEPL", 0x018u}) - .Case("BSTART.TEXPANDS", TileBlockAlias{"BSTART.TEPL", 0x019u}) - .Case("BSTART.TGATHER", TileBlockAlias{"BSTART.TEPL", 0x01au}) - .Case("BSTART.TSCATTER", 
TileBlockAlias{"BSTART.TEPL", 0x01bu}) - .Case("BSTART.TRESHAPE", TileBlockAlias{"BSTART.TEPL", 0x01cu}) - .Case("BSTART.TTRANSPOSE", TileBlockAlias{"BSTART.TEPL", 0x01du}) - .Case("BSTART.TCOLEXPAND", TileBlockAlias{"BSTART.TEPL", 0x01eu}) - .Case("BSTART.TROWEXPAND", TileBlockAlias{"BSTART.TEPL", 0x01fu}) - .Case("BSTART.TADDS", TileBlockAlias{"BSTART.TEPL", 0x020u}) - .Case("BSTART.TSUBS", TileBlockAlias{"BSTART.TEPL", 0x021u}) - .Case("BSTART.TMULS", TileBlockAlias{"BSTART.TEPL", 0x022u}) - .Case("BSTART.TDIVS", TileBlockAlias{"BSTART.TEPL", 0x023u}) - .Case("BSTART.TMAXS", TileBlockAlias{"BSTART.TEPL", 0x024u}) - .Case("BSTART.TMINS", TileBlockAlias{"BSTART.TEPL", 0x025u}) - .Case("BSTART.TANDS", TileBlockAlias{"BSTART.TEPL", 0x026u}) - .Case("BSTART.TORS", TileBlockAlias{"BSTART.TEPL", 0x027u}) - .Case("BSTART.TXORS", TileBlockAlias{"BSTART.TEPL", 0x028u}) - .Case("BSTART.TSHLS", TileBlockAlias{"BSTART.TEPL", 0x029u}) - .Case("BSTART.TSHRS", TileBlockAlias{"BSTART.TEPL", 0x02au}) + .Case("BSTART.TCOLSUM", TileBlockAlias{"BSTART.TEPL", 0x018u}) + .Case("BSTART.TRECIP", TileBlockAlias{"BSTART.TEPL", 0x039u}) + .Case("BSTART.TEXPANDS", TileBlockAlias{"BSTART.TEPL", 0x01du}) + .Case("BSTART.TGATHER", TileBlockAlias{"BSTART.TEPL", 0x020u}) + .Case("BSTART.TSCATTER", TileBlockAlias{"BSTART.TEPL", 0x04cu}) + .Case("BSTART.TROWEXPAND", TileBlockAlias{"BSTART.TEPL", 0x03fu}) + .Case("BSTART.TADDS", TileBlockAlias{"BSTART.TEPL", 0x003u}) + .Case("BSTART.TSUBS", TileBlockAlias{"BSTART.TEPL", 0x057u}) + .Case("BSTART.TMULS", TileBlockAlias{"BSTART.TEPL", 0x02bu}) + .Case("BSTART.TDIVS", TileBlockAlias{"BSTART.TEPL", 0x01bu}) + .Case("BSTART.TMAXS", TileBlockAlias{"BSTART.TEPL", 0x026u}) + .Case("BSTART.TMINS", TileBlockAlias{"BSTART.TEPL", 0x028u}) + .Case("BSTART.TANDS", TileBlockAlias{"BSTART.TEPL", 0x006u}) + .Case("BSTART.TORS", TileBlockAlias{"BSTART.TEPL", 0x02fu}) + .Case("BSTART.TXORS", TileBlockAlias{"BSTART.TEPL", 0x05bu}) + .Case("BSTART.TSHLS", 
TileBlockAlias{"BSTART.TEPL", 0x050u}) + .Case("BSTART.TSHRS", TileBlockAlias{"BSTART.TEPL", 0x052u}) + .Case("BSTART.TREM", TileBlockAlias{"BSTART.TEPL", 0x03bu}) + .Case("BSTART.TREMS", TileBlockAlias{"BSTART.TEPL", 0x03cu}) + .Case("BSTART.TSEL", TileBlockAlias{"BSTART.TEPL", 0x04du}) + .Case("BSTART.TCMP", TileBlockAlias{"BSTART.TEPL", 0x009u}) .Default(std::nullopt); + if (Alias) + return Alias; + + StringRef DirectTEPL(Up); + if (DirectTEPL.consume_front("BSTART.")) + if (auto Op = parseTEPLTileOpcodeKeyword(DirectTEPL)) + return TileBlockAlias{"BSTART.TEPL", *Op}; + + return std::nullopt; } static std::optional parseSSRIdName(StringRef Name) { @@ -1468,34 +1595,41 @@ bool LinxISAAsmParser::parseArrowDestOperand(ParsedReg &OutDest) { Lex(); // Optional tile-descriptor angle suffix: - // - `->t` / `->acc` (B.IOTI) - // - `->t` / `->acc` (B.IOT) - // This syntax is used by B.IOT/B.IOTI and is not a normal register operand. + // - `->t#N` / `->acc#N` (B.OTA) + // This syntax is used by tile descriptor forms and is not a normal register + // operand. 
if (getTok().is(AsmToken::Less)) { - std::string Up = toUpperStr(Base); - unsigned Kind = 0; - if (Up == "T") { - Kind = 0; - } else if (Up == "U") { - Kind = 1; - } else if (Up == "M") { - Kind = 2; - } else if (Up == "N") { - Kind = 3; - } else if (Up == "ACC") { - Kind = 4; + bool Reuse = false; + if (auto Tile = parseTileRef(Base, Reuse, /*V057Namespace=*/true)) { + if (Reuse) + return Error(D.Loc, "destination tile suffix cannot use .reuse"); + D.Code = *Tile & 0xffu; } else { - return Error(D.Loc, - "invalid tile kind for '-><...>' (expected t/u/m/n/acc)"); + std::string Up = toUpperStr(Base); + unsigned Kind = 0; + if (Up == "T") { + Kind = 0; + } else if (Up == "U") { + Kind = 1; + } else if (Up == "M") { + Kind = 2; + } else if (Up == "N") { + Kind = 3; + } else if (Up == "ACC") { + Kind = 4; + } else { + return Error(D.Loc, + "invalid tile destination for '-><...>'"); + } + D.Code = Kind; } - D.Code = Kind; Lex(); // '<' if (getTok().is(AsmToken::Identifier)) { if (auto Reg = parseRegCode(getTok().getString())) { if (*Reg >= 32u) return Error(getTok().getLoc(), - "B.IOT RegSrc must be a scalar 5-bit register"); + "tile descriptor RegSrc must be a scalar 5-bit register"); D.HasAngleReg = true; D.AngleReg = *Reg & 0x1fu; Lex(); @@ -1637,6 +1771,7 @@ bool LinxISAAsmParser::parseInstruction(ParseInstructionInfo &Info, bool AllowMemOperands = false; bool AllowFrameRangeOperands = false; bool IsTileIODesc = false; + bool IsTileInputPairDesc = false; unsigned MaxArrowDests = 0; { std::string Key = toUpperStr(Name); @@ -1700,7 +1835,8 @@ bool LinxISAAsmParser::parseInstruction(ParseInstructionInfo &Info, } StringRef KeyRef(Key); - IsTileIODesc = KeyRef == "B.IOT" || KeyRef == "B.IOTI"; + IsTileInputPairDesc = KeyRef == "B.ITP"; + IsTileIODesc = KeyRef == "B.ITP" || KeyRef == "B.OTA"; } while (!getTok().is(AsmToken::EndOfStatement)) { @@ -1871,11 +2007,15 @@ bool LinxISAAsmParser::parseInstruction(ParseInstructionInfo &Info, // encode `.reuse` inline. 
if (IsTileIODesc) { bool Reuse = false; - if (auto Tile = parseTileRef(getTok().getString(), Reuse)) { + if (auto Tile = parseTileRef(getTok().getString(), Reuse, + IsTileInputPairDesc)) { SMLoc S = getTok().getLoc(); SMLoc E = getTok().getEndLoc(); Lex(); - const unsigned Enc = (*Tile & 0x1fu) | (Reuse ? (1u << 5) : 0u); + const unsigned Enc = + IsTileInputPairDesc + ? ((*Tile & 0xffu) | (Reuse ? (1u << 8) : 0u)) + : ((*Tile & 0x1fu) | (Reuse ? (1u << 5) : 0u)); Operands.push_back(LinxOperand::createImm( MCConstantExpr::create(Enc, getContext()), S, E)); continue; @@ -2743,7 +2883,8 @@ bool LinxISAAsmParser::buildMCInstForForm(unsigned FormIndex, const ParsedInst & } if (AsmFmt.starts_with("B.IOD")) { - Err = "B.IOD is deprecated in canonical v0.4; use B.IOR/B.IOT/B.IOTI"; + Err = "B.IOD is not accepted in LinxISA v0.57 descriptor assembly; " + "use B.IOR plus B.ITP/B.OTA"; return false; } @@ -2976,140 +3117,131 @@ bool LinxISAAsmParser::buildMCInstForForm(unsigned FormIndex, const ParsedInst & return true; } - // Special-case: tile block IO descriptors (B.IOT / B.IOTI). - if (AsmFmt.starts_with("B.IOT")) { - const bool IsIOTI = AsmFmt.starts_with("B.IOTI"); + // Special-case: v0.57 tile input-pair descriptor. 
+ if (AsmFmt.starts_with("B.ITP")) { if (!require(!PI.Mem && !PI.SetRetTarget, - "unexpected operands for B.IOT/B.IOTI")) + "unexpected operands for B.ITP")) + return false; + if (!require(PI.Regs.empty() && PI.ArrowDests.empty(), + "unexpected register operands for B.ITP")) return false; - if (!require(PI.Regs.empty(), - "unexpected register operands for B.IOT/B.IOTI")) + if (!require(!PI.Imms.empty(), "B.ITP expects trailing src_pair")) + return false; + if (!require(PI.Imms.size() <= 3, + "B.ITP supports at most 2 source tiles plus src_pair")) return false; bool WantLast = false; for (const ParsedKeyword &K : PI.Keywords) if (K.TextUpper == "LAST") WantLast = true; - const bool FormLast = AsmFmt.contains("group=1"); - if (!require(WantLast == FormLast, - "group/last marker does not match encoding")) - return false; - if (!require(PI.ArrowDests.size() == 1, - "expected tile destination suffix (for example '->t<1KB>' or " - "'->t')")) - return false; + auto takeConstImm = [&](unsigned i, int64_t &V) -> bool { + return isConstExpr(PI.Imms[i].Expr, V); + }; - const unsigned DstHand = PI.ArrowDests[0].Code & 0x7u; - unsigned DstTile = DstHand; - if (DstHand < 4u) - DstTile = DstHand + 1u; - else if (DstHand == 4u) - DstTile = 4u; - const unsigned SizeCode = PI.ArrowDests[0].AngleSize & 0x1fu; - const unsigned RegSrc = PI.ArrowDests[0].AngleReg & 0x1fu; - - if (!require(PI.Imms.size() <= 2, - "B.IOT/B.IOTI supports at most 2 SrcTile operands")) + int64_t PairRaw = 0; + if (!require(takeConstImm(PI.Imms.size() - 1, PairRaw), + "B.ITP src_pair must be constant")) return false; + const unsigned SrcPair = static_cast(PairRaw) & 0x3u; - unsigned S0V = 1, S1V = 1; - unsigned S0R = 0, S1R = 0; - unsigned Src0 = 0, Src1 = 0; + unsigned Src0 = 0; + unsigned Src1 = 0; + unsigned S0R = 0; + unsigned S1R = 0; auto takeTileImm = [&](unsigned i, unsigned &Tile, unsigned &Reuse) -> bool { int64_t V = 0; - if (!isConstExpr(PI.Imms[i].Expr, V)) + if (!takeConstImm(i, V)) return false; 
- Tile = static_cast(V) & 0x1fu; - Reuse = (static_cast(V) >> 5) & 0x1u; + Tile = static_cast(V) & 0xffu; + Reuse = (static_cast(V) >> 8) & 0x1u; return true; }; - if (PI.Imms.size() >= 1) { - unsigned Reuse = 0; - if (!require(takeTileImm(0, Src0, Reuse), - "tile refs must be constant in bring-up")) + const unsigned SourceCount = static_cast(PI.Imms.size() - 1); + if (SourceCount >= 1) { + if (!require(takeTileImm(0, Src0, S0R), + "B.ITP tile refs must be constant in bring-up")) return false; - S0V = 0; - S0R = Reuse & 1u; } - if (PI.Imms.size() >= 2) { - unsigned Reuse = 0; - if (!require(takeTileImm(1, Src1, Reuse), - "tile refs must be constant in bring-up")) + if (SourceCount >= 2) { + if (!require(takeTileImm(1, Src1, S1R), + "B.ITP tile refs must be constant in bring-up")) return false; - S1V = 0; - S1R = Reuse & 1u; - } - - // Canonical v0.4 printer elides the hidden sentinel slots when the - // descriptor has no explicit source tile list. Preserve those sentinels so - // llvm-mc can reassemble the disassembled form byte-for-byte. - const bool UseHiddenEmptySentinels = PI.Imms.empty() && DstHand < 4u; - if (UseHiddenEmptySentinels) { - const unsigned Hidden = ((DstHand & 0x3u) << 3) | 1u; - Src0 = Hidden; - Src1 = Hidden; - S0R = 1u; - S1R = 1u; - S0V = 1u; - S1V = 1u; } + if (!require((Src0 != 0u || S0R == 0u) && (Src1 != 0u || S1R == 0u), + "B.ITP TZERO source cannot use .reuse")) + return false; - // Bring-up contract: if an output tile register is not explicitly present - // in the source list, encode the default destination tile ID in the first - // absent slot (preferring SrcTile1). This supports QEMU/local-base binding. 
- if (!UseHiddenEmptySentinels && DstTile != 4u) { // not acc - const unsigned DefaultDst = (DstHand & 0x3u) << 3; // depth 0 - if (S1V == 1u) { - Src1 = DefaultDst; - } else if (S0V == 1u) { - Src0 = DefaultDst; - } + for (unsigned i = 0; i < Form.field_count; ++i) { + const linxisa_field &Field = linxisa_fields[Form.field_start + i]; + StringRef FN(Field.name); + if (FN == "SrcTile1") + emitFieldImm(Src1); + else if (FN == "SrcTile0") + emitFieldImm(Src0); + else if (FN == "L") + emitFieldImm(WantLast ? 1u : 0u); + else if (FN == "src_pair") + emitFieldImm(SrcPair); + else if (FN == "S1R") + emitFieldImm(S1R); + else if (FN == "S0R") + emitFieldImm(S0R); + else + return require(false, ("unsupported B.ITP field: " + FN).str()); } + return true; + } - if (!require(Form.field_count == 8, "unexpected B.IOT/B.IOTI field layout")) + // Special-case: v0.57 tile output allocation descriptor. + if (AsmFmt.starts_with("B.OTA")) { + if (!require(!PI.Mem && !PI.SetRetTarget, + "unexpected operands for B.OTA")) + return false; + if (!require(PI.Regs.empty(), "unexpected register operands for B.OTA")) + return false; + if (!require(PI.ArrowDests.size() == 1, + "B.OTA expects one tile destination suffix")) + return false; + if (!require(PI.Imms.size() == 1, "B.OTA expects trailing dst_slot")) + return false; + if (!require(PI.ArrowDests[0].HasAngleSize && + !PI.ArrowDests[0].HasAngleReg, + "B.OTA expects CellCountM1 suffix, for example ->t#1<31>")) return false; - if (IsIOTI) { - if (!require(PI.ArrowDests[0].HasAngleSize && !PI.ArrowDests[0].HasAngleReg, - "B.IOTI expects size suffix '->t'")) - return false; - } else { - if (!require(PI.ArrowDests[0].HasAngleReg && !PI.ArrowDests[0].HasAngleSize, - "B.IOT expects register suffix '->t'")) - return false; - } + bool WantLast = false; + for (const ParsedKeyword &K : PI.Keywords) + if (K.TextUpper == "LAST") + WantLast = true; + + int64_t SlotRaw = 0; + if (!require(isConstExpr(PI.Imms[0].Expr, SlotRaw), + "B.OTA dst_slot must be 
constant")) + return false; + const unsigned DstSlot = static_cast(SlotRaw) & 0x3u; + const unsigned DstTile = PI.ArrowDests[0].Code & 0xffu; + const unsigned CellCountM1 = PI.ArrowDests[0].AngleSize & 0xffu; + if (!require(DstTile != 0u, "B.OTA destination cannot be TZERO")) + return false; - // Emit fields in the actual spec order. B.IOT places RegSrc immediately - // after DstTile, while B.IOTI carries imm5 at the tail. for (unsigned i = 0; i < Form.field_count; ++i) { const linxisa_field &Field = linxisa_fields[Form.field_start + i]; StringRef FN(Field.name); if (FN == "DstTile") emitFieldImm(DstTile); - else if (FN == "RegSrc") - emitFieldImm(RegSrc); - else if (FN == "S0R") - emitFieldImm(S0R); - else if (FN == "S0V") - emitFieldImm(S0V); - else if (FN == "S1R") - emitFieldImm(S1R); - else if (FN == "S1V") - emitFieldImm(S1V); - else if (FN == "SrcTile0") - emitFieldImm(Src0); - else if (FN == "SrcTile1") - emitFieldImm(Src1); - else if (FN == "imm5" || FN == "uimm5") - emitFieldImm(SizeCode); + else if (FN == "CellCountM1") + emitFieldImm(CellCountM1); + else if (FN == "L") + emitFieldImm(WantLast ? 1u : 0u); + else if (FN == "dst_slot") + emitFieldImm(DstSlot); else - return require(false, - ("unsupported B.IOT/B.IOTI field: " + FN).str()); + return require(false, ("unsupported B.OTA field: " + FN).str()); } - return true; } @@ -3137,8 +3269,7 @@ bool LinxISAAsmParser::buildMCInstForForm(unsigned FormIndex, const ParsedInst & !PI.SetRetTarget, (Twine("unexpected operands for ") + Kind))) return false; - const char *Selector = (IsBStartTEPL || IsBStartFIXP) ? "TileOpcode" - : "Function"; + const char *Selector = IsBStartTEPL ? 
"TileOpcode" : "Function"; if (!require( PI.Imms.size() == 2, (Twine("expected operands '") + Selector + ", DataType' for " + Kind))) @@ -3163,7 +3294,10 @@ bool LinxISAAsmParser::buildMCInstForForm(unsigned FormIndex, const ParsedInst & } else if (IsBStartCUBE) { if (auto Fn = parseCubeFunctionKeyword(Sym)) return static_cast(*Fn); - } else if (IsBStartTEPL || IsBStartFIXP) { + } else if (IsBStartFIXP) { + if (auto Fn = parseFIXPFunctionKeyword(Sym)) + return static_cast(*Fn); + } else if (IsBStartTEPL) { if (auto Op = parseTEPLTileOpcodeKeyword(Sym)) return static_cast(*Op); } @@ -3185,12 +3319,18 @@ bool LinxISAAsmParser::buildMCInstForForm(unsigned FormIndex, const ParsedInst & if (IsBStartTMA) { if (!require(FuncVal.has_value(), - "Function must be a constant or one of {TLOAD,TSTORE,TMOV}")) + "Function must be a constant or one of " + "{MGATHER,MSCATTER,TLOAD,TPREFETCH,TSTORE}")) return false; } else if (IsBStartCUBE) { if (!require(FuncVal.has_value(), "Function must be a constant or one of " - "{MAMULB,TMATMUL,MAMULB.ACC,TMATMUL.ACC,ACCCVT}")) + "{TGEMV,TMATMUL,TGEMV.MX,TMATMUL.MX}")) + return false; + } else if (IsBStartFIXP) { + if (!require(FuncVal.has_value(), + "Function must be a constant or one of " + "{TDEQUANT,TEXTRACT,TINSERT,TMOV,TCONCAT,TFILLPAD,TQUANT,TTRANS}")) return false; } else { if (!require(FuncVal.has_value(), @@ -3206,17 +3346,16 @@ bool LinxISAAsmParser::buildMCInstForForm(unsigned FormIndex, const ParsedInst & "UINT64,UINT32,UINT16,UINT8,UINT4}")) return false; if (IsBStartTMA) - if (!require(*FuncVal >= 0 && *FuncVal <= 2, - "BSTART.TMA Function must be in range 0..2 in canonical " - "v0.4")) + if (!require(*FuncVal >= 0 && *FuncVal <= 4, + "BSTART.TMA Function must be in v0.57 range 0..4")) return false; if (IsBStartTEPL) if (!require(*FuncVal >= 0 && *FuncVal <= 0x3ff, "BSTART.TEPL TileOpcode must be in range 0..1023")) return false; if (IsBStartFIXP) - if (!require(*FuncVal >= 0 && *FuncVal <= 0x3ff, - "BSTART.FIXP selector must 
be in range 0..1023")) + if (!require(*FuncVal >= 0 && *FuncVal <= 31, + "BSTART.FIXP Function must be in range 0..31")) return false; if (!require(*DataTypeVal >= 0 && *DataTypeVal <= 31, (Twine(Kind) + " DataType out of range"))) diff --git a/llvm/lib/Target/LinxISA/Disassembler/LinxISADisassembler.cpp b/llvm/lib/Target/LinxISA/Disassembler/LinxISADisassembler.cpp index 8996fe1f23fea..ea1e57ffb8653 100644 --- a/llvm/lib/Target/LinxISA/Disassembler/LinxISADisassembler.cpp +++ b/llvm/lib/Target/LinxISA/Disassembler/LinxISADisassembler.cpp @@ -80,6 +80,12 @@ static const linxisa_inst_form *findMatch(uint64_t Insn, unsigned Bits, // under-constrained and can otherwise steal TEPL/TMA/CUBE headers during // disassembly. StringRef Mnem(F.mnemonic ? F.mnemonic : ""); + // v0.57 routes all TMA selectors through BSTART.TMA Function, DataType. + // The generated direct TLOAD/TSTORE/TMOV rows still carry pre-v0.57 fixed + // selector bits, so let the generic TMA row decode those encodings. + if (Mnem == "BSTART.TLOAD" || Mnem == "BSTART.TSTORE" || + Mnem == "BSTART.TMOV") + continue; if ((Mnem == "BSTART.MSEQ" || Mnem == "BSTART.MPAR") && (((Insn >> 25) & 0x1ULL) != 0ULL)) continue; diff --git a/llvm/lib/Target/LinxISA/LinxISABlockify.cpp b/llvm/lib/Target/LinxISA/LinxISABlockify.cpp index f75bbd561617c..6de20a01dd5a9 100644 --- a/llvm/lib/Target/LinxISA/LinxISABlockify.cpp +++ b/llvm/lib/Target/LinxISA/LinxISABlockify.cpp @@ -119,54 +119,7 @@ static void validateTileOpcode(int64_t TileOpcode, StringRef Context) { } static bool isWhitelistedTEPLTileOpcode(int64_t TileOpcode) { - switch (TileOpcode & 0x3ff) { - case 0x000: - case 0x001: - case 0x002: - case 0x003: - case 0x004: - case 0x005: - case 0x006: - case 0x007: - case 0x008: - case 0x009: - case 0x00a: - case 0x00b: - case 0x00c: - case 0x00d: - case 0x00e: - case 0x00f: - case 0x010: - case 0x011: - case 0x012: - case 0x013: - case 0x014: - case 0x015: - case 0x016: - case 0x017: - case 0x018: - case 0x019: - case 
0x01a: - case 0x01b: - case 0x01c: - case 0x01d: - case 0x01e: - case 0x01f: - case 0x020: - case 0x021: - case 0x022: - case 0x023: - case 0x024: - case 0x025: - case 0x026: - case 0x027: - case 0x028: - case 0x029: - case 0x02a: - return true; - default: - return false; - } + return (TileOpcode & 0x3ff) <= 0x05b; } static void validateWhitelistedTEPLTileOpcode(int64_t TileOpcode, @@ -174,7 +127,7 @@ static void validateWhitelistedTEPLTileOpcode(int64_t TileOpcode, validateTileOpcode(TileOpcode, Context); if (!isWhitelistedTEPLTileOpcode(TileOpcode)) report_fatal_error(Twine("Linx: ") + Context + - " uses TileOpcode outside the canonical v0.4 TEPL set"); + " uses TileOpcode outside the LinxISA v0.57 TEPL set"); } static void validateCubeDimImm(int64_t Dim, StringRef DimName, @@ -340,26 +293,6 @@ static unsigned tileIdFromRelRef(const TileRelRef &Ref) { return tileHandBase(Ref.Hand) + static_cast(Ref.Depth - 1u); } -static unsigned dstTileFieldFromHand(TileHand Hand) { - switch (Hand) { - case TileHand::T: - return 0; - case TileHand::U: - return 1; - case TileHand::M: - return 2; - case TileHand::N: - return 3; - case TileHand::ACC: - return 4; - } - llvm_unreachable("invalid tile hand"); -} - -static unsigned dstTileFieldFromRelRef(const TileRelRef &Ref) { - return dstTileFieldFromHand(Ref.Hand); -} - static unsigned tileRegIdFromReg(const TargetRegisterInfo &TRI, Register Reg) { if (!Reg || !Reg.isPhysical() || !LinxISA::TILERegClass.contains(Reg)) report_fatal_error("Linx: expected physical tile register"); @@ -378,6 +311,7 @@ static bool isMarkerInstr(const MachineInstr &MI) { case LinxISA::BSTART_STD_RET: case LinxISA::BSTART_TMA: case LinxISA::BSTART_CUBE: + case LinxISA::BSTART_FIXP: case LinxISA::BSTART_TEPL: case LinxISA::BSTART_VPAR: case LinxISA::BSTART_VSEQ: @@ -406,16 +340,15 @@ static bool isHeaderDescriptorOpcode(unsigned Opc) { switch (Opc) { case LinxISA::B_TEXT: case LinxISA::B_ARG: + case LinxISA::B_META: case LinxISA::B_ATTR: case 
LinxISA::B_DIM_LB0: case LinxISA::B_DIM_LB1: case LinxISA::B_DIM_LB2: case LinxISA::C_B_DIMI: case LinxISA::B_IOR: - case LinxISA::B_IOT_G0: - case LinxISA::B_IOT_G1: - case LinxISA::B_IOTI_G0: - case LinxISA::B_IOTI_G1: + case LinxISA::B_ITP: + case LinxISA::B_OTA: return true; default: return false; @@ -443,12 +376,17 @@ static bool isTilePseudoInstr(const MachineInstr &MI) { case LinxISA::PSEUDO_TMA_TLOAD_DESC: case LinxISA::PSEUDO_TMA_TSTORE: case LinxISA::PSEUDO_TMA_TSTORE_DESC: + case LinxISA::PSEUDO_TMA_MGATHER_DESC: + case LinxISA::PSEUDO_TMA_MSCATTER_DESC: case LinxISA::PSEUDO_TMA_TMOV: + case LinxISA::PSEUDO_FIXP_TINSERT: + case LinxISA::PSEUDO_FIXP_TTRANS: case LinxISA::PSEUDO_CUBE_MAMULB: case LinxISA::PSEUDO_CUBE_MAMULB_ACC: case LinxISA::PSEUDO_CUBE_ACCCVT: case LinxISA::PSEUDO_TEPL_UNARY: case LinxISA::PSEUDO_TEPL_BINARY: + case LinxISA::PSEUDO_TEPL_TERNARY: case LinxISA::PSEUDO_TEPL_BINARY_SCALAR: case LinxISA::PSEUDO_TEPL_SPLAT: case LinxISA::PSEUDO_VPAR_TADD: @@ -475,6 +413,7 @@ static bool isTileBlockStartInstr(const MachineInstr &MI) { switch (MI.getOpcode()) { case LinxISA::BSTART_TMA: case LinxISA::BSTART_CUBE: + case LinxISA::BSTART_FIXP: case LinxISA::BSTART_TEPL: return true; default: @@ -820,7 +759,7 @@ class LinxISABlockify : public MachineFunctionPass { if (isArchivedRawVectorOperandName(Base)) report_fatal_error( "Linx blockify: archived raw vector operand name is not allowed " - "in canonical v0.4; use TA/TB/TC/TD/TO/TS"); + "in LinxISA v0.57; use TA/TB/TC/TD/TO/TS"); auto RegCode = parseRegCode(Base); if (!RegCode) @@ -1712,11 +1651,11 @@ class LinxISABlockify : public MachineFunctionPass { .addSym(VTileAddBodySym); static const char kBodyAsm[] = - " v.lw.local [ta, lc0<<2, lc1<<8], ->vt.w\n" - " v.lw.local [tb, lc0<<2, lc1<<8], ->vu.w\n" - " v.add vt#1.sw, vu#1.sw, ->vt.w\n" - " v.sw.local vt#1, [to, lc0<<2, lc1<<8]\n" - " C.BSTOP\n"; + " v.lw.local [ta, lc0<<2, lc1<<8], ->vt.w\n" + " v.lw.local [tb, lc0<<2, lc1<<8], ->vu.w\n" + " 
v.add vt#1.sw, vu#1.sw, ->vt.w\n" + " v.sw.local vt#2, [to, lc0<<2, lc1<<8]\n" + " C.BSTOP\n"; emitVectorBodyText(*VTileAddBodyBB, StringRef(kBodyAsm), "vtile add body"); BuildMI(*VTileAddBodyBB, VTileAddBodyBB->end(), DebugLoc(), TII.get(TargetOpcode::EH_LABEL)) @@ -1746,11 +1685,11 @@ class LinxISABlockify : public MachineFunctionPass { .addSym(VTileSubBodySym); static const char kBodyAsm[] = - " v.lw.local [ta, lc0<<2, lc1<<8], ->vt.w\n" - " v.lw.local [tb, lc0<<2, lc1<<8], ->vu.w\n" - " v.sub vt#1.sw, vu#1.sw, ->vt.w\n" - " v.sw.local vt#1, [to, lc0<<2, lc1<<8]\n" - " C.BSTOP\n"; + " v.lw.local [ta, lc0<<2, lc1<<8], ->vt.w\n" + " v.lw.local [tb, lc0<<2, lc1<<8], ->vu.w\n" + " v.sub vt#1.sw, vu#1.sw, ->vt.w\n" + " v.sw.local vt#2, [to, lc0<<2, lc1<<8]\n" + " C.BSTOP\n"; emitVectorBodyText(*VTileSubBodyBB, StringRef(kBodyAsm), "vtile sub body"); BuildMI(*VTileSubBodyBB, VTileSubBodyBB->end(), DebugLoc(), TII.get(TargetOpcode::EH_LABEL)) @@ -2054,12 +1993,14 @@ class LinxISABlockify : public MachineFunctionPass { } constexpr unsigned DType_I32 = 17; - constexpr unsigned TMA_TLOAD = 0; - constexpr unsigned TMA_TSTORE = 1; - constexpr unsigned TMA_TMOV = 2; - constexpr unsigned CUBE_MAMULB = 0; - constexpr unsigned CUBE_MAMULB_ACC = 2; - constexpr unsigned CUBE_ACCCVT = 8; + constexpr unsigned TMA_MGATHER = 0; + constexpr unsigned TMA_MSCATTER = 1; + constexpr unsigned TMA_TLOAD = 2; + constexpr unsigned TMA_TSTORE = 4; + constexpr unsigned FIXP_TINSERT = 2; + constexpr unsigned FIXP_TMOV = 3; + constexpr unsigned FIXP_TTRANS = 7; + constexpr unsigned CUBE_TMATMUL = 1; auto emitDim = [&](MachineBasicBlock &DimMBB, MachineBasicBlock::iterator DimInsertPt, unsigned LoopNest, int64_t Imm) { @@ -2088,6 +2029,48 @@ class LinxISABlockify : public MachineFunctionPass { .addImm(0); }; + auto tileArchId = [](unsigned InternalTileId) -> unsigned { + if (InternalTileId >= 255u) + report_fatal_error("Linx: TileReg id does not fit v0.57 namespace"); + return InternalTileId + 1u; 
+ }; + + auto cellCountM1FromSizeCode = [](int64_t SizeCode) -> unsigned { + if (SizeCode < 0 || SizeCode > 31) + report_fatal_error("Linx: tile allocation size code must fit u5"); + if (SizeCode < 3) + return 0u; + const unsigned Cells = 1u << static_cast(SizeCode - 3); + if (Cells == 0u || Cells > 256u) + report_fatal_error("Linx: tile allocation exceeds v0.57 B.OTA capacity"); + return Cells - 1u; + }; + + auto emitTileInputPair = [&](MachineBasicBlock &DescMBB, + MachineBasicBlock::iterator DescInsertPt, + unsigned Src0, unsigned Src1, + unsigned S0R, unsigned S1R, + unsigned Last, unsigned SrcPair) { + return BuildMI(DescMBB, DescInsertPt, DL, TII.get(LinxISA::B_ITP)) + .addImm(Src0) + .addImm(Src1) + .addImm(S0R) + .addImm(S1R) + .addImm(Last) + .addImm(SrcPair); + }; + + auto emitTileOutputAlloc = [&](MachineBasicBlock &DescMBB, + MachineBasicBlock::iterator DescInsertPt, + unsigned DstTile, int64_t SizeCode, + unsigned Last, unsigned DstSlot) { + return BuildMI(DescMBB, DescInsertPt, DL, TII.get(LinxISA::B_OTA)) + .addImm(DstTile) + .addImm(cellCountM1FromSizeCode(SizeCode)) + .addImm(Last) + .addImm(DstSlot); + }; + switch (PseudoMI->getOpcode()) { case LinxISA::PSEUDO_TMA_TLOAD: { const Register Dst = PseudoMI->getOperand(0).getReg(); @@ -2103,8 +2086,8 @@ class LinxISABlockify : public MachineFunctionPass { .addImm(DType_I32) .addImm(TMA_TLOAD); - // Canonical descriptor-carrying TLOAD header: - // B.DIM(LB0/LB1) + B.ARG + B.IOR + B.IOT/B.IOTI. + // Descriptor-carrying TLOAD header: + // B.DIM(LB0/LB1) + B.ARG + B.IOR + B.OTA. 
// // The current PTO auto-mode bridge does not pass explicit layout/dim // metadata yet, so use bring-up defaults here: @@ -2119,18 +2102,8 @@ class LinxISABlockify : public MachineFunctionPass { .addReg(Base) // RegSrc1: base pointer .addReg(LinxISA::R0);// RegSrc2: aux/layout source (default 0) - // Canonical v0.4 contract: B.IOTI is the canonical descriptor; encode the - // tile destination register in the first absent source slot (SrcTile1) - // and set S0V/S1V to indicate no tile inputs. - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromRelRef(tileRelRefFromId(DstID))) // DstTile (hand) - .addImm(0) // S0R - .addImm(1) // S0V (absent) - .addImm(0) // S1R - .addImm(1) // S1V (absent) - .addImm(0) // SrcTile0 - .addImm(DstID) // SrcTile1 (dst tile reg id) - .addImm(Size) // SizeCode (imm5) + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), Size, + /*Last=*/1, /*DstSlot=*/0) .addReg(Dst, RegState::Define | RegState::Implicit); PseudoMI->eraseFromParent(); @@ -2160,15 +2133,8 @@ class LinxISABlockify : public MachineFunctionPass { .addReg(Base) .addReg(LinxISA::R0); - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromRelRef(tileRelRefFromId(DstID))) - .addImm(0) - .addImm(1) - .addImm(0) - .addImm(1) - .addImm(0) - .addImm(DstID) - .addImm(Size) + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), Size, + /*Last=*/1, /*DstSlot=*/0) .addReg(Dst, RegState::Define | RegState::Implicit); PseudoMI->eraseFromParent(); @@ -2212,15 +2178,58 @@ class LinxISABlockify : public MachineFunctionPass { .addReg(Base) .addReg(LinxISA::R0); - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromRelRef(tileRelRefFromId(DstID))) - .addImm(0) - .addImm(1) - .addImm(0) - .addImm(1) - .addImm(0) - .addImm(DstID) - .addImm(Size) + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), Size, + /*Last=*/1, /*DstSlot=*/0) + .addReg(Dst, RegState::Define | RegState::Implicit); + + 
PseudoMI->eraseFromParent(); + Changed = true; + break; + } + + case LinxISA::PSEUDO_TMA_MGATHER_DESC: { + const Register Dst = PseudoMI->getOperand(0).getReg(); + const Register Base = PseudoMI->getOperand(1).getReg(); + const Register Index = PseudoMI->getOperand(2).getReg(); + const int64_t DType = PseudoMI->getOperand(3).getImm(); + const int64_t Layout = PseudoMI->getOperand(4).getImm(); + const int64_t LB0 = PseudoMI->getOperand(5).getImm(); + const int64_t LB1 = PseudoMI->getOperand(6).getImm(); + const int64_t Size = PseudoMI->getOperand(7).getImm(); + const Register StrideReg = PseudoMI->getOperand(8).getReg(); + if (DType < 0 || DType > 31) + report_fatal_error("Linx: TMA.MGATHER dtype must fit u5"); + validateStrictTileSizeCode(Size, "TMA.MGATHER"); + const uint64_t Dim0 = requirePositiveDimImm(LB0, "lb0", "TMA.MGATHER"); + const uint64_t Dim1 = requirePositiveDimImm(LB1, "lb1", "TMA.MGATHER"); + validateTileByteBudget("TMA.MGATHER", Dim0, Dim1, /*dim2=*/1u, + dtypeElementBitsForTileCheck(DType), + static_cast(Size)); + if (!StrideReg) + report_fatal_error("Linx: TMA.MGATHER requires stride register binding"); + + const unsigned DstID = tileRegIdFromReg(TRI, Dst); + const unsigned IndexID = tileRegIdFromReg(TRI, Index); + if (DstID >= 16) + report_fatal_error("Linx: TMA.MGATHER dst must be in TILE0..TILE15"); + + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::BSTART_TMA)) + .addImm(DType) + .addImm(TMA_MGATHER); + emitDim(MBB, InsertPt, /*LoopNest=*/0, LB0); + emitDim(MBB, InsertPt, /*LoopNest=*/1, LB1); + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_ARG)).addImm(Layout); + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOR)) + .addReg(LinxISA::R0) + .addReg(StrideReg) + .addReg(Base) + .addReg(LinxISA::R0); + + emitTileInputPair(MBB, InsertPt, tileArchId(IndexID), /*Src1=*/0, + /*S0R=*/0, /*S1R=*/0, /*Last=*/0, /*SrcPair=*/0) + .addReg(Index, RegState::Implicit); + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), Size, + /*Last=*/1, /*DstSlot=*/0) 
.addReg(Dst, RegState::Define | RegState::Implicit); PseudoMI->eraseFromParent(); @@ -2240,8 +2249,8 @@ class LinxISABlockify : public MachineFunctionPass { .addImm(DType_I32) .addImm(TMA_TSTORE); - // Canonical descriptor-carrying TSTORE header: - // B.DIM(LB0/LB1) + B.ARG + B.IOR + B.IOT/B.IOTI. + // Descriptor-carrying TSTORE header: + // B.DIM(LB0/LB1) + B.ARG + B.IOR + B.ITP. emitDim(MBB, InsertPt, /*LoopNest=*/0, /*Imm=*/0); emitDim(MBB, InsertPt, /*LoopNest=*/1, /*Imm=*/0); BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_ARG)) @@ -2252,16 +2261,8 @@ class LinxISABlockify : public MachineFunctionPass { .addReg(Base) // RegSrc1: base pointer .addReg(LinxISA::R0);// RegSrc2: aux/layout source (default 0) - // Store: encode the source tile in SrcTile0 and mark it present (S0V=0). - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromRelRef(tileRelRefFromId(SrcID))) // DstTile (hand hint) - .addImm(0) // S0R - .addImm(0) // S0V (present) - .addImm(0) // S1R - .addImm(1) // S1V (absent) - .addImm(SrcID) // SrcTile0 - .addImm(0) // SrcTile1 (unused) - .addImm(Size) // SizeCode (imm5) + emitTileInputPair(MBB, InsertPt, tileArchId(SrcID), /*Src1=*/0, + /*S0R=*/0, /*S1R=*/0, /*Last=*/1, /*SrcPair=*/0) .addReg(Src, RegState::Implicit); PseudoMI->eraseFromParent(); @@ -2269,6 +2270,54 @@ class LinxISABlockify : public MachineFunctionPass { break; } + case LinxISA::PSEUDO_TMA_MSCATTER_DESC: { + const Register Base = PseudoMI->getOperand(0).getReg(); + const Register Src = PseudoMI->getOperand(1).getReg(); + const Register Index = PseudoMI->getOperand(2).getReg(); + const int64_t DType = PseudoMI->getOperand(3).getImm(); + const int64_t Layout = PseudoMI->getOperand(4).getImm(); + const int64_t LB0 = PseudoMI->getOperand(5).getImm(); + const int64_t LB1 = PseudoMI->getOperand(6).getImm(); + const int64_t Size = PseudoMI->getOperand(7).getImm(); + const Register StrideReg = PseudoMI->getOperand(8).getReg(); + if (DType < 0 || DType > 31) + 
report_fatal_error("Linx: TMA.MSCATTER dtype must fit u5"); + validateStrictTileSizeCode(Size, "TMA.MSCATTER"); + const uint64_t Dim0 = + requirePositiveDimImm(LB0, "lb0", "TMA.MSCATTER"); + const uint64_t Dim1 = + requirePositiveDimImm(LB1, "lb1", "TMA.MSCATTER"); + validateTileByteBudget("TMA.MSCATTER", Dim0, Dim1, /*dim2=*/1u, + dtypeElementBitsForTileCheck(DType), + static_cast(Size)); + if (!StrideReg) + report_fatal_error("Linx: TMA.MSCATTER requires stride register binding"); + + const unsigned SrcID = tileRegIdFromReg(TRI, Src); + const unsigned IndexID = tileRegIdFromReg(TRI, Index); + + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::BSTART_TMA)) + .addImm(DType) + .addImm(TMA_MSCATTER); + emitDim(MBB, InsertPt, /*LoopNest=*/0, LB0); + emitDim(MBB, InsertPt, /*LoopNest=*/1, LB1); + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_ARG)).addImm(Layout); + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOR)) + .addReg(LinxISA::R0) + .addReg(StrideReg) + .addReg(Base) + .addReg(LinxISA::R0); + + emitTileInputPair(MBB, InsertPt, tileArchId(SrcID), tileArchId(IndexID), + /*S0R=*/0, /*S1R=*/0, /*Last=*/1, /*SrcPair=*/0) + .addReg(Src, RegState::Implicit) + .addReg(Index, RegState::Implicit); + + PseudoMI->eraseFromParent(); + Changed = true; + break; + } + case LinxISA::PSEUDO_TMA_TSTORE_DESC: { const Register Base = PseudoMI->getOperand(0).getReg(); const Register Src = PseudoMI->getOperand(1).getReg(); @@ -2302,15 +2351,8 @@ class LinxISABlockify : public MachineFunctionPass { .addReg(Base) .addReg(LinxISA::R0); - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromRelRef(tileRelRefFromId(SrcID))) - .addImm(0) - .addImm(0) - .addImm(0) - .addImm(1) - .addImm(SrcID) - .addImm(0) - .addImm(Size) + emitTileInputPair(MBB, InsertPt, tileArchId(SrcID), /*Src1=*/0, + /*S0R=*/0, /*S1R=*/0, /*Last=*/1, /*SrcPair=*/0) .addReg(Src, RegState::Implicit); PseudoMI->eraseFromParent(); @@ -2340,10 +2382,6 @@ class LinxISABlockify : public 
MachineFunctionPass { const unsigned DstID = tileRegIdFromReg(TRI, Dst); const TileRelRef DstRef = tileRelRefFromId(DstID); - // PR6 parity baseline: encode concrete destination tile id. - // Queue-push hand-only encoding requires full runtime relref queue - // semantics, which is not yet modeled in QEMU tile execution. - const unsigned EncDstTile = DstID; unsigned EncSrc = 0; RegState SrcFlags = RegState::Implicit; @@ -2360,37 +2398,24 @@ class LinxISABlockify : public MachineFunctionPass { (void)tileIdFromRelRef(DstRef); } - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::BSTART_TMA)) + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::BSTART_FIXP)) .addImm(Meta.DataType) - .addImm(TMA_TMOV); + .addImm(FIXP_TMOV); // B.ARG carries TMOV mode (strict profile: V2V + A2V). BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_ARG)).addImm(Mode); if (!IsA2V) { - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromRelRef(DstRef)) // DstTile (hand) - .addImm(SrcReuse ? 1 : 0) // S0R - .addImm(0) // S0V (present) - .addImm(0) // S1R - .addImm(1) // S1V (absent) - .addImm(EncSrc) // SrcTile0 - .addImm(EncDstTile) // SrcTile1 (dst tile id) - .addImm(Meta.SizeCode) // SizeCode - .addReg(Src, SrcFlags) + emitTileInputPair(MBB, InsertPt, tileArchId(EncSrc), /*Src1=*/0, + SrcReuse ? 1 : 0, /*S1R=*/0, /*Last=*/0, + /*SrcPair=*/0) + .addReg(Src, SrcFlags); + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), Meta.SizeCode, + /*Last=*/1, /*DstSlot=*/0) .addReg(Dst, RegState::Define | RegState::Implicit); } else { - // A2V: source is implicit accumulator state, so no explicit source - // tile is bound in B.IOTI. 
- BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromRelRef(DstRef)) // DstTile (hand) - .addImm(0) // S0R - .addImm(1) // S0V (absent) - .addImm(0) // S1R - .addImm(1) // S1V (absent) - .addImm(0) // SrcTile0 (unused) - .addImm(EncDstTile) // SrcTile1 (dst tile id) - .addImm(Meta.SizeCode) // SizeCode + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), Meta.SizeCode, + /*Last=*/1, /*DstSlot=*/0) .addReg(Dst, RegState::Define | RegState::Implicit); } @@ -2399,10 +2424,102 @@ class LinxISABlockify : public MachineFunctionPass { break; } + case LinxISA::PSEUDO_FIXP_TINSERT: { + const Register Dst = PseudoMI->getOperand(0).getReg(); + const Register DstBase = PseudoMI->getOperand(1).getReg(); + const Register Src = PseudoMI->getOperand(2).getReg(); + const Register MetaReg = PseudoMI->getOperand(3).getReg(); + const int64_t Size = PseudoMI->getOperand(4).getImm(); + const int64_t DType = PseudoMI->getOperand(5).getImm(); + const int64_t DstRows = PseudoMI->getOperand(6).getImm(); + const int64_t DstCols = PseudoMI->getOperand(7).getImm(); + const int64_t SrcRows = PseudoMI->getOperand(8).getImm(); + const int64_t SrcCols = PseudoMI->getOperand(9).getImm(); + validateStrictTileSizeCode(Size, "FIXP.TINSERT"); + if (DType < 0 || DType > 31) + report_fatal_error("Linx: FIXP.TINSERT dtype must fit u5"); + requirePositiveDimImm(DstRows, "dst_rows", "FIXP.TINSERT"); + requirePositiveDimImm(DstCols, "dst_cols", "FIXP.TINSERT"); + requirePositiveDimImm(SrcRows, "src_rows", "FIXP.TINSERT"); + requirePositiveDimImm(SrcCols, "src_cols", "FIXP.TINSERT"); + + const unsigned DstID = tileRegIdFromReg(TRI, Dst); + const unsigned DstBaseID = tileRegIdFromReg(TRI, DstBase); + const unsigned SrcID = tileRegIdFromReg(TRI, Src); + const int64_t SrcShape = + ((SrcRows & 0xffff) << 16) | (SrcCols & 0xffff); + + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::BSTART_FIXP)) + .addImm(DType) + .addImm(FIXP_TINSERT); + emitDim(MBB, InsertPt, /*LoopNest=*/0, 
DstRows); + emitDim(MBB, InsertPt, /*LoopNest=*/1, DstCols); + emitDim(MBB, InsertPt, /*LoopNest=*/2, SrcShape); + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_ARG)).addImm(0); + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_META)) + .addReg(MetaReg) + .addImm(0); + emitTileInputPair(MBB, InsertPt, tileArchId(DstBaseID), + tileArchId(SrcID), /*S0R=*/0, /*S1R=*/0, + /*Last=*/0, /*SrcPair=*/0) + .addReg(DstBase, RegState::Implicit) + .addReg(Src, RegState::Implicit); + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), Size, + /*Last=*/1, /*DstSlot=*/0) + .addReg(Dst, RegState::Define | RegState::Implicit); + + PseudoMI->eraseFromParent(); + Changed = true; + break; + } + + case LinxISA::PSEUDO_FIXP_TTRANS: { + const Register Dst = PseudoMI->getOperand(0).getReg(); + const Register Src = PseudoMI->getOperand(1).getReg(); + const Register Tmp = PseudoMI->getOperand(2).getReg(); + const int64_t Size = PseudoMI->getOperand(3).getImm(); + const int64_t DType = PseudoMI->getOperand(4).getImm(); + const int64_t DstRows = PseudoMI->getOperand(5).getImm(); + const int64_t DstCols = PseudoMI->getOperand(6).getImm(); + const int64_t SrcRows = PseudoMI->getOperand(7).getImm(); + const int64_t SrcCols = PseudoMI->getOperand(8).getImm(); + validateStrictTileSizeCode(Size, "FIXP.TTRANS"); + if (DType < 0 || DType > 31) + report_fatal_error("Linx: FIXP.TTRANS dtype must fit u5"); + requirePositiveDimImm(DstRows, "dst_rows", "FIXP.TTRANS"); + requirePositiveDimImm(DstCols, "dst_cols", "FIXP.TTRANS"); + requirePositiveDimImm(SrcRows, "src_rows", "FIXP.TTRANS"); + requirePositiveDimImm(SrcCols, "src_cols", "FIXP.TTRANS"); + + const unsigned DstID = tileRegIdFromReg(TRI, Dst); + const unsigned SrcID = tileRegIdFromReg(TRI, Src); + const unsigned TmpID = tileRegIdFromReg(TRI, Tmp); + const int64_t DstShape = + ((DstRows & 0xffff) << 16) | (DstCols & 0xffff); + + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::BSTART_FIXP)) + .addImm(DType) + .addImm(FIXP_TTRANS); + emitDim(MBB, 
InsertPt, /*LoopNest=*/0, SrcRows); + emitDim(MBB, InsertPt, /*LoopNest=*/1, SrcCols); + emitDim(MBB, InsertPt, /*LoopNest=*/2, DstShape); + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_ARG)).addImm(0); + emitTileInputPair(MBB, InsertPt, tileArchId(SrcID), tileArchId(TmpID), + /*S0R=*/0, /*S1R=*/0, /*Last=*/0, + /*SrcPair=*/0) + .addReg(Src, RegState::Implicit) + .addReg(Tmp, RegState::Implicit); + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), Size, + /*Last=*/1, /*DstSlot=*/0) + .addReg(Dst, RegState::Define | RegState::Implicit); + + PseudoMI->eraseFromParent(); + Changed = true; + break; + } + case LinxISA::PSEUDO_CUBE_MAMULB: { - // Expand into two blocks: - // BSTART.CUBE(MAMULB) + dims + B.IOT(srcA, srcB) -> ACC - // BSTART.CUBE(ACCCVT) + B.IOT(dst) -> tile + // Expand into one merged v0.57 TMATMUL block. const Register Dst = PseudoMI->getOperand(0).getReg(); const Register SrcA = PseudoMI->getOperand(1).getReg(); const Register SrcB = PseudoMI->getOperand(2).getReg(); @@ -2412,65 +2529,31 @@ class LinxISABlockify : public MachineFunctionPass { validateCubeDimImm(M, "m", "CUBE.MAMULB"); validateCubeDimImm(N, "n", "CUBE.MAMULB"); validateCubeDimImm(K, "k", "CUBE.MAMULB"); - validateTileByteBudget("CUBE.MAMULB", - requirePositiveDimImm(M, "m", "CUBE.MAMULB"), - requirePositiveDimImm(N, "n", "CUBE.MAMULB"), - requirePositiveDimImm(K, "k", "CUBE.MAMULB"), - dtypeElementBitsForTileCheck(DType_I32), - std::nullopt); + requirePositiveDimImm(M, "m", "CUBE.MAMULB"); + requirePositiveDimImm(N, "n", "CUBE.MAMULB"); + requirePositiveDimImm(K, "k", "CUBE.MAMULB"); const unsigned DstID = tileRegIdFromReg(TRI, Dst); if (DstID < 16) - report_fatal_error("Linx: CUBE.ACCCVT dst must be in TILE16..TILE31"); - const unsigned Group = (DstID >> 3) & 0x1u; - const unsigned Depth = DstID & 0x7u; + report_fatal_error("Linx: CUBE.TMATMUL dst must be in TILE16..TILE31"); const unsigned AID = tileRegIdFromReg(TRI, SrcA); const unsigned BID = tileRegIdFromReg(TRI, SrcB); - // 
First block: MAMULB BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::BSTART_CUBE)) .addImm(DType_I32) - .addImm(CUBE_MAMULB); + .addImm(CUBE_TMATMUL); emitDim(MBB, InsertPt, /*LoopNest=*/0, M); emitDim(MBB, InsertPt, /*LoopNest=*/1, N); emitDim(MBB, InsertPt, /*LoopNest=*/2, K); - - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(4) // DstTile (acc) - .addImm(0) // S0R - .addImm(0) // S0V (present) - .addImm(0) // S1R - .addImm(0) // S1V (present) - .addImm(AID) // SrcTile0 - .addImm(BID) // SrcTile1 - .addImm(8) // SizeCode (bring-up: 4KiB accumulator) + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_ARG)).addImm(0); + emitTileInputPair(MBB, InsertPt, tileArchId(AID), tileArchId(BID), + /*S0R=*/0, /*S1R=*/0, /*Last=*/0, /*SrcPair=*/0) .addReg(SrcA, RegState::Implicit) .addReg(SrcB, RegState::Implicit); - - // Second block: ACCCVT into dst tile. - MachineFunction &MF = *MBB.getParent(); - auto *AccBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock()); - MF.insert(std::next(MBB.getIterator()), AccBB); - AccBB->transferSuccessorsAndUpdatePHIs(&MBB); - MBB.addSuccessor(AccBB); - - BuildMI(*AccBB, AccBB->end(), DL, TII.get(LinxISA::BSTART_CUBE)) - .addImm(DType_I32) - .addImm(CUBE_ACCCVT); - - const unsigned DstKind = - dstTileFieldFromRelRef(tileRelRefFromId(Depth | (Group << 3) | 16u)); - BuildMI(*AccBB, AccBB->end(), DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(DstKind) - .addImm(0) // S0R - .addImm(1) // S0V (absent) - .addImm(0) // S1R - .addImm(1) // S1V (absent) - .addImm(0) // SrcTile0 (unused) - .addImm(16u | (Group << 3) | Depth) // SrcTile1 (dst tile reg id) - .addImm(8) // SizeCode (bring-up: 4KiB) + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), + /*SizeCode=*/8, /*Last=*/1, /*DstSlot=*/0) .addReg(Dst, RegState::Define | RegState::Implicit); PseudoMI->eraseFromParent(); @@ -2479,13 +2562,7 @@ class LinxISABlockify : public MachineFunctionPass { } case LinxISA::PSEUDO_CUBE_MAMULB_ACC: { - // Expand into two blocks: - // 
BSTART.CUBE(MAMULB.ACC) + dims + B.IOT(srcA, srcB) -> ACC - // BSTART.CUBE(ACCCVT) + B.IOT(dst) -> tile - // - // The explicit ACC operand is preserved as an implicit use so SSA - // dependencies are maintained (the emulator models the accumulator as - // implicit state). + // Expand into one merged v0.57 TMATMUL.ACC-form block. const Register Dst = PseudoMI->getOperand(0).getReg(); const Register Acc = PseudoMI->getOperand(1).getReg(); const Register SrcA = PseudoMI->getOperand(2).getReg(); @@ -2496,66 +2573,35 @@ class LinxISABlockify : public MachineFunctionPass { validateCubeDimImm(M, "m", "CUBE.MAMULB.ACC"); validateCubeDimImm(N, "n", "CUBE.MAMULB.ACC"); validateCubeDimImm(K, "k", "CUBE.MAMULB.ACC"); - validateTileByteBudget( - "CUBE.MAMULB.ACC", - requirePositiveDimImm(M, "m", "CUBE.MAMULB.ACC"), - requirePositiveDimImm(N, "n", "CUBE.MAMULB.ACC"), - requirePositiveDimImm(K, "k", "CUBE.MAMULB.ACC"), - dtypeElementBitsForTileCheck(DType_I32), std::nullopt); + requirePositiveDimImm(M, "m", "CUBE.MAMULB.ACC"); + requirePositiveDimImm(N, "n", "CUBE.MAMULB.ACC"); + requirePositiveDimImm(K, "k", "CUBE.MAMULB.ACC"); const unsigned DstID = tileRegIdFromReg(TRI, Dst); if (DstID < 16) - report_fatal_error("Linx: CUBE.ACCCVT dst must be in TILE16..TILE31"); - const unsigned Group = (DstID >> 3) & 0x1u; - const unsigned Depth = DstID & 0x7u; + report_fatal_error("Linx: CUBE.TMATMUL.ACC dst must be in TILE16..TILE31"); const unsigned AID = tileRegIdFromReg(TRI, SrcA); const unsigned BID = tileRegIdFromReg(TRI, SrcB); + const unsigned AccID = tileRegIdFromReg(TRI, Acc); - // First block: MAMULB.ACC BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::BSTART_CUBE)) .addImm(DType_I32) - .addImm(CUBE_MAMULB_ACC); + .addImm(CUBE_TMATMUL); emitDim(MBB, InsertPt, /*LoopNest=*/0, M); emitDim(MBB, InsertPt, /*LoopNest=*/1, N); emitDim(MBB, InsertPt, /*LoopNest=*/2, K); - - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(4) // DstTile (acc) - .addImm(0) // S0R - .addImm(0) 
// S0V (present) - .addImm(0) // S1R - .addImm(0) // S1V (present) - .addImm(AID) // SrcTile0 - .addImm(BID) // SrcTile1 - .addImm(8) // SizeCode (bring-up: 4KiB accumulator) + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_ARG)).addImm(1); + emitTileInputPair(MBB, InsertPt, tileArchId(AID), tileArchId(BID), + /*S0R=*/0, /*S1R=*/0, /*Last=*/0, /*SrcPair=*/0) .addReg(SrcA, RegState::Implicit) - .addReg(SrcB, RegState::Implicit) + .addReg(SrcB, RegState::Implicit); + emitTileInputPair(MBB, InsertPt, tileArchId(AccID), /*Src1=*/0, + /*S0R=*/0, /*S1R=*/0, /*Last=*/0, /*SrcPair=*/1) .addReg(Acc, RegState::Implicit); - - // Second block: ACCCVT into dst tile. - MachineFunction &MF = *MBB.getParent(); - auto *AccBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock()); - MF.insert(std::next(MBB.getIterator()), AccBB); - AccBB->transferSuccessorsAndUpdatePHIs(&MBB); - MBB.addSuccessor(AccBB); - - BuildMI(*AccBB, AccBB->end(), DL, TII.get(LinxISA::BSTART_CUBE)) - .addImm(DType_I32) - .addImm(CUBE_ACCCVT); - - const unsigned DstKind = - dstTileFieldFromRelRef(tileRelRefFromId(Depth | (Group << 3) | 16u)); - BuildMI(*AccBB, AccBB->end(), DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(DstKind) - .addImm(0) // S0R - .addImm(1) // S0V (absent) - .addImm(0) // S1R - .addImm(1) // S1V (absent) - .addImm(0) // SrcTile0 (unused) - .addImm(16u | (Group << 3) | Depth) // SrcTile1 (dst tile reg id) - .addImm(8) // SizeCode (bring-up: 4KiB) + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), + /*SizeCode=*/8, /*Last=*/1, /*DstSlot=*/0) .addReg(Dst, RegState::Define | RegState::Implicit); PseudoMI->eraseFromParent(); @@ -2565,7 +2611,7 @@ class LinxISABlockify : public MachineFunctionPass { case LinxISA::PSEUDO_CUBE_ACCCVT: { // Expand into one block: - // BSTART.CUBE(ACCCVT) + B.ARG(qarg0) + B.IOT(dst) + // BSTART.FIXP(TMOV) + B.ARG(qarg0) + B.OTA(dst) // // qarg1 is reserved for follow-on quant wiring and must be 0 in PR5. 
const Register Dst = PseudoMI->getOperand(0).getReg(); @@ -2580,27 +2626,18 @@ class LinxISABlockify : public MachineFunctionPass { report_fatal_error("Linx: CUBE.ACCCVT dtype must fit u5"); if (QArg1 != 0) report_fatal_error( - "Linx: CUBE.ACCCVT currently requires qarg1=0 in canonical v0.4"); + "Linx: CUBE.ACCCVT currently requires qarg1=0 in LinxISA v0.57"); const unsigned DstID = tileRegIdFromReg(TRI, Dst); if (DstID < 16) report_fatal_error("Linx: CUBE.ACCCVT dst must be in TILE16..TILE31"); - const unsigned DstKind = - dstTileFieldFromRelRef(tileRelRefFromId(DstID)); - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::BSTART_CUBE)) + BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::BSTART_FIXP)) .addImm(DType) - .addImm(CUBE_ACCCVT); + .addImm(FIXP_TMOV); BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_ARG)).addImm(QArg0); - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(DstKind) - .addImm(0) // S0R - .addImm(1) // S0V (absent) - .addImm(0) // S1R - .addImm(1) // S1V (absent) - .addImm(0) // SrcTile0 (unused) - .addImm(DstID) // SrcTile1 (dst tile id) - .addImm(Size) // SizeCode + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), Size, + /*Last=*/1, /*DstSlot=*/0) .addReg(Acc, RegState::Implicit) .addReg(Dst, RegState::Define | RegState::Implicit); @@ -2611,36 +2648,63 @@ class LinxISABlockify : public MachineFunctionPass { case LinxISA::PSEUDO_TEPL_UNARY: case LinxISA::PSEUDO_TEPL_BINARY: + case LinxISA::PSEUDO_TEPL_TERNARY: case LinxISA::PSEUDO_TEPL_BINARY_SCALAR: case LinxISA::PSEUDO_TEPL_SPLAT: { const unsigned Opc = PseudoMI->getOpcode(); const bool IsUnary = Opc == LinxISA::PSEUDO_TEPL_UNARY; const bool IsBinary = Opc == LinxISA::PSEUDO_TEPL_BINARY; + const bool IsTernary = Opc == LinxISA::PSEUDO_TEPL_TERNARY; const bool IsBinaryScalar = Opc == LinxISA::PSEUDO_TEPL_BINARY_SCALAR; const bool IsSplat = Opc == LinxISA::PSEUDO_TEPL_SPLAT; - const char *Ctx = IsUnary - ? "TEPL.UNARY" - : (IsBinary ? "TEPL.BINARY" - : (IsBinaryScalar ? 
"TEPL.BINARY.SCALAR" - : "TEPL.SPLAT")); + const char *Ctx = + IsUnary + ? "TEPL.UNARY" + : (IsBinary ? "TEPL.BINARY" + : (IsTernary + ? "TEPL.TERNARY" + : (IsBinaryScalar ? "TEPL.BINARY.SCALAR" + : "TEPL.SPLAT"))); const Register Dst = PseudoMI->getOperand(0).getReg(); - const Register SrcA = (IsUnary || IsBinary || IsBinaryScalar) + const Register SrcA = (IsUnary || IsBinary || IsTernary || IsBinaryScalar) ? PseudoMI->getOperand(1).getReg() : Register(); - const Register SrcB = IsBinary ? PseudoMI->getOperand(2).getReg() : Register(); + const Register SrcB = + (IsBinary || IsTernary) ? PseudoMI->getOperand(2).getReg() + : Register(); + const Register SrcC = + IsTernary ? PseudoMI->getOperand(3).getReg() : Register(); const Register SrcS = IsBinaryScalar ? PseudoMI->getOperand(2).getReg() : (IsSplat ? PseudoMI->getOperand(1).getReg() : Register()); const int64_t TileOpcode = - PseudoMI->getOperand(IsUnary ? 2 : (IsBinary ? 3 : (IsBinaryScalar ? 3 : 2))) + PseudoMI + ->getOperand(IsUnary ? 2 + : (IsBinary ? 3 + : (IsTernary + ? 4 + : (IsBinaryScalar ? 3 + : 2)))) .getImm(); const int64_t Size = - PseudoMI->getOperand(IsUnary ? 3 : (IsBinary ? 4 : (IsBinaryScalar ? 4 : 3))) + PseudoMI + ->getOperand(IsUnary ? 3 + : (IsBinary ? 4 + : (IsTernary + ? 5 + : (IsBinaryScalar ? 4 + : 3)))) .getImm(); const int64_t DType = - PseudoMI->getOperand(IsUnary ? 4 : (IsBinary ? 5 : (IsBinaryScalar ? 5 : 4))) + PseudoMI + ->getOperand(IsUnary ? 4 + : (IsBinary ? 5 + : (IsTernary + ? 6 + : (IsBinaryScalar ? 
5 + : 4)))) .getImm(); const int64_t Mode = IsBinaryScalar @@ -2658,23 +2722,21 @@ class LinxISABlockify : public MachineFunctionPass { report_fatal_error("Linx: TEPL.BINARY.SCALAR requires mode=1 (VS)"); if (IsSplat && Mode != static_cast(TEPLMode::SV)) report_fatal_error("Linx: TEPL.SPLAT requires mode=2 (SV)"); - if ((IsUnary || IsBinary) && Mode != static_cast(TEPLMode::VV)) - report_fatal_error("Linx: TEPL.UNARY/BINARY require mode=0 (VV)"); + if ((IsUnary || IsBinary || IsTernary) && + Mode != static_cast(TEPLMode::VV)) + report_fatal_error( + "Linx: TEPL.UNARY/BINARY/TERNARY require mode=0 (VV)"); const unsigned DstID = tileRegIdFromReg(TRI, Dst); - const unsigned AID = (IsUnary || IsBinary || IsBinaryScalar) + const unsigned AID = (IsUnary || IsBinary || IsTernary || IsBinaryScalar) ? tileRegIdFromReg(TRI, SrcA) : 0u; - const unsigned BID = IsBinary ? tileRegIdFromReg(TRI, SrcB) : 0u; - const TileRelRef DstRef = tileRelRefFromId(DstID); - const unsigned EncA = - (IsUnary || IsBinary || IsBinaryScalar) - ? tileIdFromRelRef(tileRelRefFromId(AID)) - : 0u; - const unsigned EncB = - IsBinary ? tileIdFromRelRef(tileRelRefFromId(BID)) : 0u; - const bool HasS0Tile = IsUnary || IsBinary || IsBinaryScalar; - const bool HasS1Tile = IsBinary; + const unsigned BID = + (IsBinary || IsTernary) ? tileRegIdFromReg(TRI, SrcB) : 0u; + const unsigned CID = IsTernary ? tileRegIdFromReg(TRI, SrcC) : 0u; + const bool HasS0Tile = IsUnary || IsBinary || IsTernary || IsBinaryScalar; + const bool HasS1Tile = IsBinary || IsTernary; + const bool HasS2Tile = IsTernary; BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::BSTART_TEPL)) .addImm(DType) @@ -2690,51 +2752,27 @@ class LinxISABlockify : public MachineFunctionPass { .addReg(SrcS, RegState::Implicit); } - // Descriptor 0: input bindings. - auto InDesc = BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G0)) - .addImm(dstTileFieldFromRelRef(DstRef)) // DstTile hand - .addImm(0) // S0R - .addImm(HasS0Tile ? 
0 : 1) // S0V - .addImm(0) // S1R - .addImm(HasS1Tile ? 0 : 1) // S1V - .addImm(EncA) // SrcTile0 - .addImm(EncB) // SrcTile1 - .addImm(Size); // SizeCode - if (HasS0Tile) - InDesc.addReg(SrcA, RegState::Implicit); - if (HasS1Tile) - InDesc.addReg(SrcB, RegState::Implicit); - - // Descriptor 1: destination tile binding (queue-push destination). - // - // In-place TEPL forms (dst aliases a source tile) still use B.IOTI so - // size metadata stays explicit and disassembly remains canonical. - // Runtime destination allocation is guarded by descriptor shape checks. - const bool InPlace = (HasS0Tile && DstID == AID) || - (HasS1Tile && DstID == BID); - if (InPlace) { - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromRelRef(DstRef)) - .addImm(0) // S0R - .addImm(1) // S0V (absent) - .addImm(0) // S1R - .addImm(1) // S1V (absent) - .addImm(0) // SrcTile0 (unused) - .addImm(DstID) // SrcTile1 (dst tile id) - .addImm(Size) // SizeCode - .addReg(Dst, RegState::Define | RegState::Implicit); - } else { - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromRelRef(DstRef)) - .addImm(0) // S0R - .addImm(1) // S0V (absent) - .addImm(0) // S1R - .addImm(1) // S1V (absent) - .addImm(0) // SrcTile0 (unused) - .addImm(DstID) // SrcTile1 (dst tile id) - .addImm(Size) // SizeCode - .addReg(Dst, RegState::Define | RegState::Implicit); + if (HasS0Tile || HasS1Tile) { + auto InDesc = + emitTileInputPair(MBB, InsertPt, + HasS0Tile ? tileArchId(AID) : 0u, + HasS1Tile ? 
tileArchId(BID) : 0u, + /*S0R=*/0, /*S1R=*/0, /*Last=*/0, + /*SrcPair=*/0); + if (HasS0Tile) + InDesc.addReg(SrcA, RegState::Implicit); + if (HasS1Tile) + InDesc.addReg(SrcB, RegState::Implicit); } + if (HasS2Tile) { + emitTileInputPair(MBB, InsertPt, tileArchId(CID), /*Src1=*/0, + /*S0R=*/0, /*S1R=*/0, /*Last=*/0, + /*SrcPair=*/1) + .addReg(SrcC, RegState::Implicit); + } + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), Size, + /*Last=*/1, /*DstSlot=*/0) + .addReg(Dst, RegState::Define | RegState::Implicit); PseudoMI->eraseFromParent(); Changed = true; @@ -2746,8 +2784,8 @@ class LinxISABlockify : public MachineFunctionPass { case LinxISA::PSEUDO_VTILE_ADD: case LinxISA::PSEUDO_VTILE_SUB: { // Expand into a VPAR decoupled header that binds: - // - input tiles through TA/TB (first B.IOTI) - // - output tile through TO (second B.IOTI or B.IOT for in-place) + // - input tiles through a B.ITP source-pair descriptor + // - output tile through B.OTA // // The out-of-line body is a single-lane snippet that executes: // load TA, load TB, add/sub, store TO @@ -2788,45 +2826,13 @@ class LinxISABlockify : public MachineFunctionPass { BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::BSTART_VPAR)).addImm(0); BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_TEXT)).addSym(BodySym); - // Descriptor 0: inputs (TA/TB), group=0. - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G0)) - .addImm(dstTileFieldFromRelRef(tileRelRefFromId(DstID))) // DstTile (hand hint) - .addImm(0) // S0R - .addImm(0) // S0V (present) - .addImm(0) // S1R - .addImm(0) // S1V (present) - .addImm(AID) // SrcTile0 (TA) - .addImm(BID) // SrcTile1 (TB) - .addImm(Size) // SizeCode + emitTileInputPair(MBB, InsertPt, tileArchId(AID), tileArchId(BID), + /*S0R=*/0, /*S1R=*/0, /*Last=*/0, /*SrcPair=*/0) .addReg(SrcA, RegState::Implicit) .addReg(SrcB, RegState::Implicit); - - // Descriptor 1: output (TO), group=1 (last). Keep B.IOTI for both - // in-place and out-of-place forms so size metadata stays explicit. 
- const bool InPlace = (DstID == AID) || (DstID == BID); - if (InPlace) { - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromRelRef(tileRelRefFromId(DstID))) // DstTile (hand hint) - .addImm(0) // S0R - .addImm(1) // S0V (absent) - .addImm(0) // S1R - .addImm(1) // S1V (absent) - .addImm(0) // SrcTile0 (unused) - .addImm(DstID) // SrcTile1 (dst tile id) - .addImm(Size) // SizeCode - .addReg(Dst, RegState::Define | RegState::Implicit); - } else { - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromRelRef(tileRelRefFromId(DstID))) // DstTile (hand hint) - .addImm(0) // S0R - .addImm(1) // S0V (absent) - .addImm(0) // S1R - .addImm(1) // S1V (absent) - .addImm(0) // SrcTile0 (unused) - .addImm(DstID) // SrcTile1 (dst tile id) - .addImm(Size) // SizeCode - .addReg(Dst, RegState::Define | RegState::Implicit); - } + emitTileOutputAlloc(MBB, InsertPt, tileArchId(DstID), Size, + /*Last=*/1, /*DstSlot=*/0) + .addReg(Dst, RegState::Define | RegState::Implicit); emitDim(MBB, InsertPt, /*LoopNest=*/0, LB0); emitDim(MBB, InsertPt, /*LoopNest=*/1, LB1); @@ -2904,7 +2910,7 @@ class LinxISABlockify : public MachineFunctionPass { } if ((Attr & ~AttrAQRLMask) != 0u) { report_fatal_error( - "Linx: vblock.launch only supports aq/rl B.ATTR bits in canonical v0.4"); + "Linx: vblock.launch only supports aq/rl B.ATTR bits in LinxISA v0.57"); } if (Attr != 0u) { BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_ATTR)) @@ -2940,24 +2946,11 @@ class LinxISABlockify : public MachineFunctionPass { if (EmitLocalScratch) { // Reserve the first two output descriptors for TO/TS so the body // can use the canonical `.local` output-tile order. 
- BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromHand(TileHand::T)) - .addImm(0) - .addImm(1) - .addImm(0) - .addImm(1) - .addImm(0) - .addImm(0) - .addImm(0); - BuildMI(MBB, InsertPt, DL, TII.get(LinxISA::B_IOTI_G1)) - .addImm(dstTileFieldFromHand(TileHand::U)) - .addImm(0) - .addImm(1) - .addImm(0) - .addImm(1) - .addImm(0) - .addImm(8) - .addImm(LocalScratchSizeCode); + emitTileOutputAlloc(MBB, InsertPt, tileArchId(0), /*SizeCode=*/0, + /*Last=*/0, /*DstSlot=*/0); + emitTileOutputAlloc(MBB, InsertPt, tileArchId(8), + LocalScratchSizeCode, /*Last=*/1, + /*DstSlot=*/1); } emitDim(MBB, InsertPt, /*LoopNest=*/0, Dim0); diff --git a/llvm/lib/Target/LinxISA/LinxISAISelDAGToDAG.cpp b/llvm/lib/Target/LinxISA/LinxISAISelDAGToDAG.cpp index 618467736da65..50d037191a327 100644 --- a/llvm/lib/Target/LinxISA/LinxISAISelDAGToDAG.cpp +++ b/llvm/lib/Target/LinxISA/LinxISAISelDAGToDAG.cpp @@ -69,7 +69,7 @@ FunctionPass *llvm::createLinxISAISelDag(LinxISATargetMachine &TM) { static int64_t getMemScaleForVT(MVT MemVT) { if (MemVT == MVT::i8) return 1; - if (MemVT == MVT::i16) + if (MemVT == MVT::i16 || MemVT == MVT::f16 || MemVT == MVT::bf16) return 2; if (MemVT == MVT::i32) return 4; @@ -82,6 +82,12 @@ static int64_t getMemScaleForVT(MVT MemVT) { return 1; } +static bool isZeroConstantOffset(SDValue V) { + if (auto *C = dyn_cast<ConstantSDNode>(V.getNode())) + return C->getSExtValue() == 0; + return false; +} + static bool isStrictTileSizeCode(uint64_t SizeCode) { return SizeCode >= 5 && SizeCode <= 8; } @@ -298,54 +304,10 @@ static void validateTileOpcode(uint64_t TileOpcode, StringRef IntrinsicName) { } static bool isWhitelistedTEPLTileOpcode(uint64_t TileOpcode) { - switch (TileOpcode & 0x3ffu) { - case 0x000: // TADD - case 0x001: // TSUB - case 0x002: // TMUL - case 0x003: // TDIV - case 0x004: // TMAX - case 0x005: // TMIN - case 0x006: // TAND - case 0x007: // TOR - case 0x008: // TXOR - case 0x009: // TSHL - case 0x00a: // TSHR - case 0x00b: // TRELU -
case 0x00c: // TPRELU - case 0x00d: // TCVT - case 0x00e: // TEXP - case 0x00f: // TLOG - case 0x010: // TSQRT - case 0x011: // TRSQRT - case 0x012: // TROWMAX - case 0x013: // TROWMIN - case 0x014: // TROWSUM - case 0x015: // TCOLMAX - case 0x016: // TCOLMIN - case 0x017: // TCOLSUM - case 0x018: // TRECIP - case 0x019: // TEXPANDS - case 0x01a: // TGATHER - case 0x01b: // TSCATTER - case 0x01c: // TRESHAPE - case 0x01d: // TTRANSPOSE - case 0x01e: // TCOLEXPAND - case 0x01f: // TROWEXPAND - case 0x020: // TADDS - case 0x021: // TSUBS - case 0x022: // TMULS - case 0x023: // TDIVS - case 0x024: // TMAXS - case 0x025: // TMINS - case 0x026: // TANDS - case 0x027: // TORS - case 0x028: // TXORS - case 0x029: // TSHLS - case 0x02a: // TSHRS - return true; - default: - return false; - } + // LinxISA v0.57 TEPL assigns the PTO tile-op window densely from TABS + // through TXORS. Keep the selector validator aligned with the generated + // v0.57 table instead of the older sequential alias map. 
+ return (TileOpcode & 0x3ffu) <= 0x05bu; } static void validateWhitelistedTEPLTileOpcode(uint64_t TileOpcode, @@ -353,20 +315,28 @@ static void validateWhitelistedTEPLTileOpcode(uint64_t TileOpcode, validateTileOpcode(TileOpcode, IntrinsicName); if (!isWhitelistedTEPLTileOpcode(TileOpcode)) { report_fatal_error(Twine("Linx: ") + IntrinsicName + - " uses TileOpcode outside the canonical v0.4 TEPL set"); + " uses TileOpcode outside the LinxISA v0.57 TEPL set"); } } -static constexpr uint64_t TEPL_TILEOP_TADD = 0x000u; -static constexpr uint64_t TEPL_TILEOP_TSUB = 0x001u; -static constexpr uint64_t TEPL_TILEOP_TROWMAX = 0x012u; -static constexpr uint64_t TEPL_TILEOP_TCOLEXPAND = 0x01eu; -static constexpr uint64_t TEPL_TILEOP_TROWEXPAND = 0x01fu; -static constexpr uint64_t TEPL_TILEOP_TEXPANDS = 0x019u; +static constexpr uint64_t TEPL_TILEOP_TADD = 0x001u; +static constexpr uint64_t TEPL_TILEOP_TSUB = 0x055u; +static constexpr uint64_t TEPL_TILEOP_TROWMAX = 0x047u; +static constexpr uint64_t TEPL_TILEOP_TCOLEXPAND = 0x00du; +static constexpr uint64_t TEPL_TILEOP_TROWEXPAND = 0x03fu; +static constexpr uint64_t TEPL_TILEOP_TEXPANDS = 0x01du; static constexpr uint64_t TEPL_MODE_VV = 0u; static constexpr uint64_t TEPL_MODE_VS = 1u; static constexpr uint64_t TEPL_MODE_SV = 2u; +static void validatePositiveU16Dim(uint64_t Dim, StringRef IntrinsicName, + StringRef DimName) { + if (Dim == 0 || Dim > 0xffffu) { + report_fatal_error(Twine("Linx: ") + IntrinsicName + " requires " + + DimName + " in range 1..65535"); + } +} + bool LinxISADAGToDAGISel::selectMemAddr(SDValue Addr, SDValue &Base, SDValue &Off, int64_t Scale) { SDLoc DL(Addr); @@ -961,8 +931,39 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { CurDAG->getMachineNode(LinxISA::SRLrr, DL, MVT::i64, Shl, ShAmt), 0); } - report_fatal_error( - "Linx: constant register materialization out of range in tile path"); + + uint32_t Hi32 = static_cast<uint32_t>(UVal >> 32); + uint32_t Lo32 = static_cast<uint32_t>(UVal & 0xffffffffu); + SDValue ShImm =
CurDAG->getTargetConstant(32, DL, MVT::i64); + SDValue ShAmt = + SDValue(CurDAG->getMachineNode(LinxISA::ADDIri, DL, MVT::i64, Zero, + ShImm), + 0); + + SDValue HiImm = CurDAG->getTargetConstant( + static_cast(static_cast(Hi32)), DL, MVT::i64); + SDValue Hi = + SDValue(CurDAG->getMachineNode(LinxISA::HLLUI, DL, MVT::i64, HiImm), + 0); + SDValue HiShifted = + SDValue(CurDAG->getMachineNode(LinxISA::SLLrr, DL, MVT::i64, Hi, + ShAmt), + 0); + + SDValue LoImm = CurDAG->getTargetConstant( + static_cast(static_cast(Lo32)), DL, MVT::i64); + SDValue Lo = + SDValue(CurDAG->getMachineNode(LinxISA::HLLUI, DL, MVT::i64, LoImm), + 0); + SDValue LoShl = SDValue( + CurDAG->getMachineNode(LinxISA::SLLrr, DL, MVT::i64, Lo, ShAmt), 0); + SDValue LoZext = SDValue(CurDAG->getMachineNode(LinxISA::SRLrr, DL, + MVT::i64, LoShl, ShAmt), + 0); + + return SDValue(CurDAG->getMachineNode(LinxISA::ORrr, DL, MVT::i64, + HiShifted, LoZext), + 0); }; auto forceGpr = [&](SDValue V) -> SDValue { if (auto *CN = dyn_cast(V)) { @@ -1077,6 +1078,60 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { return; } + if (IntrID == Intrinsic::linx_tma_mgather_desc) { + // (chain, id, base, index, immDType, immLayout, immLB0, immLB1, + // immSizeCode, immStrideBytes) + const uint64_t DType = + requireConstUImmOperand(N, 4, "tma.mgather.desc", "dtype"); + const int64_t Layout = + requireConstSImmOperand(N, 5, "tma.mgather.desc", "layout"); + const int64_t LB0 = + requireConstSImmOperand(N, 6, "tma.mgather.desc", "lb0"); + const int64_t LB1 = + requireConstSImmOperand(N, 7, "tma.mgather.desc", "lb1"); + const uint64_t SizeCode = + requireConstUImmOperand(N, 8, "tma.mgather.desc", "size_code"); + const int64_t StrideBytes = + requireConstSImmOperand(N, 9, "tma.mgather.desc", "stride_bytes"); + validateStrictTileSizeCode(SizeCode, "tma.mgather.desc"); + validateTileDataTypeU5(DType, "tma.mgather.desc"); + const uint64_t Dim0 = requirePositiveDim(LB0, "tma.mgather.desc", "lb0"); + const uint64_t Dim1 = 
requirePositiveDim(LB1, "tma.mgather.desc", "lb1"); + validateTileByteBudget("tma.mgather.desc", Dim0, Dim1, /*dim2=*/1u, + dtypeElementBitsForTileCheck(DType), SizeCode); + if (StrideBytes < 0) + report_fatal_error("Linx: tma.mgather.desc requires stride_bytes >= 0"); + if (StrideBytes != 0) { + const uint64_t ElemBits = dtypeElementBitsForTileCheck(DType); + const uint64_t ElemBytes = (ElemBits + 7u) / 8u; + const uint64_t RowSpanBytes = computeTileBytesOrDie( + "tma.mgather.desc stride", Dim0, 1u, 1u, ElemBits); + const uint64_t StrideU64 = static_cast<uint64_t>(StrideBytes); + if ((StrideU64 % ElemBytes) != 0u || StrideU64 < RowSpanBytes) { + report_fatal_error( + "Linx: tma.mgather.desc stride_bytes must be element-aligned " + "and >= lb0*elem_bytes(rounded)"); + } + } + + SDValue Chain = N->getOperand(0); + SDValue Base = N->getOperand(2); + SDValue Index = N->getOperand(3); + SDValue DTypeImm = CurDAG->getTargetConstant(DType, DL, MVT::i64); + SDValue LayoutImm = CurDAG->getTargetConstant(Layout, DL, MVT::i64); + SDValue LB0Imm = CurDAG->getTargetConstant(LB0, DL, MVT::i64); + SDValue LB1Imm = CurDAG->getTargetConstant(LB1, DL, MVT::i64); + SDValue SizeImm = CurDAG->getTargetConstant(SizeCode, DL, MVT::i64); + SDValue Stride = forceGpr(N->getOperand(9)); + SDValue Ops[] = {Base, Index, DTypeImm, LayoutImm, + LB0Imm, LB1Imm, SizeImm, Stride, Chain}; + EVT ResVT = N->getValueType(0); + SDNode *Res = CurDAG->getMachineNode( + LinxISA::PSEUDO_TMA_MGATHER_DESC, DL, ResVT, MVT::Other, Ops); + ReplaceNode(N, Res); + return; + } + if (IntrID == Intrinsic::linx_tile_tmov || IntrID == Intrinsic::linx_tile_tmov_legacy) { // (chain, id, src, immMode, immSizeCode, immDType, immLayout, @@ -1116,6 +1171,91 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { return; } + if (IntrID == Intrinsic::linx_tile_tinsert || + IntrID == Intrinsic::linx_tile_tinsert_legacy) { + // (chain, id, dst_base, src, immSizeCode, immDType, immDstRows, + // immDstCols, immSrcRows, immSrcCols, meta) + const
uint64_t SizeCode = + requireConstUImmOperand(N, 4, "tile.tinsert", "size_code"); + const uint64_t DType = + requireConstUImmOperand(N, 5, "tile.tinsert", "dtype"); + const uint64_t DstRows = + requireConstUImmOperand(N, 6, "tile.tinsert", "dst_rows"); + const uint64_t DstCols = + requireConstUImmOperand(N, 7, "tile.tinsert", "dst_cols"); + const uint64_t SrcRows = + requireConstUImmOperand(N, 8, "tile.tinsert", "src_rows"); + const uint64_t SrcCols = + requireConstUImmOperand(N, 9, "tile.tinsert", "src_cols"); + validateStrictTileSizeCode(SizeCode, "tile.tinsert"); + validateTileDataTypeU5(DType, "tile.tinsert"); + validatePositiveU16Dim(DstRows, "tile.tinsert", "dst_rows"); + validatePositiveU16Dim(DstCols, "tile.tinsert", "dst_cols"); + validatePositiveU16Dim(SrcRows, "tile.tinsert", "src_rows"); + validatePositiveU16Dim(SrcCols, "tile.tinsert", "src_cols"); + + SDValue Chain = N->getOperand(0); + SDValue DstBase = N->getOperand(2); + SDValue Src = N->getOperand(3); + SDValue Meta = forceGpr(N->getOperand(10)); + SDValue SizeImm = CurDAG->getTargetConstant(SizeCode, DL, MVT::i64); + SDValue DTypeImm = CurDAG->getTargetConstant(DType, DL, MVT::i64); + SDValue DstRowsImm = CurDAG->getTargetConstant(DstRows, DL, MVT::i64); + SDValue DstColsImm = CurDAG->getTargetConstant(DstCols, DL, MVT::i64); + SDValue SrcRowsImm = CurDAG->getTargetConstant(SrcRows, DL, MVT::i64); + SDValue SrcColsImm = CurDAG->getTargetConstant(SrcCols, DL, MVT::i64); + SDValue Ops[] = {DstBase, Src, Meta, SizeImm, + DTypeImm, DstRowsImm, DstColsImm, SrcRowsImm, + SrcColsImm, Chain}; + EVT ResVT = N->getValueType(0); + SDNode *Res = CurDAG->getMachineNode(LinxISA::PSEUDO_FIXP_TINSERT, DL, + ResVT, MVT::Other, Ops); + ReplaceNode(N, Res); + return; + } + + if (IntrID == Intrinsic::linx_tile_ttrans || + IntrID == Intrinsic::linx_tile_ttrans_legacy) { + // (chain, id, src, tmp, immSizeCode, immDType, immDstRows, + // immDstCols, immSrcRows, immSrcCols) + const uint64_t SizeCode = + 
requireConstUImmOperand(N, 4, "tile.ttrans", "size_code"); + const uint64_t DType = + requireConstUImmOperand(N, 5, "tile.ttrans", "dtype"); + const uint64_t DstRows = + requireConstUImmOperand(N, 6, "tile.ttrans", "dst_rows"); + const uint64_t DstCols = + requireConstUImmOperand(N, 7, "tile.ttrans", "dst_cols"); + const uint64_t SrcRows = + requireConstUImmOperand(N, 8, "tile.ttrans", "src_rows"); + const uint64_t SrcCols = + requireConstUImmOperand(N, 9, "tile.ttrans", "src_cols"); + validateStrictTileSizeCode(SizeCode, "tile.ttrans"); + validateTileDataTypeU5(DType, "tile.ttrans"); + validatePositiveU16Dim(DstRows, "tile.ttrans", "dst_rows"); + validatePositiveU16Dim(DstCols, "tile.ttrans", "dst_cols"); + validatePositiveU16Dim(SrcRows, "tile.ttrans", "src_rows"); + validatePositiveU16Dim(SrcCols, "tile.ttrans", "src_cols"); + + SDValue Chain = N->getOperand(0); + SDValue Src = N->getOperand(2); + SDValue Tmp = N->getOperand(3); + SDValue SizeImm = CurDAG->getTargetConstant(SizeCode, DL, MVT::i64); + SDValue DTypeImm = CurDAG->getTargetConstant(DType, DL, MVT::i64); + SDValue DstRowsImm = CurDAG->getTargetConstant(DstRows, DL, MVT::i64); + SDValue DstColsImm = CurDAG->getTargetConstant(DstCols, DL, MVT::i64); + SDValue SrcRowsImm = CurDAG->getTargetConstant(SrcRows, DL, MVT::i64); + SDValue SrcColsImm = CurDAG->getTargetConstant(SrcCols, DL, MVT::i64); + SDValue Ops[] = {Src, Tmp, SizeImm, DTypeImm, + DstRowsImm, DstColsImm, SrcRowsImm, SrcColsImm, + Chain}; + EVT ResVT = N->getValueType(0); + SDNode *Res = CurDAG->getMachineNode(LinxISA::PSEUDO_FIXP_TTRANS, DL, + ResVT, MVT::Other, Ops); + ReplaceNode(N, Res); + return; + } + if (IntrID == Intrinsic::linx_cube_mamulb || IntrID == Intrinsic::linx_cube_mamulb_legacy) { // (chain, id, a, b, immM, immN, immK) @@ -1126,12 +1266,8 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { validateCubeDimU17(NDim, "cube.mamulb", "n"); validateCubeDimU17(K, "cube.mamulb", "k"); if (M == 0 || NDim == 0 || K == 0) { - 
report_fatal_error( - "Linx: cube.mamulb requires m/n/k > 0 for tile-byte validation"); + report_fatal_error("Linx: cube.mamulb requires m/n/k > 0"); } - validateTileByteBudget("cube.mamulb", M, NDim, K, - dtypeElementBitsForTileCheck(/*DType=*/17), - std::nullopt); SDValue Chain = N->getOperand(0); SDValue A = N->getOperand(2); @@ -1160,13 +1296,8 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { validateCubeDimU17(NDim, "cube.mamulb.acc", "n"); validateCubeDimU17(K, "cube.mamulb.acc", "k"); if (M == 0 || NDim == 0 || K == 0) { - report_fatal_error( - "Linx: cube.mamulb.acc requires m/n/k > 0 for tile-byte " - "validation"); + report_fatal_error("Linx: cube.mamulb.acc requires m/n/k > 0"); } - validateTileByteBudget("cube.mamulb.acc", M, NDim, K, - dtypeElementBitsForTileCheck(/*DType=*/17), - std::nullopt); validateCubeAccumulatorOperandChain(N->getOperand(2), "cube.mamulb.acc"); SDValue Chain = N->getOperand(0); @@ -1199,7 +1330,7 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { validateTileDataTypeU5(DType, "cube.acccvt"); if (QArg1 != 0) report_fatal_error( - "Linx: cube.acccvt requires qarg1=0 in canonical v0.4"); + "Linx: cube.acccvt requires qarg1=0 in LinxISA v0.57"); validateCubeAccumulatorOperandChain(N->getOperand(2), "cube.acccvt"); SDValue Chain = N->getOperand(0); @@ -1340,6 +1471,34 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { return; } + if (IntrID == Intrinsic::linx_tepl_ternary || + IntrID == Intrinsic::linx_tepl_ternary_legacy) { + // (chain, id, a, b, c, immTileOpcode, immSizeCode, immDType) + const uint64_t TileOpcode = + requireConstUImmOperand(N, 5, "tepl.ternary", "tile_opcode"); + const uint64_t SizeCode = + requireConstUImmOperand(N, 6, "tepl.ternary", "size_code"); + const uint64_t DType = + requireConstUImmOperand(N, 7, "tepl.ternary", "dtype"); + validateWhitelistedTEPLTileOpcode(TileOpcode, "tepl.ternary"); + validateStrictTileSizeCode(SizeCode, "tepl.ternary"); + validateTileDataTypeU5(DType, "tepl.ternary"); + + SDValue Chain = 
N->getOperand(0); + SDValue A = N->getOperand(2); + SDValue B = N->getOperand(3); + SDValue C = N->getOperand(4); + SDValue TileOpImm = CurDAG->getTargetConstant(TileOpcode, DL, MVT::i64); + SDValue SizeImm = CurDAG->getTargetConstant(SizeCode, DL, MVT::i64); + SDValue DTypeImm = CurDAG->getTargetConstant(DType, DL, MVT::i64); + SDValue Ops[] = {A, B, C, TileOpImm, SizeImm, DTypeImm, Chain}; + EVT ResVT = N->getValueType(0); + SDNode *Res = CurDAG->getMachineNode(LinxISA::PSEUDO_TEPL_TERNARY, DL, + ResVT, MVT::Other, Ops); + ReplaceNode(N, Res); + return; + } + if (IntrID == Intrinsic::linx_tepl_binary_scalar || IntrID == Intrinsic::linx_tepl_binary_scalar_legacy) { // (chain, id, a, scalar, immTileOpcode, immSizeCode, immDType, immMode) @@ -1356,7 +1515,7 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { validateTileDataTypeU5(DType, "tepl.binary.scalar"); if (Mode != TEPL_MODE_VS) report_fatal_error( - "Linx: tepl.binary.scalar requires operand mode=1 (VS) in canonical v0.4"); + "Linx: tepl.binary.scalar requires operand mode=1 (VS) in LinxISA v0.57"); SDValue Chain = N->getOperand(0); SDValue A = N->getOperand(2); @@ -1391,7 +1550,7 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { validateTileDataTypeU5(DType, "tepl.splat"); if (Mode != TEPL_MODE_SV) report_fatal_error( - "Linx: tepl.splat requires operand mode=2 (SV) in canonical v0.4"); + "Linx: tepl.splat requires operand mode=2 (SV) in LinxISA v0.57"); SDValue Chain = N->getOperand(0); SDValue Scalar = N->getOperand(2); @@ -1503,8 +1662,39 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { CurDAG->getMachineNode(LinxISA::SRLrr, DL, MVT::i64, Shl, ShAmt), 0); } - report_fatal_error( - "Linx: constant register materialization out of range in tile path"); + + uint32_t Hi32 = static_cast(UVal >> 32); + uint32_t Lo32 = static_cast(UVal & 0xffffffffu); + SDValue ShImm = CurDAG->getTargetConstant(32, DL, MVT::i64); + SDValue ShAmt = + SDValue(CurDAG->getMachineNode(LinxISA::ADDIri, DL, MVT::i64, Zero, + ShImm), + 0); 
+ + SDValue HiImm = CurDAG->getTargetConstant( + static_cast(static_cast(Hi32)), DL, MVT::i64); + SDValue Hi = + SDValue(CurDAG->getMachineNode(LinxISA::HLLUI, DL, MVT::i64, HiImm), + 0); + SDValue HiShifted = + SDValue(CurDAG->getMachineNode(LinxISA::SLLrr, DL, MVT::i64, Hi, + ShAmt), + 0); + + SDValue LoImm = CurDAG->getTargetConstant( + static_cast(static_cast(Lo32)), DL, MVT::i64); + SDValue Lo = + SDValue(CurDAG->getMachineNode(LinxISA::HLLUI, DL, MVT::i64, LoImm), + 0); + SDValue LoShl = SDValue( + CurDAG->getMachineNode(LinxISA::SLLrr, DL, MVT::i64, Lo, ShAmt), 0); + SDValue LoZext = SDValue(CurDAG->getMachineNode(LinxISA::SRLrr, DL, + MVT::i64, LoShl, ShAmt), + 0); + + return SDValue(CurDAG->getMachineNode(LinxISA::ORrr, DL, MVT::i64, + HiShifted, LoZext), + 0); }; auto forceGpr = [&](SDValue V) -> SDValue { if (auto *CN = dyn_cast(V)) { @@ -1619,6 +1809,63 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { return; } + if (IntrID == Intrinsic::linx_tma_mscatter_desc) { + // (chain, id, base, tile, index, immDType, immLayout, immLB0, immLB1, + // immSizeCode, immStrideBytes) + const uint64_t DType = + requireConstUImmOperand(N, 5, "tma.mscatter.desc", "dtype"); + const int64_t Layout = + requireConstSImmOperand(N, 6, "tma.mscatter.desc", "layout"); + const int64_t LB0 = + requireConstSImmOperand(N, 7, "tma.mscatter.desc", "lb0"); + const int64_t LB1 = + requireConstSImmOperand(N, 8, "tma.mscatter.desc", "lb1"); + const uint64_t SizeCode = + requireConstUImmOperand(N, 9, "tma.mscatter.desc", "size_code"); + const int64_t StrideBytes = + requireConstSImmOperand(N, 10, "tma.mscatter.desc", "stride_bytes"); + validateStrictTileSizeCode(SizeCode, "tma.mscatter.desc"); + validateTileDataTypeU5(DType, "tma.mscatter.desc"); + const uint64_t Dim0 = + requirePositiveDim(LB0, "tma.mscatter.desc", "lb0"); + const uint64_t Dim1 = + requirePositiveDim(LB1, "tma.mscatter.desc", "lb1"); + validateTileByteBudget("tma.mscatter.desc", Dim0, Dim1, /*dim2=*/1u, + 
dtypeElementBitsForTileCheck(DType), SizeCode); + if (StrideBytes < 0) + report_fatal_error( + "Linx: tma.mscatter.desc requires stride_bytes >= 0"); + if (StrideBytes != 0) { + const uint64_t ElemBits = dtypeElementBitsForTileCheck(DType); + const uint64_t ElemBytes = (ElemBits + 7u) / 8u; + const uint64_t RowSpanBytes = computeTileBytesOrDie( + "tma.mscatter.desc stride", Dim0, 1u, 1u, ElemBits); + const uint64_t StrideU64 = static_cast(StrideBytes); + if ((StrideU64 % ElemBytes) != 0u || StrideU64 < RowSpanBytes) { + report_fatal_error( + "Linx: tma.mscatter.desc stride_bytes must be element-aligned " + "and >= lb0*elem_bytes(rounded)"); + } + } + + SDValue Chain = N->getOperand(0); + SDValue Base = N->getOperand(2); + SDValue Tile = N->getOperand(3); + SDValue Index = N->getOperand(4); + SDValue DTypeImm = CurDAG->getTargetConstant(DType, DL, MVT::i64); + SDValue LayoutImm = CurDAG->getTargetConstant(Layout, DL, MVT::i64); + SDValue LB0Imm = CurDAG->getTargetConstant(LB0, DL, MVT::i64); + SDValue LB1Imm = CurDAG->getTargetConstant(LB1, DL, MVT::i64); + SDValue SizeImm = CurDAG->getTargetConstant(SizeCode, DL, MVT::i64); + SDValue Stride = forceGpr(N->getOperand(10)); + SDValue Ops[] = {Base, Tile, Index, DTypeImm, LayoutImm, + LB0Imm, LB1Imm, SizeImm, Stride, Chain}; + SDNode *Res = CurDAG->getMachineNode( + LinxISA::PSEUDO_TMA_MSCATTER_DESC, DL, MVT::Other, Ops); + ReplaceNode(N, Res); + return; + } + if (IntrID == Intrinsic::linx_vblock_launch) { // (chain, id, vkind, body_sym, dim0, dim1, dim2, attr_bits, // bind0..bind11) @@ -1940,6 +2187,23 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { MVT MemVT = LD->getMemoryVT().getSimpleVT(); int64_t Scale = getMemScaleForVT(MemVT); + if (MemVT == MVT::v1024i32 || MemVT == MVT::linxtile) { + SDValue Base, Off; + selectMemAddr(LD->getBasePtr(), Base, Off, /*Scale=*/1); + if (!isZeroConstantOffset(Off)) { + report_fatal_error( + "Linx: raw tile load with non-zero offset is unsupported"); + } + + SDValue SizeImm = 
CurDAG->getTargetConstant(8, DL, MVT::i64); + SDVTList VTs = CurDAG->getVTList(LD->getValueType(0), MVT::Other); + SDValue Ops[] = {Base, SizeImm, LD->getChain()}; + SDNode *Res = + CurDAG->getMachineNode(LinxISA::PSEUDO_TMA_TLOAD_ANY, DL, VTs, Ops); + ReplaceNode(N, Res); + return; + } + // Fold materialized PC-relative addresses back into a single *.PCR load: // addr = ADDI(ADDTPC(sym), sym) [+ const] // load [addr] -> *.pcr [sym+const] @@ -1955,6 +2219,10 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { Opc = (LD->getExtensionType() == ISD::ZEXTLOAD) ? LinxISA::LHU_PCR : LinxISA::LH_PCR; break; + case MVT::f16: + case MVT::bf16: + Opc = LinxISA::LHU_PCR; + break; case MVT::i32: Opc = (LD->getExtensionType() == ISD::ZEXTLOAD) ? LinxISA::LWU_PCR : LinxISA::LW_PCR; @@ -1995,6 +2263,10 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { Opc = (LD->getExtensionType() == ISD::ZEXTLOAD) ? LinxISA::LHU : LinxISA::LH; break; + case MVT::f16: + case MVT::bf16: + Opc = LinxISA::LHU; + break; case MVT::i32: Opc = (LD->getExtensionType() == ISD::ZEXTLOAD) ? LinxISA::LWU : LinxISA::LW; @@ -2039,6 +2311,10 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { Opc = (LD->getExtensionType() == ISD::ZEXTLOAD) ? 
LinxISA::LHUI : LinxISA::LHI; break; + case MVT::f16: + case MVT::bf16: + Opc = LinxISA::LHUI; + break; case MVT::i32: if (LD->getExtensionType() == ISD::ZEXTLOAD) Opc = LinxISA::LWUI; @@ -2056,7 +2332,8 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { Opc = LinxISA::LDI; break; default: - report_fatal_error("Linx: unsupported load type"); + report_fatal_error(Twine("Linx: unsupported load type ") + + Twine(static_cast(MemVT.SimpleTy))); } SDValue Base, Off; @@ -2079,6 +2356,22 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { MVT MemVT = ST->getMemoryVT().getSimpleVT(); int64_t Scale = getMemScaleForVT(MemVT); + if (MemVT == MVT::v1024i32 || MemVT == MVT::linxtile) { + SDValue Base, Off; + selectMemAddr(ST->getBasePtr(), Base, Off, /*Scale=*/1); + if (!isZeroConstantOffset(Off)) { + report_fatal_error( + "Linx: raw tile store with non-zero offset is unsupported"); + } + + SDValue SizeImm = CurDAG->getTargetConstant(8, DL, MVT::i64); + SDValue Ops[] = {Base, ST->getValue(), SizeImm, ST->getChain()}; + SDNode *Res = + CurDAG->getMachineNode(LinxISA::PSEUDO_TMA_TSTORE, DL, MVT::Other, Ops); + ReplaceNode(N, Res); + return; + } + // Fold materialized PC-relative addresses back into a single *.PCR store: // addr = ADDI(ADDTPC(sym), sym) [+ const] // store [addr] -> *.pcr [sym+const] @@ -2092,6 +2385,10 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { case MVT::i16: Opc = LinxISA::SH_PCR; break; + case MVT::f16: + case MVT::bf16: + Opc = LinxISA::SH_PCR; + break; case MVT::i32: case MVT::f32: Opc = LinxISA::SW_PCR; @@ -2129,6 +2426,11 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { ExpectedShift = 1; Opc = LinxISA::SH; break; + case MVT::f16: + case MVT::bf16: + ExpectedShift = 1; + Opc = LinxISA::SH; + break; case MVT::i32: ExpectedShift = 2; Opc = LinxISA::SW; @@ -2170,6 +2472,10 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { case MVT::i16: Opc = LinxISA::SHI; break; + case MVT::f16: + case MVT::bf16: + Opc = LinxISA::SHI; + break; case MVT::i32: Opc = LinxISA::SWI; 
break; @@ -2183,7 +2489,8 @@ void LinxISADAGToDAGISel::Select(SDNode *N) { Opc = LinxISA::SDI; break; default: - report_fatal_error("Linx: unsupported store type"); + report_fatal_error(Twine("Linx: unsupported store type ") + + Twine(static_cast(MemVT.SimpleTy))); } SDValue Base, Off; diff --git a/llvm/lib/Target/LinxISA/LinxISAISelLowering.cpp b/llvm/lib/Target/LinxISA/LinxISAISelLowering.cpp index ac8cc7a9250d9..a2af85f4edd53 100644 --- a/llvm/lib/Target/LinxISA/LinxISAISelLowering.cpp +++ b/llvm/lib/Target/LinxISA/LinxISAISelLowering.cpp @@ -1875,6 +1875,9 @@ const char *LinxISATargetLowering::getTargetNodeName(unsigned Opcode) const { std::pair LinxISATargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + if (Constraint == "Tr") + return std::make_pair(0u, &LinxISA::TILERegClass); + if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': diff --git a/llvm/lib/Target/LinxISA/LinxISAInstrInfo.td b/llvm/lib/Target/LinxISA/LinxISAInstrInfo.td index 286eca8c7f3e4..7e3ccaee41303 100644 --- a/llvm/lib/Target/LinxISA/LinxISAInstrInfo.td +++ b/llvm/lib/Target/LinxISA/LinxISAInstrInfo.td @@ -142,6 +142,10 @@ def uimm6 : Operand, PatLeaf<(i64 imm), [{ return isUInt<6>(N->getZExtValue let ParserMatchClass = ImmAsmOperand; } +def uimm8 : Operand, PatLeaf<(i64 imm), [{ return isUInt<8>(N->getZExtValue()); }]> { + let ParserMatchClass = ImmAsmOperand; +} + def uimm10 : Operand, PatLeaf<(i64 imm), [{ return isUInt<10>(N->getZExtValue()); }]> { let ParserMatchClass = ImmAsmOperand; } @@ -433,7 +437,7 @@ def BSTOP : LinxISAInst<(outs), (ins), "", []>; // Tile blocks (TAU) - bring-up (selected manually from intrinsics) // // These are expanded very late (pre-emit) into block-structured sequences: -// BSTART.TMA / BSTART.CUBE + B.IOT/B.IOTI/B.DIM descriptors. +// BSTART.TMA / BSTART.CUBE + B.ITP/B.OTA/B.DIM descriptors. 
//===----------------------------------------------------------------------===// let hasSideEffects = 1, isBarrier = 1, mayLoad = 1, Size = 4 in @@ -465,6 +469,23 @@ def PSEUDO_TMA_TSTORE_DESC : Pseudo<(outs), i64imm:$size, GPR_Arch:$stride_bytes), "# PSEUDO_TMA_TSTORE_DESC", []>; +let hasSideEffects = 1, isBarrier = 1, mayLoad = 1, Size = 4 in +def PSEUDO_TMA_MGATHER_DESC : Pseudo<(outs TILE_TU:$dst), + (ins GPR_Arch:$base, TILE:$index, + i64imm:$dtype, i64imm:$layout, + i64imm:$lb0, i64imm:$lb1, + i64imm:$size, GPR_Arch:$stride_bytes), + "# PSEUDO_TMA_MGATHER_DESC", []>; + +let hasSideEffects = 1, isBarrier = 1, mayStore = 1, Size = 4 in +def PSEUDO_TMA_MSCATTER_DESC : Pseudo<(outs), + (ins GPR_Arch:$base, TILE:$src, + TILE:$index, i64imm:$dtype, + i64imm:$layout, i64imm:$lb0, + i64imm:$lb1, i64imm:$size, + GPR_Arch:$stride_bytes), + "# PSEUDO_TMA_MSCATTER_DESC", []>; + // Compiler-inserted tile SSA edge balancing pseudo. // // Operand contract: @@ -483,6 +504,23 @@ def PSEUDO_TMA_TMOV : Pseudo<(outs TILE:$dst), i64imm:$mode, i64imm:$src_reuse), "# PSEUDO_TMA_TMOV", []>; +let hasSideEffects = 1, isBarrier = 1, Size = 4 in +def PSEUDO_FIXP_TINSERT : Pseudo<(outs TILE:$dst), + (ins TILE:$dst_base, TILE:$src, + GPR_Arch:$meta, i64imm:$size, + i64imm:$dtype, i64imm:$dst_rows, + i64imm:$dst_cols, i64imm:$src_rows, + i64imm:$src_cols), + "# PSEUDO_FIXP_TINSERT", []>; + +let hasSideEffects = 1, isBarrier = 1, Size = 4 in +def PSEUDO_FIXP_TTRANS : Pseudo<(outs TILE:$dst), + (ins TILE:$src, TILE:$tmp, i64imm:$size, + i64imm:$dtype, i64imm:$dst_rows, + i64imm:$dst_cols, i64imm:$src_rows, + i64imm:$src_cols), + "# PSEUDO_FIXP_TTRANS", []>; + let hasSideEffects = 1, isBarrier = 1, Size = 4 in def PSEUDO_CUBE_MAMULB : Pseudo<(outs TILE_MN:$dst), (ins TILE:$srcA, TILE:$srcB, @@ -516,6 +554,13 @@ def PSEUDO_TEPL_BINARY : Pseudo<(outs TILE:$dst), i64imm:$size, i64imm:$dtype), "# PSEUDO_TEPL_BINARY", []>; +let hasSideEffects = 1, isBarrier = 1, Size = 4 in +def 
PSEUDO_TEPL_TERNARY : Pseudo<(outs TILE:$dst), + (ins TILE:$srcA, TILE:$srcB, TILE:$srcC, + i64imm:$tileop, i64imm:$size, + i64imm:$dtype), + "# PSEUDO_TEPL_TERNARY", []>; + let hasSideEffects = 1, isBarrier = 1, Size = 4 in def PSEUDO_TEPL_BINARY_SCALAR : Pseudo<(outs TILE:$dst), (ins TILE:$srcA, GPR_Arch:$srcS, @@ -750,9 +795,10 @@ let hasSideEffects = 1, Size = 2 in { // Real tile-block descriptor instructions (emitted after register allocation). let hasSideEffects = 1, isBarrier = 1, Size = 4 in { - def BSTART_TMA : LinxISAInst<(outs), (ins uimm5:$dtype, uimm5:$func), "", []>; - def BSTART_CUBE : LinxISAInst<(outs), (ins uimm5:$dtype, uimm5:$func), "", []>; - def BSTART_TEPL : LinxISAInst<(outs), (ins uimm5:$dtype, uimm10:$tileop), "", []>; + def BSTART_TMA : LinxISAInst<(outs), (ins uimm5:$dtype, uimm8:$func), "", []>; + def BSTART_CUBE : LinxISAInst<(outs), (ins uimm5:$dtype, uimm8:$func), "", []>; + def BSTART_FIXP : LinxISAInst<(outs), (ins uimm5:$dtype, uimm8:$func), "", []>; + def BSTART_TEPL : LinxISAInst<(outs), (ins uimm5:$dtype, uimm8:$tileop), "", []>; // Vector block start markers (decoupled headers). // @@ -767,6 +813,7 @@ let hasSideEffects = 1, isBarrier = 1, Size = 4 in { def B_TEXT : LinxISAInst<(outs), (ins BrTarget:$dst), "", []>; def B_ARG : LinxISAInst<(outs), (ins i64imm:$format), "", []>; + def B_META : LinxISAInst<(outs), (ins GPR_Arch:$meta, i64imm:$mode), "", []>; // Full B.ATTR descriptor (strict-v0.3): // {C, DR, DataLayout, DataType, PadValue, T, aq, atom, far, rl} @@ -786,25 +833,13 @@ let hasSideEffects = 1, isBarrier = 1, Size = 4 in { GPR_Arch:$regsrc1, GPR_Arch:$regsrc2), "", []>; - // B.IOT / B.IOTI descriptor forms: group bit is selected by opcode. 
- def B_IOT_G0 : LinxISAInst<(outs), (ins i64imm:$dst, GPR_Arch:$regsrc, - i64imm:$s0r, i64imm:$s0v, - i64imm:$s1r, i64imm:$s1v, - i64imm:$src0, i64imm:$src1), "", []>; - def B_IOT_G1 : LinxISAInst<(outs), (ins i64imm:$dst, GPR_Arch:$regsrc, - i64imm:$s0r, i64imm:$s0v, - i64imm:$s1r, i64imm:$s1v, - i64imm:$src0, i64imm:$src1), "", []>; - def B_IOTI_G0 : LinxISAInst<(outs), (ins i64imm:$dst, - i64imm:$s0r, i64imm:$s0v, - i64imm:$s1r, i64imm:$s1v, - i64imm:$src0, i64imm:$src1, - i64imm:$size), "", []>; - def B_IOTI_G1 : LinxISAInst<(outs), (ins i64imm:$dst, - i64imm:$s0r, i64imm:$s0v, - i64imm:$s1r, i64imm:$s1v, - i64imm:$src0, i64imm:$src1, - i64imm:$size), "", []>; + def B_ITP : LinxISAInst<(outs), (ins i64imm:$src0, i64imm:$src1, + i64imm:$s0r, i64imm:$s1r, + i64imm:$last, i64imm:$src_pair), "", []>; + def B_OTA : LinxISAInst<(outs), (ins i64imm:$dst, + i64imm:$cell_count_m1, + i64imm:$last, + i64imm:$dst_slot), "", []>; } // Branch-condition setters used by BlockISA (no explicit branch instructions). 
diff --git a/llvm/lib/Target/LinxISA/LinxISAMCInstLower.cpp b/llvm/lib/Target/LinxISA/LinxISAMCInstLower.cpp index d6d3f6f4abdbd..a18e4b8307d3a 100644 --- a/llvm/lib/Target/LinxISA/LinxISAMCInstLower.cpp +++ b/llvm/lib/Target/LinxISA/LinxISAMCInstLower.cpp @@ -353,7 +353,7 @@ void LinxISAMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case LinxISA::BSTART_TMA: { const int64_t DataType = I(0) & 0x1f; - const int64_t Func = I(1) & 0x1f; + const int64_t Func = I(1) & 0xff; OutMI.setOpcode( getSpecOpcodeByAsmFmt("BSTART.TMA Function, DataType", /*LengthBits=*/32)); OutMI.addOperand(MCOperand::createImm(DataType)); @@ -362,7 +362,7 @@ void LinxISAMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } case LinxISA::BSTART_CUBE: { const int64_t DataType = I(0) & 0x1f; - const int64_t Func = I(1) & 0x1f; + const int64_t Func = I(1) & 0xff; OutMI.setOpcode( getSpecOpcodeByAsmFmt("BSTART.CUBE Function, DataType", /*LengthBits=*/32)); @@ -370,9 +370,19 @@ void LinxISAMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.addOperand(MCOperand::createImm(Func)); return; } + case LinxISA::BSTART_FIXP: { + const int64_t DataType = I(0) & 0x1f; + const int64_t Func = I(1) & 0xff; + OutMI.setOpcode( + getSpecOpcodeByAsmFmt("BSTART.FIXP Function, DataType", + /*LengthBits=*/32)); + OutMI.addOperand(MCOperand::createImm(DataType)); + OutMI.addOperand(MCOperand::createImm(Func)); + return; + } case LinxISA::BSTART_TEPL: { const int64_t DataType = I(0) & 0x1f; - const int64_t TileOpcode = I(1) & 0x3ff; + const int64_t TileOpcode = I(1) & 0xff; OutMI.setOpcode(getSpecOpcodeByAsmFmt("BSTART.TEPL TileOpcode, DataType", /*LengthBits=*/32)); OutMI.addOperand(MCOperand::createImm(DataType)); @@ -409,6 +419,14 @@ void LinxISAMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { return; } + case LinxISA::B_META: { + OutMI.setOpcode( + getSpecOpcodeByAsmFmt("B.META MetaGpr, MetaMode", /*LengthBits=*/32)); + 
OutMI.addOperand(MCOperand::createImm(R(0))); // MetaGpr + OutMI.addOperand(MCOperand::createImm(I(1) & 0x1)); // MetaMode + return; + } + case LinxISA::B_ATTR: { OutMI.setOpcode(getSpecOpcodeByAsmFmt( "B.ATTR {trap, atomic, , far, DataLayout.{canon, normal}, " @@ -469,39 +487,26 @@ void LinxISAMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { return; } - case LinxISA::B_IOT_G0: - case LinxISA::B_IOT_G1: { - StringRef AsmFmt = - (Opc == LinxISA::B_IOT_G0) - ? "B.IOT [SrcTile0<.reuse>, SrcTile1<.reuse>], group=0, ->DstTile" - : "B.IOT [SrcTile0<.reuse>, SrcTile1<.reuse>], group=1, ->DstTile"; - OutMI.setOpcode(getSpecOpcodeByAsmFmt(AsmFmt, /*LengthBits=*/32)); - OutMI.addOperand(MCOperand::createImm(I(0))); // DstTile - OutMI.addOperand(MCOperand::createImm(R(1))); // RegSrc + case LinxISA::B_ITP: { + OutMI.setOpcode(getSpecOpcodeByAsmFmt( + "B.ITP [SrcTile0<.reuse>, SrcTile1<.reuse>], , src_pair", + /*LengthBits=*/32)); + OutMI.addOperand(MCOperand::createImm(I(1))); // SrcTile1 + OutMI.addOperand(MCOperand::createImm(I(0))); // SrcTile0 + OutMI.addOperand(MCOperand::createImm(I(4))); // L + OutMI.addOperand(MCOperand::createImm(I(5))); // src_pair + OutMI.addOperand(MCOperand::createImm(I(3))); // S1R OutMI.addOperand(MCOperand::createImm(I(2))); // S0R - OutMI.addOperand(MCOperand::createImm(I(3))); // S0V - OutMI.addOperand(MCOperand::createImm(I(4))); // S1R - OutMI.addOperand(MCOperand::createImm(I(5))); // S1V - OutMI.addOperand(MCOperand::createImm(I(6))); // SrcTile0 - OutMI.addOperand(MCOperand::createImm(I(7))); // SrcTile1 return; } - - case LinxISA::B_IOTI_G0: - case LinxISA::B_IOTI_G1: { - StringRef AsmFmt = - (Opc == LinxISA::B_IOTI_G0) - ? 
"B.IOT SrcTile0<.reuse>, SrcTile1<.reuse>, , ->DstTile" - : "B.IOT SrcTile0<.reuse>, SrcTile1<.reuse>, , ->DstTile"; - OutMI.setOpcode(getSpecOpcodeByAsmFmt(AsmFmt, /*LengthBits=*/32)); + case LinxISA::B_OTA: { + OutMI.setOpcode(getSpecOpcodeByAsmFmt( + "B.OTA ->DstTile, , dst_slot", + /*LengthBits=*/32)); OutMI.addOperand(MCOperand::createImm(I(0))); // DstTile - OutMI.addOperand(MCOperand::createImm(I(1))); // S0R - OutMI.addOperand(MCOperand::createImm(I(2))); // S0V - OutMI.addOperand(MCOperand::createImm(I(3))); // S1R - OutMI.addOperand(MCOperand::createImm(I(4))); // S1V - OutMI.addOperand(MCOperand::createImm(I(5))); // SrcTile0 - OutMI.addOperand(MCOperand::createImm(I(6))); // SrcTile1 - OutMI.addOperand(MCOperand::createImm(I(7))); // imm5 (Size) + OutMI.addOperand(MCOperand::createImm(I(1))); // CellCountM1 + OutMI.addOperand(MCOperand::createImm(I(2))); // L + OutMI.addOperand(MCOperand::createImm(I(3))); // dst_slot return; } diff --git a/llvm/lib/Target/LinxISA/LinxISARegisterInfo.cpp b/llvm/lib/Target/LinxISA/LinxISARegisterInfo.cpp index 500410bb27ff8..400af46cb653b 100644 --- a/llvm/lib/Target/LinxISA/LinxISARegisterInfo.cpp +++ b/llvm/lib/Target/LinxISA/LinxISARegisterInfo.cpp @@ -199,7 +199,9 @@ bool LinxISARegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, switch (MI.getOpcode()) { case LinxISA::PSEUDO_TMA_TLOAD: case LinxISA::PSEUDO_TMA_TLOAD_ANY: - case LinxISA::PSEUDO_TMA_TSTORE: { + case LinxISA::PSEUDO_TMA_TSTORE: + case LinxISA::PSEUDO_TMA_MGATHER_DESC: + case LinxISA::PSEUDO_TMA_MSCATTER_DESC: { if (OffsetBytes == 0) { MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/false); return false; diff --git a/llvm/lib/Target/LinxISA/LinxISATileSSABalance.cpp b/llvm/lib/Target/LinxISA/LinxISATileSSABalance.cpp index f0fa824c9a212..31500aeee21d0 100644 --- a/llvm/lib/Target/LinxISA/LinxISATileSSABalance.cpp +++ b/llvm/lib/Target/LinxISA/LinxISATileSSABalance.cpp @@ -212,6 +212,15 @@ static bool 
extractDefMetadata(const MachineInstr &MI, TileMeta &Meta) { Meta.Layout = MI.getOperand(3).getImm(); return true; + case LinxISA::PSEUDO_TMA_MGATHER_DESC: + Meta.HasSize = true; + Meta.SizeCode = static_cast(MI.getOperand(7).getImm() & 0x1f); + Meta.HasDataType = true; + Meta.DataType = static_cast(MI.getOperand(3).getImm() & 0x1f); + Meta.HasLayout = true; + Meta.Layout = MI.getOperand(4).getImm(); + return true; + case LinxISA::PSEUDO_CUBE_MAMULB: case LinxISA::PSEUDO_CUBE_MAMULB_ACC: Meta.HasSize = true; @@ -257,6 +266,13 @@ static bool extractDefMetadata(const MachineInstr &MI, TileMeta &Meta) { Meta.DataType = static_cast(MI.getOperand(5).getImm() & 0x1f); return true; + case LinxISA::PSEUDO_TEPL_TERNARY: + Meta.HasSize = true; + Meta.SizeCode = static_cast(MI.getOperand(5).getImm() & 0x1f); + Meta.HasDataType = true; + Meta.DataType = static_cast(MI.getOperand(6).getImm() & 0x1f); + return true; + case LinxISA::PSEUDO_TEPL_BINARY_SCALAR: Meta.HasSize = true; Meta.SizeCode = static_cast(MI.getOperand(4).getImm() & 0x1f); @@ -281,6 +297,20 @@ static bool extractDefMetadata(const MachineInstr &MI, TileMeta &Meta) { Meta.Layout = MI.getOperand(4).getImm(); return true; + case LinxISA::PSEUDO_FIXP_TINSERT: + Meta.HasSize = true; + Meta.SizeCode = static_cast(MI.getOperand(4).getImm() & 0x1f); + Meta.HasDataType = true; + Meta.DataType = static_cast(MI.getOperand(5).getImm() & 0x1f); + return true; + + case LinxISA::PSEUDO_FIXP_TTRANS: + Meta.HasSize = true; + Meta.SizeCode = static_cast(MI.getOperand(3).getImm() & 0x1f); + Meta.HasDataType = true; + Meta.DataType = static_cast(MI.getOperand(4).getImm() & 0x1f); + return true; + default: return false; } @@ -515,18 +545,19 @@ class LinxISATileSSABalance : public MachineFunctionPass { const unsigned SrcId = getTileRegId(TRI, CopyMI->getOperand(1).getReg()); BundleSrcByDst.try_emplace(DstId, SrcId); } - DenseMap BundleResolvedMeta; + DenseMap BundleSourceMeta; + DenseMap BundleDstMeta; auto resolveMetaForTile = 
[&](auto &&Self, unsigned TileId, SmallVectorImpl &Visiting) -> std::optional { - auto ResolvedIt = BundleResolvedMeta.find(TileId); - if (ResolvedIt != BundleResolvedMeta.end()) - return ResolvedIt->second; + auto SourceIt = BundleSourceMeta.find(TileId); + if (SourceIt != BundleSourceMeta.end()) + return SourceIt->second; auto GlobalIt = RegMetaById.find(TileId); if (GlobalIt != RegMetaById.end()) { - BundleResolvedMeta[TileId] = GlobalIt->second; + BundleSourceMeta[TileId] = GlobalIt->second; return GlobalIt->second; } @@ -544,7 +575,7 @@ class LinxISATileSSABalance : public MachineFunctionPass { if (!SrcMeta) return std::nullopt; - BundleResolvedMeta[TileId] = *SrcMeta; + BundleSourceMeta[TileId] = *SrcMeta; return *SrcMeta; }; @@ -581,20 +612,12 @@ class LinxISATileSSABalance : public MachineFunctionPass { ")"); } - if (auto DstMetaIt = RegMetaById.find(DstId); - DstMetaIt != RegMetaById.end()) { - std::string Reason; - if (!metadataCompatible(DstMetaIt->second, CopyMeta, Reason)) { - reportTileBalanceError( - MF, *CopyMI, - Twine("tile COPY metadata mismatch across CFG edges (dst id=") + - Twine(DstId) + ", src id=" + Twine(SrcId) + "): " + - Reason); - } - CopyMeta = mergeMetadata(CopyMeta, DstMetaIt->second); - } - if (auto DstResolvedIt = BundleResolvedMeta.find(DstId); - DstResolvedIt != BundleResolvedMeta.end()) { + // Parallel COPYs describe incoming values on an edge. A physical + // destination register may have stale metadata from a previous value + // with a different dtype/layout, so only duplicate destination writes + // in this COPY bundle constrain the new destination metadata. 
+ if (auto DstResolvedIt = BundleDstMeta.find(DstId); + DstResolvedIt != BundleDstMeta.end()) { std::string Reason; if (!metadataCompatible(DstResolvedIt->second, CopyMeta, Reason)) { reportTileBalanceError( @@ -617,7 +640,7 @@ class LinxISATileSSABalance : public MachineFunctionPass { (void)encodeTileRelRef(DstRef); Op.Meta = CopyMeta; - BundleResolvedMeta[DstId] = CopyMeta; + BundleDstMeta[DstId] = CopyMeta; Ops.push_back(Op); } diff --git a/llvm/lib/Target/LinxISA/MCTargetDesc/LinxISAInstPrinter.cpp b/llvm/lib/Target/LinxISA/MCTargetDesc/LinxISAInstPrinter.cpp index 58b96902dff2e..31b10632362a4 100644 --- a/llvm/lib/Target/LinxISA/MCTargetDesc/LinxISAInstPrinter.cpp +++ b/llvm/lib/Target/LinxISA/MCTargetDesc/LinxISAInstPrinter.cpp @@ -89,6 +89,17 @@ static void printTileRef(raw_ostream &OS, unsigned TileId) { OS << Prefix << "#" << utostr(Depth + 1u); } +static void printTileRef8(raw_ostream &OS, unsigned TileId) { + TileId &= 0xffu; + if (TileId == 0u) { + OS << "TZERO"; + } else if (TileId <= 32u) { + printTileRef(OS, TileId - 1u); + } else { + OS << "tile#" << utostr(TileId); + } +} + static StringRef dtypeName(unsigned DT) { switch (DT & 0x1f) { case 0: @@ -156,11 +167,15 @@ static StringRef parTileOpName(unsigned TileOpcode) { static StringRef tmaAliasMnemonic(unsigned Func) { switch (Func & 0x1fu) { case 0: - return "BSTART.TLOAD"; + return "BSTART.MGATHER"; case 1: - return "BSTART.TSTORE"; + return "BSTART.MSCATTER"; case 2: - return "BSTART.TMOV"; + return "BSTART.TLOAD"; + case 3: + return "BSTART.TPREFETCH"; + case 4: + return "BSTART.TSTORE"; default: return StringRef(); } @@ -169,11 +184,32 @@ static StringRef tmaAliasMnemonic(unsigned Func) { static StringRef cubeAliasMnemonic(unsigned Func) { switch (Func & 0x1fu) { case 0: + return "BSTART.TGEMV"; + case 1: return "BSTART.TMATMUL"; + default: + return StringRef(); + } +} + +static StringRef fixpAliasMnemonic(unsigned Func) { + switch (Func & 0x1fu) { + case 0: + return "BSTART.TDEQUANT"; + case 
1: + return "BSTART.TEXTRACT"; case 2: - return "BSTART.TMATMUL.ACC"; - case 8: - return "BSTART.ACCCVT"; + return "BSTART.TINSERT"; + case 3: + return "BSTART.TMOV"; + case 4: + return "BSTART.TCONCAT"; + case 5: + return "BSTART.TFILLPAD"; + case 6: + return "BSTART.TQUANT"; + case 7: + return "BSTART.TTRANS"; default: return StringRef(); } @@ -201,91 +237,189 @@ static StringRef legacyPackedAliasMnemonic(unsigned TileOpcode) { static StringRef teplAliasMnemonic(unsigned TileOpcode) { switch (TileOpcode & 0x3ffu) { case 0x000: - return "BSTART.TADD"; + return "BSTART.TABS"; case 0x001: - return "BSTART.TSUB"; + return "BSTART.TADD"; case 0x002: - return "BSTART.TMUL"; + return "BSTART.TADDC"; case 0x003: - return "BSTART.TDIV"; + return "BSTART.TADDS"; case 0x004: - return "BSTART.TMAX"; + return "BSTART.TADDSC"; case 0x005: - return "BSTART.TMIN"; - case 0x006: return "BSTART.TAND"; + case 0x006: + return "BSTART.TANDS"; case 0x007: - return "BSTART.TOR"; + return "BSTART.TAXPY"; case 0x008: - return "BSTART.TXOR"; + return "BSTART.TCI"; case 0x009: - return "BSTART.TSHL"; + return "BSTART.TCMP"; case 0x00a: - return "BSTART.TSHR"; + return "BSTART.TCMPS"; case 0x00b: - return "BSTART.TRELU"; + return "BSTART.TCOLARGMAX"; case 0x00c: - return "BSTART.TPRELU"; + return "BSTART.TCOLARGMIN"; case 0x00d: - return "BSTART.TCVT"; + return "BSTART.TCOLEXPAND"; case 0x00e: - return "BSTART.TEXP"; + return "BSTART.TCOLEXPANDADD"; case 0x00f: - return "BSTART.TLOG"; + return "BSTART.TCOLEXPANDDIV"; case 0x010: - return "BSTART.TSQRT"; + return "BSTART.TCOLEXPANDEXPDIF"; case 0x011: - return "BSTART.TRSQRT"; + return "BSTART.TCOLEXPANDMAX"; case 0x012: - return "BSTART.TROWMAX"; + return "BSTART.TCOLEXPANDMIN"; case 0x013: - return "BSTART.TROWMIN"; + return "BSTART.TCOLEXPANDMUL"; case 0x014: - return "BSTART.TROWSUM"; + return "BSTART.TCOLEXPANDSUB"; case 0x015: return "BSTART.TCOLMAX"; case 0x016: return "BSTART.TCOLMIN"; case 0x017: - return "BSTART.TCOLSUM"; + return 
"BSTART.TCOLPROD"; case 0x018: - return "BSTART.TRECIP"; + return "BSTART.TCOLSUM"; case 0x019: - return "BSTART.TEXPANDS"; + return "BSTART.TCVT"; case 0x01a: - return "BSTART.TGATHER"; + return "BSTART.TDIV"; case 0x01b: - return "BSTART.TSCATTER"; + return "BSTART.TDIVS"; case 0x01c: - return "BSTART.TRESHAPE"; + return "BSTART.TEXP"; case 0x01d: - return "BSTART.TTRANSPOSE"; + return "BSTART.TEXPANDS"; case 0x01e: - return "BSTART.TCOLEXPAND"; + return "BSTART.TFMOD"; case 0x01f: - return "BSTART.TROWEXPAND"; + return "BSTART.TFMODS"; case 0x020: - return "BSTART.TADDS"; + return "BSTART.TGATHER"; case 0x021: - return "BSTART.TSUBS"; + return "BSTART.TGATHERB"; case 0x022: - return "BSTART.TMULS"; + return "BSTART.THISTOGRAM"; case 0x023: - return "BSTART.TDIVS"; + return "BSTART.TLOG"; case 0x024: - return "BSTART.TMAXS"; + return "BSTART.TLRELU"; case 0x025: - return "BSTART.TMINS"; + return "BSTART.TMAX"; case 0x026: - return "BSTART.TANDS"; + return "BSTART.TMAXS"; case 0x027: - return "BSTART.TORS"; + return "BSTART.TMIN"; case 0x028: - return "BSTART.TXORS"; + return "BSTART.TMINS"; case 0x029: - return "BSTART.TSHLS"; + return "BSTART.TMRGSORT"; case 0x02a: + return "BSTART.TMUL"; + case 0x02b: + return "BSTART.TMULS"; + case 0x02c: + return "BSTART.TNEG"; + case 0x02d: + return "BSTART.TNOT"; + case 0x02e: + return "BSTART.TOR"; + case 0x02f: + return "BSTART.TORS"; + case 0x030: + return "BSTART.TPARTADD"; + case 0x031: + return "BSTART.TPARTARGMAX"; + case 0x032: + return "BSTART.TPARTARGMIN"; + case 0x033: + return "BSTART.TPARTMAX"; + case 0x034: + return "BSTART.TPARTMIN"; + case 0x035: + return "BSTART.TPARTMUL"; + case 0x036: + return "BSTART.TPOW"; + case 0x037: + return "BSTART.TPRELU"; + case 0x038: + return "BSTART.TRANDOM"; + case 0x039: + return "BSTART.TRECIP"; + case 0x03a: + return "BSTART.TRELU"; + case 0x03b: + return "BSTART.TREM"; + case 0x03c: + return "BSTART.TREMS"; + case 0x03d: + return "BSTART.TROWARGMAX"; + case 0x03e: + 
return "BSTART.TROWARGMIN"; + case 0x03f: + return "BSTART.TROWEXPAND"; + case 0x040: + return "BSTART.TROWEXPANDADD"; + case 0x041: + return "BSTART.TROWEXPANDDIV"; + case 0x042: + return "BSTART.TROWEXPANDEXPDIF"; + case 0x043: + return "BSTART.TROWEXPANDMAX"; + case 0x044: + return "BSTART.TROWEXPANDMIN"; + case 0x045: + return "BSTART.TROWEXPANDMUL"; + case 0x046: + return "BSTART.TROWEXPANDSUB"; + case 0x047: + return "BSTART.TROWMAX"; + case 0x048: + return "BSTART.TROWMIN"; + case 0x049: + return "BSTART.TROWPROD"; + case 0x04a: + return "BSTART.TROWSUM"; + case 0x04b: + return "BSTART.TRSQRT"; + case 0x04c: + return "BSTART.TSCATTER"; + case 0x04d: + return "BSTART.TSEL"; + case 0x04e: + return "BSTART.TSELS"; + case 0x04f: + return "BSTART.TSHL"; + case 0x050: + return "BSTART.TSHLS"; + case 0x051: + return "BSTART.TSHR"; + case 0x052: return "BSTART.TSHRS"; + case 0x053: + return "BSTART.TSORT32"; + case 0x054: + return "BSTART.TSQRT"; + case 0x055: + return "BSTART.TSUB"; + case 0x056: + return "BSTART.TSUBC"; + case 0x057: + return "BSTART.TSUBS"; + case 0x058: + return "BSTART.TSUBSC"; + case 0x059: + return "BSTART.TTRI"; + case 0x05a: + return "BSTART.TXOR"; + case 0x05b: + return "BSTART.TXORS"; default: return StringRef(); } @@ -1169,9 +1303,14 @@ void LinxISAInstPrinter::printInst(const MCInst *MI, uint64_t Address, if (IsTypedFIXP) { const unsigned Function = - static_cast(findFieldImm("Function").value_or(0)) & 0x3ffu; - OS << "BSTART.FIXP\t" << utostr(Function) << ", "; - printDataType(); + static_cast(findFieldImm("Function").value_or(0)) & 0x1fu; + if (StringRef Alias = fixpAliasMnemonic(Function); !Alias.empty()) { + OS << Alias << "\t"; + printDataType(); + } else { + OS << "BSTART.FIXP\t" << utostr(Function) << ", "; + printDataType(); + } LastParTileOp = 0u; LastParTileOpValid = false; LastTileHeader = LastTileHeaderKind::None; @@ -1209,23 +1348,21 @@ void LinxISAInstPrinter::printInst(const MCInst *MI, uint64_t Address, if 
(Alias.empty()) Alias = teplAliasMnemonic(TileOpcode); if (!Alias.empty() && - (Alias.starts_with("BSTART.TLOAD") || Alias.starts_with("BSTART.TSTORE") || - Alias.starts_with("BSTART.TMOV"))) { + (Alias.starts_with("BSTART.MGATHER") || + Alias.starts_with("BSTART.MSCATTER") || + Alias.starts_with("BSTART.TLOAD") || + Alias.starts_with("BSTART.TPREFETCH") || + Alias.starts_with("BSTART.TSTORE"))) { LastTileHeader = LastTileHeaderKind::TMA; } else if (!Alias.empty() && - (Alias.starts_with("BSTART.TMATMUL") || - Alias.starts_with("BSTART.ACCCVT"))) { + (Alias.starts_with("BSTART.TGEMV") || + Alias.starts_with("BSTART.TMATMUL"))) { LastTileHeader = LastTileHeaderKind::CUBE; } else if (!Alias.empty() || !isLegacyCubeTileOp(TileOpcode)) { LastTileHeader = LastTileHeaderKind::TEPL; } else { LastTileHeader = LastTileHeaderKind::None; } - - LastParTileOp = ParStateOp; - LastParTileOpValid = true; - printAnnotation(OS, Annot); - return; } if (!Alias.empty()) { @@ -1453,7 +1590,7 @@ void LinxISAInstPrinter::printInst(const MCInst *MI, uint64_t Address, } // Non-tile block starts terminate any tile-header descriptor context used - // by B.ARG/B.IOT pretty-printing. + // by B.ARG/B.ITP/B.OTA pretty-printing. LastParTileOp = 0; LastParTileOpValid = false; LastTileHeader = LastTileHeaderKind::None; @@ -1655,127 +1792,54 @@ void LinxISAInstPrinter::printInst(const MCInst *MI, uint64_t Address, return; } - // Special-case: tile block IO descriptors (B.IOT / B.IOTI). - // - // These use bracket syntax in the ISA, but they are not memory operands and - // should not be routed through the generic load/store pretty printer. - if (AsmFmt.starts_with("B.IOT")) { - const bool IsIOTI = AsmFmt.starts_with("B.IOTI"); - const unsigned S0V = - static_cast(findFieldImm("S0V").value_or(0)) & 0x1u; - const unsigned S1V = - static_cast(findFieldImm("S1V").value_or(0)) & 0x1u; + // Special-case: v0.57 tile input-pair descriptor. 
+ if (AsmFmt.starts_with("B.ITP")) { + const unsigned Src0 = + static_cast(findFieldImm("SrcTile0").value_or(0)) & 0xffu; + const unsigned Src1 = + static_cast(findFieldImm("SrcTile1").value_or(0)) & 0xffu; const unsigned S0R = static_cast(findFieldImm("S0R").value_or(0)) & 0x1u; const unsigned S1R = static_cast(findFieldImm("S1R").value_or(0)) & 0x1u; - const unsigned DstTile = - static_cast(findFieldImm("DstTile").value_or(0)) & 0x7u; - const unsigned Src0 = - static_cast(findFieldImm("SrcTile0").value_or(0)) & 0x1fu; - const unsigned Src1 = - static_cast(findFieldImm("SrcTile1").value_or(0)) & 0x1fu; - const unsigned Reg = - static_cast(findFieldImm("RegSrc").value_or(0)) & 0x1fu; - std::optional SizeOpt = findFieldImm("Size"); - if (!SizeOpt) - SizeOpt = findFieldImm("imm5"); - if (!SizeOpt) - SizeOpt = findFieldImm("uimm5"); - const unsigned Size = static_cast(SizeOpt.value_or(0)) & 0x1fu; - - OS << (IsIOTI ? "B.IOTI" : "B.IOT"); - OS << "\t["; - - const bool Src0Present = (S0V == 0u); - const bool Src1Present = (S1V == 0u); - bool First = true; - if (Src0Present) { - printTileRef(OS, Src0); - if (S0R) - OS << ".reuse"; - First = false; - } - if (Src1Present) { - if (!First) - OS << ", "; - printTileRef(OS, Src1); - if (S1R) - OS << ".reuse"; - } - + const unsigned Last = + static_cast(findFieldImm("L").value_or(0)) & 0x1u; + const unsigned SrcPair = + static_cast(findFieldImm("src_pair").value_or(0)) & 0x3u; + + OS << "B.ITP\t["; + printTileRef8(OS, Src0); + if (S0R) + OS << ".reuse"; + OS << ", "; + printTileRef8(OS, Src1); + if (S1R) + OS << ".reuse"; OS << "]"; - const bool Group1 = AsmFmt.contains("group=1"); - if (Group1) + if (Last) OS << ", last"; + OS << ", " << utostr(SrcPair); + printAnnotation(OS, Annot); + return; + } - const unsigned ActiveParOp = LastParTileOpValid ? LastParTileOp : 0u; - - // Canonical v0.4 CUBE contract: MAMULB-class blocks write an implicit - // accumulator destination. 
- const bool IsAccDst = (LastTileHeader == LastTileHeaderKind::CUBE) && - (ActiveParOp == 2u || ActiveParOp == 66u); - - const char *DstKind = "t"; - if (IsAccDst) { - DstKind = "acc"; - } else { - // If a tile destination is encoded, it lives in the first *absent* source - // slot (preferring SrcTile1). This matches the disassembly snippet - // contract where the arrow kind tracks the destination tile hand. - std::optional DstTileReg; - if (!Src1Present) - DstTileReg = Src1; - else if (!Src0Present) - DstTileReg = Src0; - - if (DstTileReg) { - const unsigned Tile = *DstTileReg & 0x1fu; - if (Tile < 8u) - DstKind = "t"; - else if (Tile < 16u) - DstKind = "u"; - else if (Tile < 24u) - DstKind = "m"; - else - DstKind = "n"; - } else { - // Fallback: treat DstTile as an enum in bring-up streams. - switch (DstTile) { - case 0u: - DstKind = "t"; - break; - case 1u: - DstKind = "u"; - break; - case 2u: - DstKind = "m"; - break; - case 3u: - DstKind = "n"; - break; - case 4u: - DstKind = "acc"; - break; - default: - DstKind = "t"; - break; - } - } - } - - OS << "\t->" << DstKind << "<"; - if (IsIOTI) { - const uint64_t Bytes = (Size < 60u) ? (1ull << (Size + 4u)) : 0ull; - if (Bytes >= 1024u && (Bytes % 1024u) == 0u) - OS << utostr(static_cast(Bytes / 1024u)) << "KB"; - else - OS << utostr(Size); - } else { - OS << reg5Name(Reg); - } - OS << ">"; - + // Special-case: v0.57 tile output allocation descriptor. 
+ if (AsmFmt.starts_with("B.OTA")) { + const unsigned Dst = + static_cast(findFieldImm("DstTile").value_or(0)) & 0xffu; + const unsigned CellCountM1 = + static_cast(findFieldImm("CellCountM1").value_or(0)) & 0xffu; + const unsigned Last = + static_cast(findFieldImm("L").value_or(0)) & 0x1u; + const unsigned DstSlot = + static_cast(findFieldImm("dst_slot").value_or(0)) & 0x3u; + + OS << "B.OTA\t->"; + printTileRef8(OS, Dst); + OS << "<" << utostr(CellCountM1) << ">"; + if (Last) + OS << ", last"; + OS << ", " << utostr(DstSlot); printAnnotation(OS, Annot); return; } diff --git a/llvm/lib/Target/LinxISA/MCTargetDesc/linxisa_opcodes.c b/llvm/lib/Target/LinxISA/MCTargetDesc/linxisa_opcodes.c index 475f99dcbdfdd..797170fee13cb 100644 --- a/llvm/lib/Target/LinxISA/MCTargetDesc/linxisa_opcodes.c +++ b/llvm/lib/Target/LinxISA/MCTargetDesc/linxisa_opcodes.c @@ -3348,8 +3348,21 @@ const linxisa_field_piece linxisa_field_pieces[] = { { .insn_lsb = 20, .width = 5, .value_lsb = 0 }, { .insn_lsb = 25, .width = 2, .value_lsb = 0 }, { .insn_lsb = 27, .width = 5, .value_lsb = 0 }, + /* v0.57 PTO block descriptor extension rows. 
*/ + { .insn_lsb = 24, .width = 8, .value_lsb = 0 }, + { .insn_lsb = 16, .width = 8, .value_lsb = 0 }, + { .insn_lsb = 15, .width = 1, .value_lsb = 0 }, + { .insn_lsb = 10, .width = 2, .value_lsb = 0 }, + { .insn_lsb = 9, .width = 1, .value_lsb = 0 }, + { .insn_lsb = 8, .width = 1, .value_lsb = 0 }, + { .insn_lsb = 24, .width = 8, .value_lsb = 0 }, + { .insn_lsb = 16, .width = 8, .value_lsb = 0 }, + { .insn_lsb = 15, .width = 1, .value_lsb = 0 }, + { .insn_lsb = 10, .width = 2, .value_lsb = 0 }, + { .insn_lsb = 15, .width = 5, .value_lsb = 0 }, + { .insn_lsb = 11, .width = 1, .value_lsb = 0 }, }; -const size_t linxisa_field_pieces_count = 3344; +const size_t linxisa_field_pieces_count = 3356; const linxisa_field linxisa_fields[] = { { .name = "RST_Type", .signed_hint = -1, .bit_width = 4, .piece_start = 0, .piece_count = 1 }, @@ -5963,8 +5976,21 @@ const linxisa_field linxisa_fields[] = { { .name = "SrcR", .signed_hint = -1, .bit_width = 5, .piece_start = 3341, .piece_count = 1 }, { .name = "SrcRType", .signed_hint = -1, .bit_width = 2, .piece_start = 3342, .piece_count = 1 }, { .name = "shamt", .signed_hint = -1, .bit_width = 5, .piece_start = 3343, .piece_count = 1 }, + /* v0.57 PTO block descriptor extension fields. 
*/ + { .name = "SrcTile1", .signed_hint = -1, .bit_width = 8, .piece_start = 3344, .piece_count = 1 }, + { .name = "SrcTile0", .signed_hint = -1, .bit_width = 8, .piece_start = 3345, .piece_count = 1 }, + { .name = "L", .signed_hint = -1, .bit_width = 1, .piece_start = 3346, .piece_count = 1 }, + { .name = "src_pair", .signed_hint = -1, .bit_width = 2, .piece_start = 3347, .piece_count = 1 }, + { .name = "S1R", .signed_hint = -1, .bit_width = 1, .piece_start = 3348, .piece_count = 1 }, + { .name = "S0R", .signed_hint = -1, .bit_width = 1, .piece_start = 3349, .piece_count = 1 }, + { .name = "DstTile", .signed_hint = -1, .bit_width = 8, .piece_start = 3350, .piece_count = 1 }, + { .name = "CellCountM1", .signed_hint = -1, .bit_width = 8, .piece_start = 3351, .piece_count = 1 }, + { .name = "L", .signed_hint = -1, .bit_width = 1, .piece_start = 3352, .piece_count = 1 }, + { .name = "dst_slot", .signed_hint = -1, .bit_width = 2, .piece_start = 3353, .piece_count = 1 }, + { .name = "MetaGpr", .signed_hint = -1, .bit_width = 5, .piece_start = 3354, .piece_count = 1 }, + { .name = "MetaMode", .signed_hint = -1, .bit_width = 1, .piece_start = 3355, .piece_count = 1 }, }; -const size_t linxisa_fields_count = 2611; +const size_t linxisa_fields_count = 2623; const linxisa_inst_form linxisa_inst_forms[] = { { .id = "acrc_32_a9c0e33f9904", .mnemonic = "ACRC", .asm_fmt = "acrc rst_type", .length_bits = 32, .mask = 0x00000000ff0fffffULL, .match = 0x000000000000302bULL, .field_start = 0, .field_count = 1 }, @@ -5997,9 +6023,6 @@ const linxisa_inst_form linxisa_inst_forms[] = { { .id = "b_hint_32_a65821182bf3", .mnemonic = "B.HINT", .asm_fmt = "B.HINT TRACE.{begin, end}", .length_bits = 32, .mask = 0x0000000000007fffULL, .match = 0x0000000000001033ULL, .field_start = 70, .field_count = 2 }, { .id = "b_iod_32_d4d0a426dcab", .mnemonic = "B.IOD", .asm_fmt = "B.IOD DepSrc0, DepSrc1, DepSrc2, ->DepDst", .length_bits = 32, .mask = 0x000000000600707fULL, .match = 0x0000000000001013ULL, 
.field_start = 72, .field_count = 4 }, { .id = "b_ior_32_c3ea71404eb3", .mnemonic = "B.IOR", .asm_fmt = "B.IOR [RegSrc0, RegSrc1, RegSrc2],[RegDst]", .length_bits = 32, .mask = 0x000000000600707fULL, .match = 0x0000000000000013ULL, .field_start = 76, .field_count = 4 }, - { .id = "b_iot_32_10db6db84f5d", .mnemonic = "B.IOT", .asm_fmt = "B.IOT SrcTile0<.reuse>, , ->DstTile", .length_bits = 32, .mask = 0x00000000803f707fULL, .match = 0x0000000000005013ULL, .field_start = 80, .field_count = 5 }, - { .id = "b_iot_32_8b8bce6bffe8", .mnemonic = "B.IOT", .asm_fmt = "B.IOT SrcTile0<.reuse>, SrcTile1<.reuse>, , ->DstTile", .length_bits = 32, .mask = 0x000000000000707fULL, .match = 0x0000000000004013ULL, .field_start = 85, .field_count = 7 }, - { .id = "b_iot_32_efa0fe3fe49a", .mnemonic = "B.IOT", .asm_fmt = "B.IOT , ->DstTile", .length_bits = 32, .mask = 0x00000000c03fffffULL, .match = 0x0000000000006013ULL, .field_start = 92, .field_count = 3 }, { .id = "b_lt_32_2ca5ecd25cfb", .mnemonic = "B.LT", .asm_fmt = "b.lt SrcL, SrcR, label", .length_bits = 32, .mask = 0x000000000000707fULL, .match = 0x0000000000002027ULL, .field_start = 95, .field_count = 3 }, { .id = "b_ltu_32_f1ea7ad44e37", .mnemonic = "B.LTU", .asm_fmt = "b.ltu SrcL, SrcR, label", .length_bits = 32, .mask = 0x000000000000707fULL, .match = 0x0000000000004027ULL, .field_start = 98, .field_count = 3 }, { .id = "b_ne_32_831af6a36ff4", .mnemonic = "B.NE", .asm_fmt = "b.ne SrcL, SrcR, label", .length_bits = 32, .mask = 0x000000000000707fULL, .match = 0x0000000000001027ULL, .field_start = 101, .field_count = 3 }, @@ -6017,7 +6040,7 @@ const linxisa_inst_form linxisa_inst_forms[] = { { .id = "bstart_call_32_9404418d1ae5", .mnemonic = "BSTART CALL", .asm_fmt = "BSTART.CALL, , , -> ra", .length_bits = 32, .mask = 0x00000000f83f000fULL, .match = 0x0000000050160002ULL, .field_start = 123, .field_count = 2 }, { .id = "bstart_acccvt_32_56c3ce3838c5", .mnemonic = "BSTART.ACCCVT", .asm_fmt = "BSTART.ACCCVT DataType", 
.length_bits = 32, .mask = 0x0000000007ffffffULL, .match = 0x0000000000831181ULL, .field_start = 125, .field_count = 1 }, { .id = "bstart_cube_32_bd3f337acb9d", .mnemonic = "BSTART.CUBE", .asm_fmt = "BSTART.CUBE Function, DataType", .length_bits = 32, .mask = 0x00000000060fffffULL, .match = 0x0000000000031181ULL, .field_start = 126, .field_count = 2 }, - { .id = "bstart_fixp_32_3b0ae11126a6", .mnemonic = "BSTART.FIXP", .asm_fmt = "BSTART.FIXP TileOp, DataType", .length_bits = 32, .mask = 0x00000000060fffffULL, .match = 0x0000000000039181ULL, .field_start = 128, .field_count = 2 }, + { .id = "bstart_fixp_32_3b0ae11126a6", .mnemonic = "BSTART.FIXP", .asm_fmt = "BSTART.FIXP Function, DataType", .length_bits = 32, .mask = 0x00000000060fffffULL, .match = 0x0000000000039181ULL, .field_start = 128, .field_count = 2 }, { .id = "bstart_fp_32_2fbcd8fd8e97", .mnemonic = "BSTART.FP", .asm_fmt = "BSTART.FP RET", .length_bits = 32, .mask = 0x0000000000007fffULL, .match = 0x0000000000007101ULL, .field_start = 130, .field_count = 1 }, { .id = "bstart_fp_32_49b15de09969", .mnemonic = "BSTART.FP", .asm_fmt = "BSTART.FP IND", .length_bits = 32, .mask = 0x0000000000007fffULL, .match = 0x0000000000005101ULL, .field_start = 131, .field_count = 1 }, { .id = "bstart_fp_32_58ad7954fb49", .mnemonic = "BSTART.FP", .asm_fmt = "BSTART.FP COND,