-
Notifications
You must be signed in to change notification settings - Fork 68
[IR] Verify explicit local tile address alignment #875
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2811,12 +2811,87 @@ void mlir::pto::annotatePTOEntryFunctions(ModuleOp module) { | |
| return success(); | ||
| } | ||
|
|
||
| static std::optional<uint64_t> | ||
| getLocalAddressAlignmentBytes(Attribute memorySpace) { | ||
| auto addrSpace = dyn_cast_or_null<AddressSpaceAttr>(memorySpace); | ||
| if (!addrSpace) | ||
| return std::nullopt; | ||
|
|
||
| switch (addrSpace.getAddressSpace()) { | ||
| case AddressSpace::VEC: | ||
| case AddressSpace::MAT: | ||
| case AddressSpace::BIAS: | ||
| case AddressSpace::SCALING: | ||
| return 32; | ||
| case AddressSpace::LEFT: | ||
| case AddressSpace::RIGHT: | ||
| case AddressSpace::ACC: | ||
| return 512; | ||
| case AddressSpace::GM: | ||
| case AddressSpace::Zero: | ||
| return std::nullopt; | ||
| } | ||
| return std::nullopt; | ||
| } | ||
|
|
||
| static std::optional<int64_t> getConstantIntValue(Value value) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor: this duplicates |
||
| if (!value) | ||
| return std::nullopt; | ||
|
|
||
| auto constOp = value.getDefiningOp<arith::ConstantOp>(); | ||
| if (!constOp) | ||
| return std::nullopt; | ||
|
|
||
| auto intAttr = dyn_cast<IntegerAttr>(constOp.getValue()); | ||
| if (!intAttr) | ||
| return std::nullopt; | ||
|
|
||
| return intAttr.getValue().getSExtValue(); | ||
| } | ||
|
|
||
| static LogicalResult verifyConstantLocalAddress(Operation *op, Value addr, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. P1 (blocker): the new alignment contract is bypassable on the primary user path.
The tile operand is a TileBufType carrying a memory space, so the fix is one call in TAssignOp::verify(): (This matches the Codex bot's P2.) |
||
| Attribute memorySpace, | ||
| int addrIndex = -1) { | ||
| std::optional<uint64_t> alignment = | ||
| getLocalAddressAlignmentBytes(memorySpace); | ||
| if (!alignment || *alignment == 0) | ||
| return success(); | ||
|
|
||
| std::optional<int64_t> constantAddr = getConstantIntValue(addr); | ||
| if (!constantAddr) | ||
| return success(); | ||
|
|
||
| auto emitAddrError = [&]() { | ||
| InFlightDiagnostic diag = op->emitOpError(); | ||
| if (addrIndex >= 0) | ||
| diag << "addr[" << addrIndex << "]"; | ||
| else | ||
| diag << "addr"; | ||
| return diag; | ||
| }; | ||
|
|
||
| if (*constantAddr < 0) | ||
| return emitAddrError() << " must be non-negative, got " << *constantAddr; | ||
|
|
||
| uint64_t unsignedAddr = static_cast<uint64_t>(*constantAddr); | ||
| if ((unsignedAddr % *alignment) != 0) | ||
| return emitAddrError() | ||
| << " must be aligned to " << *alignment | ||
| << " bytes for local tile memory space, got " << unsignedAddr; | ||
|
|
||
| return success(); | ||
| } | ||
|
Comment on lines
+2852
to
+2883
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The current implementation of `verifyConstantLocalAddress` […]. To fix this and ensure GM/default spaces are completely ignored (as well as improving efficiency by avoiding constant value resolution for non-local spaces), we should retrieve and check the alignment first. If the space is not a local space (i.e., no alignment is returned), we can return `success()`:
static LogicalResult verifyConstantLocalAddress(Operation *op, Value addr,
Attribute memorySpace,
int addrIndex = -1) {
std::optional<uint64_t> alignment =
getLocalAddressAlignmentBytes(memorySpace);
if (!alignment || *alignment == 0)
return success();
std::optional<int64_t> constantAddr = getConstantIntValue(addr);
if (!constantAddr)
return success();
auto emitAddrError = [&]() {
InFlightDiagnostic diag = op->emitOpError();
if (addrIndex >= 0)
diag << "addr[" << addrIndex << "]";
else
diag << "addr";
return diag;
};
if (*constantAddr < 0)
return emitAddrError() << " must be non-negative, got " << *constantAddr;
uint64_t unsignedAddr = static_cast<uint64_t>(*constantAddr);
if ((unsignedAddr % *alignment) != 0)
return emitAddrError()
<< " must be aligned to " << *alignment
<< " bytes for local tile memory space, got " << unsignedAddr;
return success();
}
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Handled in the latest push. |
||
|
|
||
| LogicalResult AllocTileOp::verify() { | ||
| auto ty = getResult().getType(); // TileBufType | ||
|
|
||
| if (failed(verifyTileBufLayoutConstraints(*this, ty, "result"))) | ||
| return failure(); | ||
|
|
||
| if (failed(verifyConstantLocalAddress(getOperation(), getAddr(), | ||
| ty.getMemorySpace()))) | ||
| return failure(); | ||
|
|
||
| // op 上有没有传 operands | ||
| bool hasVR = getValidRow() != nullptr; | ||
| bool hasVC = getValidCol() != nullptr; | ||
|
|
@@ -2848,6 +2923,21 @@ LogicalResult AllocTileOp::verify() { | |
| return success(); | ||
| } | ||
|
|
||
| LogicalResult PointerCastOp::verify() { | ||
| auto memRefTy = dyn_cast<BaseMemRefType>(getResult().getType()); | ||
| if (!memRefTy) | ||
| return emitOpError("result must be a memref type"); | ||
|
|
||
| for (auto [idx, addr] : llvm::enumerate(getAddrs())) { | ||
| if (failed(verifyConstantLocalAddress(getOperation(), addr, | ||
| memRefTy.getMemorySpace(), | ||
| static_cast<int>(idx)))) | ||
| return failure(); | ||
| } | ||
|
|
||
| return success(); | ||
| } | ||
|
|
||
| LogicalResult MaterializeTileOp::verify() { | ||
| auto sourceTy = cast<MemRefType>(getSource().getType()); | ||
| auto resultTy = cast<TileBufType>(getResult().getType()); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| // RUN: not ptoas --pto-level=level3 %s 2>&1 | FileCheck %s | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. P2 (blocker for the stated deliverable): these regression tests never run in CI. CI runs Please move them under Minor related note: ptoas exits 0 on this verify failure (it prints the error + |
||
|
|
||
| module { | ||
| func.func @unaligned_alloc_tile_addr() { | ||
| %bad = arith.constant 70660 : i64 | ||
| // CHECK: pto.alloc_tile | ||
| // CHECK: addr must be aligned to 32 bytes for local tile memory space, got 70660 | ||
| %tile = pto.alloc_tile addr = %bad : !pto.tile_buf<loc=vec, dtype=f16, rows=32, cols=64, v_row=32, v_col=64, blayout=row_major, slayout=none_box, fractal=512, pad=0> | ||
| return | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| // RUN: ptoas --pto-level=level3 %s >/dev/null | ||
|
|
||
| module { | ||
| func.func @aligned_alloc_tile_addr() { | ||
| %ok = arith.constant 71232 : i64 | ||
| %tile = pto.alloc_tile addr = %ok : !pto.tile_buf<loc=vec, dtype=f16, rows=32, cols=64, v_row=32, v_col=64, blayout=row_major, slayout=none_box, fractal=512, pad=0> | ||
| return | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| // RUN: ptoas --pto-level=level3 %s >/dev/null | ||
|
|
||
| module { | ||
| func.func @gm_pointer_cast_addr_is_ignored() { | ||
| %neg = arith.constant -1 : i64 | ||
| %buf = pto.pointer_cast(%neg) : memref<32x64xf16, #pto.address_space<gm>> | ||
| return | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| // RUN: not ptoas --pto-level=level3 %s 2>&1 | FileCheck %s | ||
|
|
||
| module { | ||
| func.func @unaligned_pointer_cast_addr() { | ||
| %bad = arith.constant 70660 : i64 | ||
| // CHECK: pto.pointer_cast | ||
| // CHECK: addr[0] must be aligned to 32 bytes for local tile memory space, got 70660 | ||
| %buf = pto.pointer_cast(%bad) : memref<32x64xf16, #pto.address_space<vec>> | ||
| return | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P3: these 32 / 512 byte values are unsourced and inconsistent with the tile type's own `fractal` field.
- `TASSIGN_IMPL` (`include/pto/npu/a2a3/TAssign.hpp`) has NO address-alignment `static_assert`, so the PR description's rationale ("PTO-ISA's TASSIGN static checks are not triggered") does not match the headers - there is no such check here to mirror.
- `fractal=1024` yet this requires only 512, and MAT (L1) addresses are 512-aligned in every existing test/sample yet this requires only 32 - so an unaligned MAT/ACC base that would hang still passes this verifier (under-checking).