Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@

namespace mlir::tt::ttkernel {

/// Populate cleanup patterns for TTKernel ops.
void populateTTKernelCleanupPatterns(RewritePatternSet &patterns);
/// Populate cleanup patterns for TTKernel ops. These patterns optimize
/// TTKernel code by removing redundant operations (e.g., deduplicating
/// consecutive barriers of the same type).
///
/// \param patterns Pattern set that receives the cleanup patterns.
/// \param useTridBarriers When true, also adds TRID-barrier deduplication
///        patterns. Defaults to false, in which case only the non-TRID
///        barrier deduplication patterns are added.
void populateTTKernelCleanupPatterns(RewritePatternSet &patterns,
bool useTridBarriers = false);

} // namespace mlir::tt::ttkernel

Expand Down
19 changes: 16 additions & 3 deletions include/ttlang/Dialect/TTL/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -119,16 +119,29 @@ def TTLInsertCopyWait

def TTLConvertTTLToTTKernel
: Pass<"convert-ttl-to-ttkernel", "::mlir::ModuleOp"> {
let summary = "Lower TTL DMA ops to TTKernel using global barriers (temporary)";
let summary = "Lower TTL DMA ops to TTKernel noc ops";
let description = [{
Converts TTL DMA ops to TTKernel noc ops. Uses global barriers until TRID
barriers are available. Covers bind_cb, copy, wait MVP path.
Converts TTL DMA ops to TTKernel noc ops. Covers bind_cb, copy, wait MVP
path.

Two lowering modes are supported:
- Default: global barriers (noc_async_{read,write}_barrier).
- Optional: TRID-aware barriers (noc_async_*_set_trid +
noc_async_*_barrier_with_trid).

TODO(ttl): Profile both modes on representative benchmarks and consider
changing the default.

TODO(ttl): Refine lowering to emit real CB handles and proper NOC addresses.
Issue: #77 (umbrella issue with subtasks #78-#89).
}];

let options = [
Option<"useTridBarriers", "use-trid-barriers", "bool", "false",
"Use TRID-aware DMA waits (barrier_with_trid) instead of global barriers. "
"TRID must be unique per outstanding copy; ordering of TRID values is not "
"semantically significant. Generated TRIDs may be nondeterministic when "
"patterns are applied in parallel.">,
Option<"reduceFullFp32", "reduce-full-fp32", "bool", "true",
"Enable FP32 accumulation for reduce operations.">
];
Comment on lines 138 to +147
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for adding the option! Not asking you to do this in the PR but it would be interesting to profile the different approaches with a small set of representative benchmarks and set the default based on that (perhaps add a short TODO to that effect here if you agree?).

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Can't run the profiling myself: no device is available
  • Added TODO in pass description: “Profile both modes on representative benchmarks and consider changing the default.”

Expand Down
4 changes: 4 additions & 0 deletions include/ttlang/Dialect/TTL/Pipelines/TTLPipelines.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ struct TTLToTTKernelPipelineOptions
"computations. When disabled, emit an error if any "
"operation requires a compiler-allocated DFB."),
llvm::cl::init(true)};
Option<bool> useTridBarriers{
*this, "use-trid-barriers",
llvm::cl::desc("Use TRID-aware DMA waits (barrier_with_trid)."),
llvm::cl::init(false)};
};

void createTTLToTTKernelPipeline(mlir::OpPassManager &pm,
Expand Down
47 changes: 46 additions & 1 deletion lib/Dialect/TTKernel/Transforms/TTKernelCleanupPatterns.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "mlir/IR/PatternMatch.h"
#include "mlir/Support/LogicalResult.h"
#include "ttmlir/Dialect/TTKernel/IR/TTKernelOps.h"
#include "llvm/ADT/STLExtras.h"

namespace mlir::tt::ttkernel {

Expand All @@ -31,13 +32,57 @@ struct DeduplicateConsecutiveBarriers : OpRewritePattern<BarrierOp> {
}
};

/// Removes a TRID barrier that directly follows another barrier of the same
/// op type carrying exactly the same operand list (same TRID and, when
/// present, the same optional NOC). A barrier whose operands differ from its
/// predecessor's is waiting on something else and is therefore kept; this is
/// the key difference from the global-barrier deduplication above.
template <typename BarrierWithTridOp>
struct DeduplicateConsecutiveTridBarriers
    : OpRewritePattern<BarrierWithTridOp> {
  using OpRewritePattern<BarrierWithTridOp>::OpRewritePattern;

  /// Matches `op` only when the operation immediately before it in the block
  /// is a `BarrierWithTridOp` with an identical operand list, and erases `op`
  /// in that case. Returns failure (leaving the IR untouched) otherwise.
  LogicalResult matchAndRewrite(BarrierWithTridOp op,
                                PatternRewriter &rewriter) const override {
    // Nothing precedes the first operation of a block, so there is nothing to
    // deduplicate against.
    auto *previous = op->getPrevNode();
    if (previous == nullptr) {
      return failure();
    }

    // Only an immediately preceding barrier of the very same op type counts.
    auto earlierBarrier = dyn_cast<BarrierWithTridOp>(previous);
    if (!earlierBarrier) {
      return failure();
    }

    // The range comparison fails on a length mismatch as well as on any
    // differing operand, so a single check covers both conditions.
    const bool sameOperands =
        llvm::equal(op->getOperands(), earlierBarrier->getOperands());
    if (!sameOperands) {
      return failure();
    }

    // The later barrier is redundant; keep the earlier one.
    rewriter.eraseOp(op);
    return success();
  }
};

} // namespace

void populateTTKernelCleanupPatterns(RewritePatternSet &patterns) {
/// Registers the TTKernel cleanup patterns on `patterns`.
///
/// The deduplication patterns for consecutive read and write barriers are
/// always added. The TRID-aware variants are added only when
/// `useTridBarriers` is true.
void populateTTKernelCleanupPatterns(RewritePatternSet &patterns,
                                     bool useTridBarriers) {
  auto *context = patterns.getContext();

  // Always registered: read barrier first, then write barrier.
  patterns.add<DeduplicateConsecutiveBarriers<NocAsyncReadBarrierOp>,
               DeduplicateConsecutiveBarriers<NocAsyncWriteBarrierOp>>(
      context);

  if (!useTridBarriers) {
    return;
  }

  // Opt-in: TRID barrier deduplication, read variant first, then write.
  patterns.add<
      DeduplicateConsecutiveTridBarriers<NocAsyncReadBarrierWithTridOp>,
      DeduplicateConsecutiveTridBarriers<NocAsyncWriteBarrierWithTridOp>>(
      context);
}

} // namespace mlir::tt::ttkernel
1 change: 1 addition & 0 deletions lib/Dialect/TTL/Pipelines/TTLPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ void createTTLToTTKernelPipeline(OpPassManager &pm,
pm.addPass(createTTLErasePipeNetScopes());
{
TTLConvertTTLToTTKernelOptions ttkOpts;
ttkOpts.useTridBarriers = options.useTridBarriers;
ttkOpts.reduceFullFp32 = options.reduceFullFp32;
pm.addPass(createTTLConvertTTLToTTKernel(ttkOpts));
}
Expand Down
Loading
Loading