Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/Xilinx/mlir-air into channel-OF-L2
Browse files Browse the repository at this point in the history
  • Loading branch information
abisca committed Jan 25, 2024
2 parents af39145 + 690ba0e commit 358ecf8
Show file tree
Hide file tree
Showing 17 changed files with 693 additions and 70 deletions.
3 changes: 2 additions & 1 deletion mlir/include/air/Conversion/AIRToAIESchedulingUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ std::vector<unsigned> convertToStdVec(SmallVector<int64_t, 6> vec);

bool areIdenticalVectors(std::vector<unsigned> &a, std::vector<unsigned> &b);

int64_t get1DOffset(SmallVector<Value> memcpy_offsets, Value memref);
int64_t get1DOffset(SmallVector<Value> memcpy_offsets,
SmallVector<Value> memcpy_strides, int byte_count_per_elem);

std::vector<AIE::BDDimLayoutAttr>
getWrapsAndStrides(SmallVector<Value> memcpy_sizes,
Expand Down
2 changes: 1 addition & 1 deletion mlir/include/air/Transform/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -1113,7 +1113,7 @@ def AIRRenumberDmaIdPass : Pass<"air-renumber-dma", "func::FuncOp"> {
let summary = "Renumber air dma op ids";
let constructor = "xilinx::air::createAIRRenumberDmaIdPass()";
let options = [
Option<"clMode", "mode", "std::string", /*default=*/"\"herd\"",
Option<"clMode", "mode", "std::string", /*default=*/"\"global\"",
"In which hierarchy level to renumber the dma ops">,
];
}
Expand Down
9 changes: 4 additions & 5 deletions mlir/lib/Conversion/AIRLoweringPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -463,8 +463,6 @@ AIRChannelInterfaceToAIRRtConversionImpl(OpBuilder builder,
auto ctx = thisOp->getContext();

MemRefType thisMemrefType = thisOp.getMemref().getType().cast<MemRefType>();
MemRefType theOtherMemrefType =
theOtherOp.getMemref().getType().cast<MemRefType>();

bool thisOpIsInShim =
thisMemrefType.getMemorySpaceAsInt() == (int)xilinx::air::MemorySpace::L3;
Expand Down Expand Up @@ -517,19 +515,20 @@ AIRChannelInterfaceToAIRRtConversionImpl(OpBuilder builder,
SmallVector<Value, 4> lengths(4, one);
SmallVector<Value, 3> strides(3, zero);

int idx = 4 - thisMemrefType.getRank();
int idx = 4 - thisOp.getOffsets().size();
for (auto o : thisOp.getOffsets()) {
offsets[idx++] =
builder.create<arith::IndexCastOp>(loc, IntegerType::get(ctx, 64), o);
}

idx = 4 - theOtherMemrefType.getRank();
idx = 4 - thisOp.getStrides().size();
auto op_strides = thisOp.getStrides();
if (op_strides.size())
for (auto o : op_strides.drop_back())
strides[idx++] = builder.create<arith::IndexCastOp>(
loc, IntegerType::get(ctx, 64), o);
idx = 4 - thisMemrefType.getRank();
idx = 4 - std::max(thisOp.getSizes().size(),
(unsigned long)thisMemrefType.getRank());
// If sizes field is empty, then infer sizes from memref shape
if (thisOp.getSizes().empty())
for (auto d : air::getTensorShape(thisMemrefType))
Expand Down
27 changes: 19 additions & 8 deletions mlir/lib/Conversion/AIRRtToIpuPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,14 +407,25 @@ specializeAffineForInAIRRtDmaWrapAndStride(OpBuilder builder,
assert(wraps.size() == 4);
assert(strides.size() == 3);

// Temporary hack: stride currently cannot implement repeat with stride = 0.
// This is to be removed when that constraint is gone.
for (unsigned i = 0; i < strides.size() - 1; i++) {
if (mlir::getConstantIntValue(strides[i]) &&
*mlir::getConstantIntValue(strides[i])) {
for (unsigned j = i + 1; j < strides.size(); j++) {
if (mlir::getConstantIntValue(strides[j]) &&
!*mlir::getConstantIntValue(strides[j]))
// Stride = 0 means repeat that dimension. If highest dimension (dim 0) is not
// used, then move the repeat dimension to dim 0, which is the only dim with
// repeat capability. Else, NYI. Fall back to unrolling BDs.
for (unsigned i = 1; i < strides.size(); i++) {
if (mlir::getConstantIntValue(wraps[i]) &&
mlir::getConstantIntValue(strides[i])) {
if (*mlir::getConstantIntValue(wraps[i]) > 1 &&
!*mlir::getConstantIntValue(strides[i])) {
// This is a repeat dimension.
if (mlir::getConstantIntValue(wraps[0]) &&
*mlir::getConstantIntValue(wraps[0]) == 1) {
// Move the repeat dimension i to dimension 0.
auto tmp = wraps[0];
wraps[0] = wraps[i];
wraps[i] = tmp;
tmp = strides[0];
strides[0] = strides[i];
strides[i] = tmp;
} else
return failure();
}
}
Expand Down
47 changes: 20 additions & 27 deletions mlir/lib/Conversion/AIRToAIEPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,8 @@ void outlineAIECores(OpBuilder &builder, AIE::DeviceOp aie_device,

if (options.emit_herd_lock)
core_builder.create<AIE::UseLockOp>(core_builder.getUnknownLoc(),
herd_lock, 0,
AIE::LockAction::Acquire);
herd_lock, AIE::LockAction::Acquire,
0);

Region &r = h.getRegion();
r.cloneInto(&core.getBody(), remap);
Expand All @@ -318,8 +318,8 @@ void outlineAIECores(OpBuilder &builder, AIE::DeviceOp aie_device,
core_builder.setInsertionPoint(launch_bb->getTerminator());
if (options.emit_herd_lock)
core_builder.create<AIE::UseLockOp>(core_builder.getUnknownLoc(),
herd_lock, 0,
AIE::LockAction::Release);
herd_lock, AIE::LockAction::Release,
0);

if (options.emit_while)
core_builder.create<cf::BranchOp>(hloc, core_bb);
Expand Down Expand Up @@ -1972,19 +1972,9 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase<AIRToAIEPass> {
MemRefType memref_ty,
StringAttr dma_name_attr) {
for (auto the_other_chan_o : getTheOtherChannelOpThroughSymbol(chan_o)) {
bool areEqualVecs = true;
if (getTensorShape(memref_ty).size() !=
the_other_chan_o.getSizes().size())
areEqualVecs = false;
else
for (unsigned i = 0; i < getTensorShape(memref_ty).size(); i++)
if (getTensorShape(memref_ty)[i] !=
mlir::getConstantIntValue(the_other_chan_o.getSizes()[i]))
areEqualVecs = false;
if (areEqualVecs)
the_other_chan_o->setAttr(
"metadata", FlatSymbolRefAttr::get(the_other_chan_o->getContext(),
dma_name_attr));
the_other_chan_o->setAttr(
"metadata", FlatSymbolRefAttr::get(the_other_chan_o->getContext(),
dma_name_attr));
}
}

Expand Down Expand Up @@ -2238,17 +2228,18 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase<AIRToAIEPass> {
else
builder.setInsertionPoint(memcpyOpIf);

builder.create<AIE::UseLockOp>(memcpyOpIf->getLoc(), acqLockOp, lockAqValue,
builder.create<AIE::UseLockOp>(memcpyOpIf->getLoc(), acqLockOp,
isAIE2 ? AIE::LockAction::AcquireGreaterEqual
: AIE::LockAction::Acquire);
: AIE::LockAction::Acquire,
lockAqValue);
// try to find a place to put the unlock. If there are deallocs,
// replace them with unlock. Otherwise, put them at the end.
bool need_unlock = true;
for (auto u : alloc.getUsers()) {
if (auto dealloc = dyn_cast<memref::DeallocOp>(u)) {
builder.setInsertionPoint(dealloc);
builder.create<AIE::UseLockOp>(dealloc->getLoc(), relLockOp,
lockRelValue, AIE::LockAction::Release);
AIE::LockAction::Release, lockRelValue);
// assume that the deallocs will take care of it when
// deallocs are present
need_unlock = false;
Expand All @@ -2257,8 +2248,8 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase<AIRToAIEPass> {
if (need_unlock) {
auto t = memcpyOpIf->getBlock()->getTerminator();
builder.setInsertionPoint(t);
builder.create<AIE::UseLockOp>(t->getLoc(), relLockOp, lockRelValue,
AIE::LockAction::Release);
builder.create<AIE::UseLockOp>(t->getLoc(), relLockOp,
AIE::LockAction::Release, lockRelValue);
}
allocs_to_remap.insert(alloc.getDefiningOp());
}
Expand Down Expand Up @@ -2360,13 +2351,15 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase<AIRToAIEPass> {
: ndcpy.getSrcStrides();

int64_t len = getMemcpySizesAsInt(memref, sizes);
int64_t offset = get1DOffset(offsets, memref);
int64_t offset =
get1DOffset(offsets, strides, getElementSizeInBytes(memref.getType()));

Value length =
b.create<arith::ConstantIndexOp>(memcpyOp.getLoc(), len)->getResult(0);
b.create<AIE::UseLockOp>(loc, acqLockOp, lockAqValue,
b.create<AIE::UseLockOp>(loc, acqLockOp,
isAIE2 ? AIE::LockAction::AcquireGreaterEqual
: AIE::LockAction::Acquire);
: AIE::LockAction::Acquire,
lockAqValue);

std::vector<AIE::BDDimLayoutAttr> dims =
getWrapsAndStrides(sizes, strides, ndcpy->getContext());
Expand All @@ -2384,8 +2377,8 @@ class AIRToAIEPass : public air::impl::AIRToAIEBase<AIRToAIEPass> {
loc, bufferOp, offset,
cast<arith::ConstantIndexOp>(length.getDefiningOp()).value(),
wraps_and_strides);
b.create<AIE::UseLockOp>(loc, relLockOp, lockRelValue,
AIE::LockAction::Release);
b.create<AIE::UseLockOp>(loc, relLockOp, AIE::LockAction::Release,
lockRelValue);
}

AIE::ShimDMAOp getShimDMAOp(AIE::TileOp tile) {
Expand Down
15 changes: 10 additions & 5 deletions mlir/lib/Conversion/AIRToAIESchedulingUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,11 @@ bool air::areIdenticalVectors(std::vector<unsigned> &a,
return a == b;
}

int64_t air::get1DOffset(SmallVector<Value> memcpy_offsets, Value memref) {
int64_t air::get1DOffset(SmallVector<Value> memcpy_offsets,
SmallVector<Value> memcpy_strides,
int byte_count_per_elem) {
if (memcpy_offsets.empty())
return 0;
SmallVector<int> memref_shape = getTensorShape(memref.getType());

int64_t one_d_offset = 0;
for (int i = memcpy_offsets.size() - 1; i >= 0; i--) {
Expand All @@ -166,10 +167,14 @@ int64_t air::get1DOffset(SmallVector<Value> memcpy_offsets, Value memref) {
assert(false && "non-static offset in memcpy op");
if (i == memcpy_offsets.size() - 1)
one_d_offset += *offset;
else
one_d_offset += *offset * memref_shape[i + 1];
else {
if (auto stride_i = mlir::getConstantIntValue(memcpy_strides[i])) {
one_d_offset += (*offset) * (*stride_i);
} else
assert(false && "non-static size in memcpy op");
}
}
return one_d_offset * getElementSizeInBytes(memref.getType());
return one_d_offset * byte_count_per_elem;
}

std::vector<AIE::BDDimLayoutAttr>
Expand Down
Loading

0 comments on commit 358ecf8

Please sign in to comment.