Implement simple loop-invariant-code-motion based on dialect interfaces.

PiperOrigin-RevId: 275004258
Stephan Herhut authored on 2019-10-16 04:28:13 -07:00; committed by A. Unique TensorFlower
parent 98f64b4da1
commit b843cc5d5a
15 changed files with 1168 additions and 514 deletions

View File

@@ -28,6 +28,7 @@
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/Transforms/LoopLikeInterface.h"
namespace mlir {
class AffineBound;

View File

@@ -28,6 +28,11 @@
include "mlir/IR/OpBase.td"
#endif // OP_BASE
#ifdef MLIR_LOOPLIKEINTERFACE
#else
include "mlir/Transforms/LoopLikeInterface.td"
#endif
include "mlir/Dialect/AffineOps/AffineOpsBase.td"
def Affine_Dialect : Dialect {
@@ -53,7 +58,9 @@ class Affine_Op<string mnemonic, list<OpTrait> traits = []> :
def ImplicitAffineTerminator
: SingleBlockImplicitTerminator<"AffineTerminatorOp">;
def AffineForOp : Affine_Op<"for", [ImplicitAffineTerminator]> {
def AffineForOp : Affine_Op<"for",
[ImplicitAffineTerminator,
DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
let summary = "for operation";
let description = [{
The "affine.for" operation represents an affine loop nest, defining an SSA

View File

@@ -26,6 +26,7 @@
#include "mlir/IR/Builders.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/Transforms/LoopLikeInterface.h"
namespace mlir {
namespace loop {

View File

@@ -28,6 +28,11 @@
include "mlir/IR/OpBase.td"
#endif // OP_BASE
#ifdef MLIR_LOOPLIKEINTERFACE
#else
include "mlir/Transforms/LoopLikeInterface.td"
#endif
def Loop_Dialect : Dialect {
let name = "loop";
let cppNamespace = "";
@@ -48,7 +53,8 @@ class Loop_Op<string mnemonic, list<OpTrait> traits = []> :
}
def ForOp : Loop_Op<"for",
[SingleBlockImplicitTerminator<"TerminatorOp">]> {
[DeclareOpInterfaceMethods<LoopLikeOpInterface>,
SingleBlockImplicitTerminator<"TerminatorOp">]> {
let summary = "for operation";
let description = [{
The "loop.for" operation represents a loop nest taking 3 SSA value as

View File

@@ -249,6 +249,15 @@ public:
return op_filter_iterator<OpT>(end(), end());
}
/// Return an iterator range over the operations within this block, excluding
/// the terminator operation at the end.
llvm::iterator_range<iterator> without_terminator() {
if (begin() == end())
return {begin(), end()};
auto endIt = --end();
return {begin(), endIt};
}
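
A minimal usage sketch (hypothetical helper, assuming an MLIR build of this vintage): iterating with without_terminator() visits every operation in the block except the trailing terminator, which must stay in place.

// Sketch only; collectBody is a hypothetical helper, not part of this commit.
#include "mlir/IR/Block.h"
#include "llvm/ADT/SmallVector.h"

static void collectBody(mlir::Block &block,
                        llvm::SmallVectorImpl<mlir::Operation *> &out) {
  // Visit all operations except the block's terminator.
  for (mlir::Operation &op : block.without_terminator())
    out.push_back(&op);
}
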
//===--------------------------------------------------------------------===//
// Terminator management
//===--------------------------------------------------------------------===//

View File

@@ -0,0 +1,35 @@
//===- LoopLikeInterface.h - Loop-like operations interface ---------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements the operation interface for loop-like operations.
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_TRANSFORMS_LOOPLIKEINTERFACE_H_
#define MLIR_TRANSFORMS_LOOPLIKEINTERFACE_H_
#include "mlir/IR/OpDefinition.h"
#include "mlir/Support/LogicalResult.h"
#include "llvm/ADT/ArrayRef.h"
namespace mlir {
#include "mlir/Transforms/LoopLikeInterface.h.inc"
} // namespace mlir
#endif // MLIR_TRANSFORMS_LOOPLIKEINTERFACE_H_

View File

@@ -0,0 +1,62 @@
//===- LoopLikeInterface.td - LoopLike interface -----------*- tablegen -*-===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// Defines the interface for loop-like operations as used by LICM.
//
//===----------------------------------------------------------------------===//
#ifdef MLIR_LOOPLIKEINTERFACE
#else
#define MLIR_LOOPLIKEINTERFACE
#ifdef OP_BASE
#else
include "mlir/IR/OpBase.td"
#endif // OP_BASE
def LoopLikeOpInterface : OpInterface<"LoopLikeOpInterface"> {
let description = [{
Encodes properties of a loop. Operations that implement this interface will
be considered by loop-invariant code motion.
}];
let methods = [
InterfaceMethod<[{
Returns true if the given value is defined outside of the loop.
A sensible implementation could be to check whether the value's defining
operation lies outside of the loop's body region. If the loop uses
explicit capture of dependencies, an implementation could check whether
the value corresponds to a captured dependency.
}],
"bool", "isDefinedOutsideOfLoop", (ins "Value *":$value)
>,
InterfaceMethod<[{
Returns the region that makes up the body of the loop and should be
inspected for loop-invariant operations.
}],
"Region &", "getLoopBody"
>,
InterfaceMethod<[{
Moves the given vector of operations out of the loop. The vector is
sorted topologically.
}],
"LogicalResult", "moveOutOfLoop", (ins "ArrayRef<Operation *>":$ops)
>,
];
}
#endif // MLIR_LOOPLIKEINTERFACE
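
As a hedged sketch of how an op might implement these three methods (hypothetical MyLoopOp with a single region accessor body(); the real definitions for affine.for and loop.for appear later in this commit):

// Hypothetical op; mirrors the AffineForOp/ForOp definitions below.
Region &MyLoopOp::getLoopBody() { return body(); }

bool MyLoopOp::isDefinedOutsideOfLoop(Value *value) {
  // A value is defined outside the loop iff its parent region is not
  // nested within the loop body.
  return !body().isAncestor(value->getParentRegion());
}

LogicalResult MyLoopOp::moveOutOfLoop(ArrayRef<Operation *> ops) {
  // `ops` is topologically sorted, so moving the operations in order
  // keeps the hoisted code in dominance order.
  for (Operation *op : ops)
    op->moveBefore(this->getOperation());
  return success();
}
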

View File

@@ -32,6 +32,7 @@ namespace mlir {
class AffineForOp;
class FuncOp;
class ModuleOp;
class Pass;
template <typename T> class OpPassBase;
/// Creates a constant folding pass. Note that this pass solely provides simple
@@ -90,7 +91,11 @@ createLoopFusionPass(unsigned fastMemorySpace = 0,
/// Creates a loop invariant code motion pass that hoists loop invariant
/// instructions out of the loop.
std::unique_ptr<OpPassBase<FuncOp>> createLoopInvariantCodeMotionPass();
std::unique_ptr<Pass> createLoopInvariantCodeMotionPass();
/// Creates a loop invariant code motion pass that hoists loop invariant
/// instructions out of affine loops.
std::unique_ptr<OpPassBase<FuncOp>> createAffineLoopInvariantCodeMotionPass();
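
A hedged usage sketch (assuming a parsed ModuleOp; everything other than the create function above is illustrative): the interface-based pass is operation-agnostic and can be added directly to a pass manager.

#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"

// Sketch only: run the generic LICM pass over a module.
static mlir::LogicalResult runLICM(mlir::ModuleOp module) {
  mlir::PassManager pm(module.getContext());
  pm.addPass(mlir::createLoopInvariantCodeMotionPass());
  return pm.run(module);
}
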
/// Creates a pass to pipeline explicit movement of data across levels of the
/// memory hierarchy.

View File

@@ -0,0 +1,73 @@
//===- SideEffectsInterface.h - dialect interface modeling side effects ---===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file specifies a dialect interface to model side-effects.
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_TRANSFORMS_SIDEEFFECTSINTERFACE_H_
#define MLIR_TRANSFORMS_SIDEEFFECTSINTERFACE_H_
#include "mlir/IR/DialectInterface.h"
#include "mlir/IR/Operation.h"
namespace mlir {
/// Specifies an interface for basic side-effect modelling that is used by the
/// loop-invariant code motion pass.
///
/// TODO: This interface should be replaced by a more general solution.
class SideEffectsDialectInterface
: public DialectInterface::Base<SideEffectsDialectInterface> {
public:
SideEffectsDialectInterface(Dialect *dialect) : Base(dialect) {}
enum SideEffecting {
Never, /* the operation has no side-effects */
Recursive, /* the operation has side-effects if a contained operation has */
Always /* the operation has side-effects */
};
/// Checks whether the given operation has side-effects.
virtual SideEffecting isSideEffecting(Operation *op) const {
if (op->hasNoSideEffect())
return Never;
return Always;
};
};
class SideEffectsInterface
: public DialectInterfaceCollection<SideEffectsDialectInterface> {
public:
using SideEffecting = SideEffectsDialectInterface::SideEffecting;
explicit SideEffectsInterface(MLIRContext *ctx)
: DialectInterfaceCollection<SideEffectsDialectInterface>(ctx) {}
SideEffecting isSideEffecting(Operation *op) const {
// First check generic trait.
if (op->hasNoSideEffect())
return SideEffecting::Never;
if (auto handler = getInterfaceFor(op))
return handler->isSideEffecting(op);
return SideEffecting::Always;
}
};
} // namespace mlir
#endif // MLIR_TRANSFORMS_SIDEEFFECTSINTERFACE_H_
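
A hedged sketch of the extension point (hypothetical MyRegionOp; the Affine and Loop dialects register analogous interfaces later in this commit): a dialect marks ops whose effects stem only from their contained ops as Recursive, so LICM can recurse into them instead of conservatively assuming Always.

// Hypothetical dialect interface; mirrors AffineSideEffectsInterface below.
struct MySideEffectsInterface : public mlir::SideEffectsDialectInterface {
  using SideEffectsDialectInterface::SideEffectsDialectInterface;
  SideEffecting isSideEffecting(mlir::Operation *op) const override {
    // A purely structural op: side-effecting only if a nested op is.
    if (llvm::isa<MyRegionOp>(op))
      return Recursive;
    // Otherwise fall back to the trait-based answer (Never/Always).
    return SideEffectsDialectInterface::isSideEffecting(op);
  }
};
// Registered in the dialect constructor: addInterfaces<MySideEffectsInterface>();
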

View File

@@ -23,9 +23,11 @@
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/InliningUtils.h"
#include "mlir/Transforms/SideEffectsInterface.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Support/Debug.h"
using namespace mlir;
using llvm::dbgs;
@@ -68,6 +70,19 @@ struct AffineInlinerInterface : public DialectInlinerInterface {
/// Affine regions should be analyzed recursively.
bool shouldAnalyzeRecursively(Operation *op) const final { return true; }
};
// TODO(mlir): Extend for other ops in this dialect.
struct AffineSideEffectsInterface : public SideEffectsDialectInterface {
using SideEffectsDialectInterface::SideEffectsDialectInterface;
SideEffecting isSideEffecting(Operation *op) const override {
if (isa<AffineIfOp>(op)) {
return Recursive;
}
return SideEffectsDialectInterface::isSideEffecting(op);
};
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
@@ -81,7 +96,7 @@ AffineOpsDialect::AffineOpsDialect(MLIRContext *context)
#define GET_OP_LIST
#include "mlir/Dialect/AffineOps/AffineOps.cpp.inc"
>();
addInterfaces<AffineInlinerInterface>();
addInterfaces<AffineInlinerInterface, AffineSideEffectsInterface>();
}
/// A utility function to check if a given region is attached to a function.
@@ -1530,6 +1545,18 @@ bool AffineForOp::matchingBoundOperandList() {
return true;
}
Region &AffineForOp::getLoopBody() { return region(); }
bool AffineForOp::isDefinedOutsideOfLoop(Value *value) {
return !region().isAncestor(value->getParentRegion());
}
LogicalResult AffineForOp::moveOutOfLoop(ArrayRef<Operation *> ops) {
for (auto *op : ops)
op->moveBefore(*this);
return success();
}
/// Returns true if the provided value is the induction variable of an AffineForOp.
bool mlir::isForInductionVar(Value *val) {
return getForInductionVarOwner(val) != AffineForOp();

View File

@@ -29,10 +29,29 @@
#include "mlir/IR/Value.h"
#include "mlir/Support/MathExtras.h"
#include "mlir/Support/STLExtras.h"
#include "mlir/Transforms/SideEffectsInterface.h"
using namespace mlir;
using namespace mlir::loop;
//===----------------------------------------------------------------------===//
// LoopOpsDialect Interfaces
//===----------------------------------------------------------------------===//
namespace {
struct LoopSideEffectsInterface : public SideEffectsDialectInterface {
using SideEffectsDialectInterface::SideEffectsDialectInterface;
SideEffecting isSideEffecting(Operation *op) const override {
if (isa<IfOp>(op) || isa<ForOp>(op)) {
return Recursive;
}
return SideEffectsDialectInterface::isSideEffecting(op);
};
};
} // namespace
//===----------------------------------------------------------------------===//
// LoopOpsDialect
//===----------------------------------------------------------------------===//
@@ -43,6 +62,7 @@ LoopOpsDialect::LoopOpsDialect(MLIRContext *context)
#define GET_OP_LIST
#include "mlir/Dialect/LoopOps/LoopOps.cpp.inc"
>();
addInterfaces<LoopSideEffectsInterface>();
}
//===----------------------------------------------------------------------===//
@@ -112,6 +132,18 @@ static ParseResult parseForOp(OpAsmParser &parser, OperationState &result) {
return success();
}
Region &ForOp::getLoopBody() { return region(); }
bool ForOp::isDefinedOutsideOfLoop(Value *value) {
return !region().isAncestor(value->getParentRegion());
}
LogicalResult ForOp::moveOutOfLoop(ArrayRef<Operation *> ops) {
for (auto *op : ops)
op->moveBefore(this->getOperation());
return success();
}
ForOp mlir::loop::getForInductionVarOwner(Value *val) {
auto *ivArg = dyn_cast<BlockArgument>(val);
if (!ivArg)

View File

@@ -0,0 +1,248 @@
//===- AffineLoopInvariantCodeMotion.cpp - Loop invariant code motion -----===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements loop invariant code motion.
//
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/Utils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "licm"
using namespace mlir;
namespace {
/// Loop invariant code motion (LICM) pass.
/// TODO(asabne) : The pass is missing zero-trip tests.
/// TODO(asabne) : Check for the presence of side effects before hoisting.
/// TODO: This code should be removed once the new LICM pass can handle its
/// uses.
struct LoopInvariantCodeMotion : public FunctionPass<LoopInvariantCodeMotion> {
void runOnFunction() override;
void runOnAffineForOp(AffineForOp forOp);
};
} // end anonymous namespace
static bool
checkInvarianceOfNestedIfOps(Operation *op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist);
static bool isOpLoopInvariant(Operation &op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist);
static bool
areAllOpsInTheBlockListInvariant(Region &blockList, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist);
static bool isMemRefDereferencingOp(Operation &op) {
// TODO(asabne): Support DMA Ops.
if (isa<AffineLoadOp>(op) || isa<AffineStoreOp>(op)) {
return true;
}
return false;
}
// Returns true if the individual op is loop invariant.
bool isOpLoopInvariant(Operation &op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist) {
LLVM_DEBUG(llvm::dbgs() << "iterating on op: " << op;);
if (isa<AffineIfOp>(op)) {
if (!checkInvarianceOfNestedIfOps(&op, indVar, definedOps, opsToHoist)) {
return false;
}
} else if (isa<AffineForOp>(op)) {
// If the body of a predicated region has a for loop, we don't hoist the
// 'affine.if'.
return false;
} else if (isa<AffineDmaStartOp>(op) || isa<AffineDmaWaitOp>(op)) {
// TODO(asabne): Support DMA ops.
return false;
} else if (!isa<ConstantOp>(op)) {
if (isMemRefDereferencingOp(op)) {
Value *memref = isa<AffineLoadOp>(op)
? cast<AffineLoadOp>(op).getMemRef()
: cast<AffineStoreOp>(op).getMemRef();
for (auto *user : memref->getUsers()) {
// If this memref has a user that is a DMA, give up because these
// operations write to this memref.
if (isa<AffineDmaStartOp>(user) || isa<AffineDmaWaitOp>(user)) {
return false;
}
// If the memref used by the load/store is used in a store elsewhere in
// the loop nest, we do not hoist. Similarly, if the memref used in a
// load is also being stored too, we do not hoist the load.
if (isa<AffineStoreOp>(user) ||
(isa<AffineLoadOp>(user) && isa<AffineStoreOp>(op))) {
if (&op != user) {
SmallVector<AffineForOp, 8> userIVs;
getLoopIVs(*user, &userIVs);
// Check that userIVs don't contain the for loop around the op.
if (llvm::is_contained(userIVs, getForInductionVarOwner(indVar))) {
return false;
}
}
}
}
}
// Insert this op in the defined ops list.
definedOps.insert(&op);
if (op.getNumOperands() == 0 && !isa<AffineTerminatorOp>(op)) {
LLVM_DEBUG(llvm::dbgs() << "\nNon-constant op with 0 operands\n");
return false;
}
for (unsigned int i = 0; i < op.getNumOperands(); ++i) {
auto *operandSrc = op.getOperand(i)->getDefiningOp();
LLVM_DEBUG(
op.getOperand(i)->print(llvm::dbgs() << "\nIterating on operand\n"));
// If the loop IV is the operand, this op isn't loop invariant.
if (indVar == op.getOperand(i)) {
LLVM_DEBUG(llvm::dbgs() << "\nLoop IV is the operand\n");
return false;
}
if (operandSrc != nullptr) {
LLVM_DEBUG(llvm::dbgs()
<< *operandSrc << "\nIterating on operand src\n");
// If the value was defined in the loop (outside of the
// if/else region), and that operation itself wasn't meant to
// be hoisted, then mark this operation loop dependent.
if (definedOps.count(operandSrc) && opsToHoist.count(operandSrc) == 0) {
return false;
}
}
}
}
// If no operand was loop variant, mark this op for motion.
opsToHoist.insert(&op);
return true;
}
// Checks if all ops in a region (i.e. list of blocks) are loop invariant.
bool areAllOpsInTheBlockListInvariant(
Region &blockList, Value *indVar, SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist) {
for (auto &b : blockList) {
for (auto &op : b) {
if (!isOpLoopInvariant(op, indVar, definedOps, opsToHoist)) {
return false;
}
}
}
return true;
}
// Returns true if the affine.if op can be hoisted.
bool checkInvarianceOfNestedIfOps(Operation *op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist) {
assert(isa<AffineIfOp>(op));
auto ifOp = cast<AffineIfOp>(op);
if (!areAllOpsInTheBlockListInvariant(ifOp.thenRegion(), indVar, definedOps,
opsToHoist)) {
return false;
}
if (!areAllOpsInTheBlockListInvariant(ifOp.elseRegion(), indVar, definedOps,
opsToHoist)) {
return false;
}
return true;
}
void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
auto *loopBody = forOp.getBody();
auto *indVar = forOp.getInductionVar();
SmallPtrSet<Operation *, 8> definedOps;
// This is the place where hoisted instructions would reside.
OpBuilder b(forOp.getOperation());
SmallPtrSet<Operation *, 8> opsToHoist;
SmallVector<Operation *, 8> opsToMove;
for (auto &op : *loopBody) {
// We don't hoist for loops.
if (!isa<AffineForOp>(op)) {
if (!isa<AffineTerminatorOp>(op)) {
if (isOpLoopInvariant(op, indVar, definedOps, opsToHoist)) {
opsToMove.push_back(&op);
}
}
}
}
// For all instructions that we found to be invariant, place them
// sequentially right before the for loop.
for (auto *op : opsToMove) {
op->moveBefore(forOp);
}
LLVM_DEBUG(forOp.getOperation()->print(llvm::dbgs() << "Modified loop\n"));
}
void LoopInvariantCodeMotion::runOnFunction() {
// Walk through all loops in a function in innermost-loop-first order. This
// way, we first LICM from the inner loop, and place the ops in
// the outer loop, which in turn can be further LICM'ed.
getFunction().walk([&](AffineForOp op) {
LLVM_DEBUG(op.getOperation()->print(llvm::dbgs() << "\nOriginal loop\n"));
runOnAffineForOp(op);
});
}
std::unique_ptr<OpPassBase<FuncOp>>
mlir::createAffineLoopInvariantCodeMotionPass() {
return std::make_unique<LoopInvariantCodeMotion>();
}
static PassRegistration<LoopInvariantCodeMotion>
pass("affine-loop-invariant-code-motion",
"Hoist loop invariant instructions outside of the loop");

View File

@@ -19,26 +19,16 @@
//
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/Utils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopLikeInterface.h"
#include "mlir/Transforms/SideEffectsInterface.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "licm"
@@ -46,200 +36,114 @@ using namespace mlir;
namespace {
using SideEffecting = SideEffectsInterface::SideEffecting;
/// Loop invariant code motion (LICM) pass.
/// TODO(asabne) : The pass is missing zero-trip tests.
/// TODO(asabne) : Check for the presence of side effects before hoisting.
struct LoopInvariantCodeMotion : public FunctionPass<LoopInvariantCodeMotion> {
void runOnFunction() override;
void runOnAffineForOp(AffineForOp forOp);
struct LoopInvariantCodeMotion : public OperationPass<LoopInvariantCodeMotion> {
public:
void runOnOperation() override;
};
} // end anonymous namespace
static bool
checkInvarianceOfNestedIfOps(Operation *op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist);
static bool isOpLoopInvariant(Operation &op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist);
static bool
areAllOpsInTheBlockListInvariant(Region &blockList, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist);
static bool isMemRefDereferencingOp(Operation &op) {
// TODO(asabne): Support DMA Ops.
if (isa<AffineLoadOp>(op) || isa<AffineStoreOp>(op)) {
return true;
}
return false;
}
std::unique_ptr<OpPassBase<FuncOp>> mlir::createLoopInvariantCodeMotionPass() {
return std::make_unique<LoopInvariantCodeMotion>();
}
// Returns true if the individual op is loop invariant.
bool isOpLoopInvariant(Operation &op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist) {
LLVM_DEBUG(llvm::dbgs() << "iterating on op: " << op;);
if (isa<AffineIfOp>(op)) {
if (!checkInvarianceOfNestedIfOps(&op, indVar, definedOps, opsToHoist)) {
// Checks whether the given op can be hoisted by checking that
// - the op and any of its contained operations do not depend on SSA values
// defined inside of the loop (by means of calling definedOutside).
// - the op has no side-effects. If sideEffecting is Never, side-effects of this
// op and its nested ops are ignored.
static bool canBeHoisted(Operation *op,
llvm::function_ref<bool(Value *)> definedOutside,
SideEffecting sideEffecting,
SideEffectsInterface &interface) {
// Check that dependencies are defined outside of loop.
if (!llvm::all_of(op->getOperands(), definedOutside))
return false;
// Check whether this op is side-effect free. If we already know that there
// can be no side-effects because the surrounding op has claimed so, we can
// (and have to) skip this step.
auto thisOpIsSideEffecting = sideEffecting;
if (thisOpIsSideEffecting != SideEffecting::Never) {
thisOpIsSideEffecting = interface.isSideEffecting(op);
// If the op always has side-effects, we cannot hoist.
if (thisOpIsSideEffecting == SideEffecting::Always)
return false;
}
} else if (isa<AffineForOp>(op)) {
// If the body of a predicated region has a for loop, we don't hoist the
// 'affine.if'.
return false;
} else if (isa<AffineDmaStartOp>(op) || isa<AffineDmaWaitOp>(op)) {
// TODO(asabne): Support DMA ops.
return false;
} else if (!isa<ConstantOp>(op)) {
if (isMemRefDereferencingOp(op)) {
Value *memref = isa<AffineLoadOp>(op)
? cast<AffineLoadOp>(op).getMemRef()
: cast<AffineStoreOp>(op).getMemRef();
for (auto *user : memref->getUsers()) {
// If this memref has a user that is a DMA, give up because these
// operations write to this memref.
if (isa<AffineDmaStartOp>(op) || isa<AffineDmaWaitOp>(op)) {
}
// Recurse into the regions for this op and check whether the contained ops
// can be hoisted.
for (auto &region : op->getRegions()) {
for (auto &block : region.getBlocks()) {
for (auto &innerOp : block) {
if (innerOp.isKnownTerminator())
continue;
if (!canBeHoisted(&innerOp, definedOutside, thisOpIsSideEffecting,
interface))
return false;
}
// If the memref used by the load/store is used in a store elsewhere in
// the loop nest, we do not hoist. Similarly, if the memref used in a
// load is also being stored too, we do not hoist the load.
if (isa<AffineStoreOp>(user) ||
(isa<AffineLoadOp>(user) && isa<AffineStoreOp>(op))) {
if (&op != user) {
SmallVector<AffineForOp, 8> userIVs;
getLoopIVs(*user, &userIVs);
// Check that userIVs don't contain the for loop around the op.
if (llvm::is_contained(userIVs, getForInductionVarOwner(indVar))) {
return false;
}
}
}
}
}
// Insert this op in the defined ops list.
definedOps.insert(&op);
if (op.getNumOperands() == 0 && !isa<AffineTerminatorOp>(op)) {
LLVM_DEBUG(llvm::dbgs() << "\nNon-constant op with 0 operands\n");
return false;
}
for (unsigned int i = 0; i < op.getNumOperands(); ++i) {
auto *operandSrc = op.getOperand(i)->getDefiningOp();
LLVM_DEBUG(
op.getOperand(i)->print(llvm::dbgs() << "\nIterating on operand\n"));
// If the loop IV is the operand, this op isn't loop invariant.
if (indVar == op.getOperand(i)) {
LLVM_DEBUG(llvm::dbgs() << "\nLoop IV is the operand\n");
return false;
}
if (operandSrc != nullptr) {
LLVM_DEBUG(llvm::dbgs()
<< *operandSrc << "\nIterating on operand src\n");
// If the value was defined in the loop (outside of the
// if/else region), and that operation itself wasn't meant to
// be hoisted, then mark this operation loop dependent.
if (definedOps.count(operandSrc) && opsToHoist.count(operandSrc) == 0) {
return false;
}
}
}
}
// If no operand was loop variant, mark this op for motion.
opsToHoist.insert(&op);
return true;
}
// Checks if all ops in a region (i.e. list of blocks) are loop invariant.
bool areAllOpsInTheBlockListInvariant(
Region &blockList, Value *indVar, SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist) {
static LogicalResult moveLoopInvariantCode(LoopLikeOpInterface looplike,
SideEffectsInterface &interface) {
auto &loopBody = looplike.getLoopBody();
for (auto &b : blockList) {
for (auto &op : b) {
if (!isOpLoopInvariant(op, indVar, definedOps, opsToHoist)) {
return false;
}
}
}
return true;
}
// Returns true if the affine.if op can be hoisted.
bool checkInvarianceOfNestedIfOps(Operation *op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist) {
assert(isa<AffineIfOp>(op));
auto ifOp = cast<AffineIfOp>(op);
if (!areAllOpsInTheBlockListInvariant(ifOp.thenRegion(), indVar, definedOps,
opsToHoist)) {
return false;
}
if (!areAllOpsInTheBlockListInvariant(ifOp.elseRegion(), indVar, definedOps,
opsToHoist)) {
return false;
}
return true;
}
void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
auto *loopBody = forOp.getBody();
auto *indVar = forOp.getInductionVar();
SmallPtrSet<Operation *, 8> definedOps;
// This is the place where hoisted instructions would reside.
OpBuilder b(forOp.getOperation());
SmallPtrSet<Operation *, 8> opsToHoist;
// We use two collections here as we need to preserve the order for insertion
// and this is easiest.
SmallPtrSet<Operation *, 8> willBeMovedSet;
SmallVector<Operation *, 8> opsToMove;
for (auto &op : *loopBody) {
// We don't hoist for loops.
if (!isa<AffineForOp>(op)) {
if (!isa<AffineTerminatorOp>(op)) {
if (isOpLoopInvariant(op, indVar, definedOps, opsToHoist)) {
opsToMove.push_back(&op);
}
// Helper to check whether an operation is loop invariant wrt. SSA properties.
auto isDefinedOutsideOfBody = [&](Value *value) {
auto definingOp = value->getDefiningOp();
return (definingOp && !!willBeMovedSet.count(definingOp)) ||
looplike.isDefinedOutsideOfLoop(value);
};
// Do not use walk here, as we do not want to go into nested regions and hoist
// operations from there. These regions might have semantics unknown to this
// rewriting. If the nested regions are loops, they will have been processed.
for (auto &block : loopBody) {
for (auto &op : block.without_terminator()) {
if (canBeHoisted(&op, isDefinedOutsideOfBody,
mlir::SideEffectsDialectInterface::Recursive,
interface)) {
opsToMove.push_back(&op);
willBeMovedSet.insert(&op);
}
}
}
// For all instructions that we found to be invariant, place sequentially
// right before the for loop.
for (auto *op : opsToMove) {
op->moveBefore(forOp);
}
LLVM_DEBUG(forOp.getOperation()->print(llvm::dbgs() << "Modified loop\n"));
// For all instructions that we found to be invariant, move them outside
// of the loop.
auto result = looplike.moveOutOfLoop(opsToMove);
LLVM_DEBUG(looplike.print(llvm::dbgs() << "Modified loop\n"));
return result;
}
void LoopInvariantCodeMotion::runOnFunction() {
// Walk through all loops in a function in innermost-loop-first order. This
} // end anonymous namespace
void LoopInvariantCodeMotion::runOnOperation() {
SideEffectsInterface interface(&getContext());
// Walk through all loops in a function in innermost-loop-first order. This
// way, we first LICM from the inner loop, and place the ops in
// the outer loop, which in turn can be further LICM'ed.
getFunction().walk([&](AffineForOp op) {
LLVM_DEBUG(op.getOperation()->print(llvm::dbgs() << "\nOriginal loop\n"));
runOnAffineForOp(op);
getOperation()->walk([&](Operation *op) {
if (auto looplike = dyn_cast<LoopLikeOpInterface>(op)) {
LLVM_DEBUG(op->print(llvm::dbgs() << "\nOriginal loop\n"));
if (failed(moveLoopInvariantCode(looplike, interface)))
signalPassFailure();
}
});
}
// Include the generated code for the loop-like interface here, as it otherwise
// has no compilation unit. This works because loop-invariant code motion is the
// only user of that interface.
#include "mlir/Transforms/LoopLikeInterface.cpp.inc"
std::unique_ptr<Pass> mlir::createLoopInvariantCodeMotionPass() {
return std::make_unique<LoopInvariantCodeMotion>();
}
static PassRegistration<LoopInvariantCodeMotion>
pass("affine-loop-invariant-code-motion",
pass("loop-invariant-code-motion",
"Hoist loop invariant instructions outside of the loop");

View File

@@ -0,0 +1,507 @@
// RUN: mlir-opt %s -affine-loop-invariant-code-motion -split-input-file | FileCheck %s
func @nested_loops_both_having_invariant_code() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
affine.store %v0, %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
return
}
// The store-load forwarding can see through affine.apply ops since it relies on
// dependence information.
// CHECK-LABEL: func @store_affine_apply
func @store_affine_apply() -> memref<10xf32> {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
affine.for %arg0 = 0 to 10 {
%t0 = affine.apply (d1) -> (d1 + 1)(%arg0)
affine.store %cf7, %m[%t0] : memref<10xf32>
}
return %m : memref<10xf32>
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %1 = affine.apply #map3(%arg0)
// CHECK-NEXT: affine.store %cst, %0[%1] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: return %0 : memref<10xf32>
}
func @nested_loops_code_invariant_to_both() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
return
}
func @single_loop_nothing_invariant() {
%m1 = alloc() : memref<10xf32>
%m2 = alloc() : memref<10xf32>
affine.for %arg0 = 0 to 10 {
%v0 = affine.load %m1[%arg0] : memref<10xf32>
%v1 = affine.load %m2[%arg0] : memref<10xf32>
%v2 = addf %v0, %v1 : f32
affine.store %v2, %m1[%arg0] : memref<10xf32>
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %1 = alloc() : memref<10xf32>
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: %3 = affine.load %1[%arg0] : memref<10xf32>
// CHECK-NEXT: %4 = addf %2, %3 : f32
// CHECK-NEXT: affine.store %4, %0[%arg0] : memref<10xf32>
return
}
func @invariant_code_inside_affine_if() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%t0 = affine.apply (d1) -> (d1 + 1)(%arg0)
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %t0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %1 = affine.apply #map3(%arg0)
// CHECK-NEXT: affine.if #set0(%arg0, %1) {
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %2, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: }
return
}
func @dependent_stores() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
affine.store %v1, %m[%arg1] : memref<10xf32>
affine.store %v0, %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %2, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
return
}
func @independent_stores() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
affine.store %v0, %m[%arg0] : memref<10xf32>
affine.store %v1, %m[%arg1] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.store %2, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: }
return
}
func @load_dependent_store() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
affine.store %v0, %m[%arg1] : memref<10xf32>
%v2 = affine.load %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
return
}
func @load_after_load() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
%v3 = affine.load %m[%arg1] : memref<10xf32>
%v2 = affine.load %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: %4 = affine.load %0[%arg1] : memref<10xf32>
return
}
func @invariant_affine_if() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: }
return
}
func @invariant_affine_if2() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg1] : memref<10xf32>
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_affine_nested_if() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
affine.store %cf9, %m[%arg1] : memref<10xf32>
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_affine_nested_if_else() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
affine.store %cf9, %m[%arg0] : memref<10xf32>
} else {
affine.store %cf9, %m[%arg1] : memref<10xf32>
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: } else {
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_affine_nested_if_else2() {
%m = alloc() : memref<10xf32>
%m2 = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
%tload1 = affine.load %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
affine.store %cf9, %m2[%arg0] : memref<10xf32>
} else {
%tload2 = affine.load %m[%arg0] : memref<10xf32>
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %1 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: affine.store %2, %1[%arg0] : memref<10xf32>
// CHECK-NEXT: } else {
// CHECK-NEXT: %4 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_affine_nested_if2() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
%v1 = affine.load %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%v2 = affine.load %m[%arg0] : memref<10xf32>
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_affine_for_inside_affine_if() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
affine.for %arg2 = 0 to 10 {
affine.store %cf9, %m[%arg2] : memref<10xf32>
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg2 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg2] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_constant_and_load() {
%m = alloc() : memref<100xf32>
%m2 = alloc() : memref<100xf32>
affine.for %arg0 = 0 to 5 {
%c0 = constant 0 : index
%v = affine.load %m2[%c0] : memref<100xf32>
affine.store %v, %m[%arg0] : memref<100xf32>
}
// CHECK: %0 = alloc() : memref<100xf32>
// CHECK-NEXT: %1 = alloc() : memref<100xf32>
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: %2 = affine.load %1[%c0] : memref<100xf32>
// CHECK-NEXT: affine.for %arg0 = 0 to 5 {
// CHECK-NEXT: affine.store %2, %0[%arg0] : memref<100xf32>
return
}
func @nested_load_store_same_memref() {
%m = alloc() : memref<10xf32>
%cst = constant 8.0 : f32
%c0 = constant 0 : index
affine.for %arg0 = 0 to 10 {
%v0 = affine.load %m[%c0] : memref<10xf32>
affine.for %arg1 = 0 to 10 {
affine.store %cst, %m[%arg1] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %1 = affine.load %0[%c0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %cst, %0[%arg1] : memref<10xf32>
return
}
func @nested_load_store_same_memref2() {
%m = alloc() : memref<10xf32>
%cst = constant 8.0 : f32
%c0 = constant 0 : index
affine.for %arg0 = 0 to 10 {
affine.store %cst, %m[%c0] : memref<10xf32>
affine.for %arg1 = 0 to 10 {
%v0 = affine.load %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.store %cst, %0[%c0] : memref<10xf32>
// CHECK-NEXT: %1 = affine.load %0[%arg0] : memref<10xf32>
return
}

View File

@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -affine-loop-invariant-code-motion -split-input-file | FileCheck %s
// RUN: mlir-opt %s -loop-invariant-code-motion -split-input-file | FileCheck %s
func @nested_loops_both_having_invariant_code() {
%m = alloc() : memref<10xf32>
@@ -8,40 +8,23 @@ func @nested_loops_both_having_invariant_code() {
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %v0, %cf8 : f32
affine.store %v0, %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: %[[CST0:.*]] = constant 7.000000e+00 : f32
// CHECK-NEXT: %[[CST1:.*]] = constant 8.000000e+00 : f32
// CHECK-NEXT: %[[ADD0:.*]] = addf %[[CST0]], %[[CST1]] : f32
// CHECK-NEXT: addf %[[ADD0]], %[[CST1]] : f32
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.store
return
}
// The store-load forwarding can see through affine.apply ops since it relies on
// dependence information.
// CHECK-LABEL: func @store_affine_apply
func @store_affine_apply() -> memref<10xf32> {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
affine.for %arg0 = 0 to 10 {
%t0 = affine.apply (d1) -> (d1 + 1)(%arg0)
affine.store %cf7, %m[%t0] : memref<10xf32>
}
return %m : memref<10xf32>
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %1 = affine.apply #map3(%arg0)
// CHECK-NEXT: affine.store %cst, %0[%1] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: return %0 : memref<10xf32>
}
func @nested_loops_code_invariant_to_both() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
@@ -108,117 +91,6 @@ func @invariant_code_inside_affine_if() {
return
}
func @dependent_stores() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
affine.store %v1, %m[%arg1] : memref<10xf32>
affine.store %v0, %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %2, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
return
}
func @independent_stores() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
affine.store %v0, %m[%arg0] : memref<10xf32>
affine.store %v1, %m[%arg1] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.store %2, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: }
return
}
func @load_dependent_store() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
affine.store %v0, %m[%arg1] : memref<10xf32>
%v2 = affine.load %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
return
}
func @load_after_load() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
%v3 = affine.load %m[%arg1] : memref<10xf32>
%v2 = affine.load %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: %4 = affine.load %0[%arg1] : memref<10xf32>
return
}
func @invariant_affine_if() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
@@ -226,21 +98,17 @@ func @invariant_affine_if() {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: %[[CST:.*]] = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%[[ARG]], %[[ARG]]) {
// CHECK-NEXT: addf %[[CST]], %[[CST]] : f32
// CHECK-NEXT: }
return
}
@@ -252,22 +120,20 @@ func @invariant_affine_if2() {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg1] : memref<10xf32>
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK: alloc
// CHECK-NEXT: constant
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.if
// CHECK-NEXT: addf
// CHECK-NEXT: affine.store
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
@@ -278,23 +144,21 @@ func @invariant_affine_nested_if() {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
affine.store %cf9, %m[%arg1] : memref<10xf32>
%cf10 = addf %cf9, %cf9 : f32
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK: alloc
// CHECK-NEXT: constant
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.if
// CHECK-NEXT: addf
// CHECK-NEXT: affine.if
// CHECK-NEXT: addf
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -312,7 +176,7 @@ func @invariant_affine_nested_if_else() {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
affine.store %cf9, %m[%arg0] : memref<10xf32>
%cf10 = addf %cf9, %cf9 : f32
} else {
affine.store %cf9, %m[%arg1] : memref<10xf32>
}
@@ -320,17 +184,17 @@ func @invariant_affine_nested_if_else() {
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK: alloc
// CHECK-NEXT: constant
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.if
// CHECK-NEXT: addf
// CHECK-NEXT: affine.store
// CHECK-NEXT: affine.if
// CHECK-NEXT: addf
// CHECK-NEXT: } else {
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: affine.store
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -339,169 +203,42 @@ func @invariant_affine_nested_if_else() {
return
}
func @invariant_affine_nested_if_else2() {
func @invariant_loop_dialect() {
%ci0 = constant 0 : index
%ci10 = constant 10 : index
%ci1 = constant 1 : index
%m = alloc() : memref<10xf32>
%m2 = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
%tload1 = affine.load %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
affine.store %cf9, %m2[%arg0] : memref<10xf32>
} else {
%tload2 = affine.load %m[%arg0] : memref<10xf32>
}
}
loop.for %arg0 = %ci0 to %ci10 step %ci1 {
loop.for %arg1 = %ci0 to %ci10 step %ci1 {
%v0 = addf %cf7, %cf8 : f32
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %1 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: affine.store %2, %1[%arg0] : memref<10xf32>
// CHECK-NEXT: } else {
// CHECK-NEXT: %4 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
return
}
func @invariant_affine_nested_if2() {
func @variant_loop_dialect() {
%ci0 = constant 0 : index
%ci10 = constant 10 : index
%ci1 = constant 1 : index
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
%v1 = affine.load %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%v2 = affine.load %m[%arg0] : memref<10xf32>
}
}
loop.for %arg0 = %ci0 to %ci10 step %ci1 {
loop.for %arg1 = %ci0 to %ci10 step %ci1 {
%v0 = addi %arg0, %arg1 : index
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_affine_for_inside_affine_if() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
affine.for %arg2 = 0 to 10 {
affine.store %cf9, %m[%arg2] : memref<10xf32>
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg2 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg2] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_constant_and_load() {
%m = alloc() : memref<100xf32>
%m2 = alloc() : memref<100xf32>
affine.for %arg0 = 0 to 5 {
%c0 = constant 0 : index
%v = affine.load %m2[%c0] : memref<100xf32>
affine.store %v, %m[%arg0] : memref<100xf32>
}
// CHECK: %0 = alloc() : memref<100xf32>
// CHECK-NEXT: %1 = alloc() : memref<100xf32>
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: %2 = affine.load %1[%c0] : memref<100xf32>
// CHECK-NEXT: affine.for %arg0 = 0 to 5 {
// CHECK-NEXT: affine.store %2, %0[%arg0] : memref<100xf32>
return
}
func @nested_load_store_same_memref() {
%m = alloc() : memref<10xf32>
%cst = constant 8.0 : f32
%c0 = constant 0 : index
affine.for %arg0 = 0 to 10 {
%v0 = affine.load %m[%c0] : memref<10xf32>
affine.for %arg1 = 0 to 10 {
affine.store %cst, %m[%arg1] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %1 = affine.load %0[%c0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %cst, %0[%arg1] : memref<10xf32>
return
}
func @nested_load_store_same_memref2() {
%m = alloc() : memref<10xf32>
%cst = constant 8.0 : f32
%c0 = constant 0 : index
affine.for %arg0 = 0 to 10 {
affine.store %cst, %m[%c0] : memref<10xf32>
affine.for %arg1 = 0 to 10 {
%v0 = affine.load %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.store %cst, %0[%c0] : memref<10xf32>
// CHECK-NEXT: %1 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: loop.for
// CHECK-NEXT: loop.for
// CHECK-NEXT: addi
return
}