Implement simple loop-invariant-code-motion based on dialect interfaces.

PiperOrigin-RevId: 275004258
Stephan Herhut authored on 2019-10-16 04:28:13 -07:00; committed by A. Unique TensorFlower
parent 98f64b4da1
commit b843cc5d5a
15 changed files with 1168 additions and 514 deletions

View File

@@ -28,6 +28,7 @@
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/Transforms/LoopLikeInterface.h"
namespace mlir {
class AffineBound;

View File

@@ -28,6 +28,11 @@
include "mlir/IR/OpBase.td"
#endif // OP_BASE
#ifdef MLIR_LOOPLIKEINTERFACE
#else
include "mlir/Transforms/LoopLikeInterface.td"
#endif
include "mlir/Dialect/AffineOps/AffineOpsBase.td"
def Affine_Dialect : Dialect {
@@ -53,7 +58,9 @@ class Affine_Op<string mnemonic, list<OpTrait> traits = []> :
def ImplicitAffineTerminator
: SingleBlockImplicitTerminator<"AffineTerminatorOp">;
def AffineForOp : Affine_Op<"for", [ImplicitAffineTerminator]> {
def AffineForOp : Affine_Op<"for",
[ImplicitAffineTerminator,
DeclareOpInterfaceMethods<LoopLikeOpInterface>]> {
let summary = "for operation";
let description = [{
The "affine.for" operation represents an affine loop nest, defining an SSA

View File

@@ -26,6 +26,7 @@
#include "mlir/IR/Builders.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/Transforms/LoopLikeInterface.h"
namespace mlir {
namespace loop {

View File

@@ -28,6 +28,11 @@
include "mlir/IR/OpBase.td"
#endif // OP_BASE
#ifdef MLIR_LOOPLIKEINTERFACE
#else
include "mlir/Transforms/LoopLikeInterface.td"
#endif
def Loop_Dialect : Dialect {
let name = "loop";
let cppNamespace = "";
@@ -48,7 +53,8 @@ class Loop_Op<string mnemonic, list<OpTrait> traits = []> :
}
def ForOp : Loop_Op<"for",
[SingleBlockImplicitTerminator<"TerminatorOp">]> {
[DeclareOpInterfaceMethods<LoopLikeOpInterface>,
SingleBlockImplicitTerminator<"TerminatorOp">]> {
let summary = "for operation";
let description = [{
The "loop.for" operation represents a loop nest taking 3 SSA value as

View File

@@ -249,6 +249,15 @@ public:
return op_filter_iterator<OpT>(end(), end());
}
/// Return an iterator range over the operations within this block, excluding
/// the terminator operation at the end.
llvm::iterator_range<iterator> without_terminator() {
if (begin() == end())
return {begin(), end()};
auto endIt = --end();
return {begin(), endIt};
}
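
A minimal usage sketch (hypothetical helper, assuming an MLIR build of this vintage): iterating with without_terminator() visits every operation in the block except the trailing terminator, which must stay in place.

// Sketch only; collectBody is a hypothetical helper, not part of this commit.
#include "mlir/IR/Block.h"
#include "llvm/ADT/SmallVector.h"

static void collectBody(mlir::Block &block,
                        llvm::SmallVectorImpl<mlir::Operation *> &out) {
  // Visit all operations except the block's terminator.
  for (mlir::Operation &op : block.without_terminator())
    out.push_back(&op);
}
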
//===--------------------------------------------------------------------===//
// Terminator management
//===--------------------------------------------------------------------===//

View File

@@ -0,0 +1,35 @@
//===- LoopLikeInterface.h - Loop-like operations interface ---------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements the operation interface for loop-like operations.
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_TRANSFORMS_LOOPLIKEINTERFACE_H_
#define MLIR_TRANSFORMS_LOOPLIKEINTERFACE_H_
#include "mlir/IR/OpDefinition.h"
#include "mlir/Support/LogicalResult.h"
#include "llvm/ADT/ArrayRef.h"
namespace mlir {
#include "mlir/Transforms/LoopLikeInterface.h.inc"
} // namespace mlir
#endif // MLIR_TRANSFORMS_LOOPLIKEINTERFACE_H_

View File

@@ -0,0 +1,62 @@
//===- LoopLikeInterface.td - LoopLike interface -----------*- tablegen -*-===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// Defines the interface for loop-like operations as used by LICM.
//
//===----------------------------------------------------------------------===//
#ifdef MLIR_LOOPLIKEINTERFACE
#else
#define MLIR_LOOPLIKEINTERFACE
#ifdef OP_BASE
#else
include "mlir/IR/OpBase.td"
#endif // OP_BASE
def LoopLikeOpInterface : OpInterface<"LoopLikeOpInterface"> {
let description = [{
Encodes properties of a loop. Operations that implement this interface will
be considered by loop-invariant code motion.
}];
let methods = [
InterfaceMethod<[{
Returns true if the given value is defined outside of the loop.
A sensible implementation could be to check whether the value's defining
operation lies outside of the loop's body region. If the loop uses
explicit capture of dependencies, an implementation could check whether
the value corresponds to a captured dependency.
}],
"bool", "isDefinedOutsideOfLoop", (ins "Value *":$value)
>,
InterfaceMethod<[{
Returns the region that makes up the body of the loop and should be
inspected for loop-invariant operations.
}],
"Region &", "getLoopBody"
>,
InterfaceMethod<[{
Moves the given vector of operations out of the loop. The vector is
sorted topologically.
}],
"LogicalResult", "moveOutOfLoop", (ins "ArrayRef<Operation *>":$ops)
>,
];
}
#endif // MLIR_LOOPLIKEINTERFACE
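
As a hedged sketch of how an op might implement these three methods (hypothetical MyLoopOp with a single region accessor body(); the real definitions for affine.for and loop.for appear later in this commit):

// Hypothetical op; mirrors the AffineForOp/ForOp definitions below.
Region &MyLoopOp::getLoopBody() { return body(); }

bool MyLoopOp::isDefinedOutsideOfLoop(Value *value) {
  // A value is defined outside the loop iff its parent region is not
  // nested within the loop body.
  return !body().isAncestor(value->getParentRegion());
}

LogicalResult MyLoopOp::moveOutOfLoop(ArrayRef<Operation *> ops) {
  // `ops` is topologically sorted, so moving the operations in order
  // keeps the hoisted code in dominance order.
  for (Operation *op : ops)
    op->moveBefore(this->getOperation());
  return success();
}
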

View File

@@ -32,6 +32,7 @@ namespace mlir {
class AffineForOp;
class FuncOp;
class ModuleOp;
class Pass;
template <typename T> class OpPassBase;
/// Creates a constant folding pass. Note that this pass solely provides simple
@@ -90,7 +91,11 @@ createLoopFusionPass(unsigned fastMemorySpace = 0,
/// Creates a loop invariant code motion pass that hoists loop invariant
/// instructions out of the loop.
std::unique_ptr<OpPassBase<FuncOp>> createLoopInvariantCodeMotionPass();
std::unique_ptr<Pass> createLoopInvariantCodeMotionPass();
/// Creates a loop invariant code motion pass that hoists loop invariant
/// instructions out of affine loops.
std::unique_ptr<OpPassBase<FuncOp>> createAffineLoopInvariantCodeMotionPass();
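
A hedged usage sketch (assuming a parsed ModuleOp; everything other than the create function above is illustrative): the interface-based pass is operation-agnostic and can be added directly to a pass manager.

#include "mlir/Pass/PassManager.h"
#include "mlir/Transforms/Passes.h"

// Sketch only: run the generic LICM pass over a module.
static mlir::LogicalResult runLICM(mlir::ModuleOp module) {
  mlir::PassManager pm(module.getContext());
  pm.addPass(mlir::createLoopInvariantCodeMotionPass());
  return pm.run(module);
}
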
/// Creates a pass to pipeline explicit movement of data across levels of the
/// memory hierarchy.

View File

@@ -0,0 +1,73 @@
//===- SideEffectsInterface.h - dialect interface modeling side effects ---===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file specifies a dialect interface to model side-effects.
//
//===----------------------------------------------------------------------===//
#ifndef MLIR_TRANSFORMS_SIDEEFFECTSINTERFACE_H_
#define MLIR_TRANSFORMS_SIDEEFFECTSINTERFACE_H_
#include "mlir/IR/DialectInterface.h"
#include "mlir/IR/Operation.h"
namespace mlir {
/// Specifies an interface for basic side-effect modelling that is used by the
/// loop-invariant code motion pass.
///
/// TODO: This interface should be replaced by a more general solution.
class SideEffectsDialectInterface
: public DialectInterface::Base<SideEffectsDialectInterface> {
public:
SideEffectsDialectInterface(Dialect *dialect) : Base(dialect) {}
enum SideEffecting {
Never, /* the operation has no side-effects */
Recursive, /* the operation has side-effects if a contained operation has */
Always /* the operation has side-effects */
};
/// Checks whether the given operation has side-effects.
virtual SideEffecting isSideEffecting(Operation *op) const {
if (op->hasNoSideEffect())
return Never;
return Always;
};
};
class SideEffectsInterface
: public DialectInterfaceCollection<SideEffectsDialectInterface> {
public:
using SideEffecting = SideEffectsDialectInterface::SideEffecting;
explicit SideEffectsInterface(MLIRContext *ctx)
: DialectInterfaceCollection<SideEffectsDialectInterface>(ctx) {}
SideEffecting isSideEffecting(Operation *op) const {
// First check generic trait.
if (op->hasNoSideEffect())
return SideEffecting::Never;
if (auto handler = getInterfaceFor(op))
return handler->isSideEffecting(op);
return SideEffecting::Always;
}
};
} // namespace mlir
#endif // MLIR_TRANSFORMS_SIDEEFFECTSINTERFACE_H_
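
A hedged sketch of the extension point (hypothetical MyRegionOp; the Affine and Loop dialects register analogous interfaces later in this commit): a dialect marks ops whose effects stem only from their contained ops as Recursive, so LICM can recurse into them instead of conservatively assuming Always.

// Hypothetical dialect interface; mirrors AffineSideEffectsInterface below.
struct MySideEffectsInterface : public mlir::SideEffectsDialectInterface {
  using SideEffectsDialectInterface::SideEffectsDialectInterface;
  SideEffecting isSideEffecting(mlir::Operation *op) const override {
    // A purely structural op: side-effecting only if a nested op is.
    if (llvm::isa<MyRegionOp>(op))
      return Recursive;
    // Otherwise fall back to the trait-based answer (Never/Always).
    return SideEffectsDialectInterface::isSideEffecting(op);
  }
};
// Registered in the dialect constructor: addInterfaces<MySideEffectsInterface>();
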

View File

@@ -23,9 +23,11 @@
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Transforms/InliningUtils.h"
#include "mlir/Transforms/SideEffectsInterface.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Support/Debug.h"
using namespace mlir;
using llvm::dbgs;
@@ -68,6 +70,19 @@ struct AffineInlinerInterface : public DialectInlinerInterface {
/// Affine regions should be analyzed recursively.
bool shouldAnalyzeRecursively(Operation *op) const final { return true; }
};
// TODO(mlir): Extend for other ops in this dialect.
struct AffineSideEffectsInterface : public SideEffectsDialectInterface {
using SideEffectsDialectInterface::SideEffectsDialectInterface;
SideEffecting isSideEffecting(Operation *op) const override {
if (isa<AffineIfOp>(op)) {
return Recursive;
}
return SideEffectsDialectInterface::isSideEffecting(op);
};
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
@@ -81,7 +96,7 @@ AffineOpsDialect::AffineOpsDialect(MLIRContext *context)
#define GET_OP_LIST
#include "mlir/Dialect/AffineOps/AffineOps.cpp.inc"
>();
addInterfaces<AffineInlinerInterface>();
addInterfaces<AffineInlinerInterface, AffineSideEffectsInterface>();
}
/// A utility function to check if a given region is attached to a function.
@@ -1530,6 +1545,18 @@ bool AffineForOp::matchingBoundOperandList() {
return true;
}
Region &AffineForOp::getLoopBody() { return region(); }
bool AffineForOp::isDefinedOutsideOfLoop(Value *value) {
return !region().isAncestor(value->getParentRegion());
}
LogicalResult AffineForOp::moveOutOfLoop(ArrayRef<Operation *> ops) {
for (auto *op : ops)
op->moveBefore(*this);
return success();
}
/// Returns true if the provided value is the induction variable of an AffineForOp.
bool mlir::isForInductionVar(Value *val) {
return getForInductionVarOwner(val) != AffineForOp();

View File

@@ -29,10 +29,29 @@
#include "mlir/IR/Value.h"
#include "mlir/Support/MathExtras.h"
#include "mlir/Support/STLExtras.h"
#include "mlir/Transforms/SideEffectsInterface.h"
using namespace mlir;
using namespace mlir::loop;
//===----------------------------------------------------------------------===//
// LoopOpsDialect Interfaces
//===----------------------------------------------------------------------===//
namespace {
struct LoopSideEffectsInterface : public SideEffectsDialectInterface {
using SideEffectsDialectInterface::SideEffectsDialectInterface;
SideEffecting isSideEffecting(Operation *op) const override {
if (isa<IfOp>(op) || isa<ForOp>(op)) {
return Recursive;
}
return SideEffectsDialectInterface::isSideEffecting(op);
};
};
} // namespace
//===----------------------------------------------------------------------===//
// LoopOpsDialect
//===----------------------------------------------------------------------===//
@@ -43,6 +62,7 @@ LoopOpsDialect::LoopOpsDialect(MLIRContext *context)
#define GET_OP_LIST
#include "mlir/Dialect/LoopOps/LoopOps.cpp.inc"
>();
addInterfaces<LoopSideEffectsInterface>();
}
//===----------------------------------------------------------------------===//
@@ -112,6 +132,18 @@ static ParseResult parseForOp(OpAsmParser &parser, OperationState &result) {
return success();
}
Region &ForOp::getLoopBody() { return region(); }
bool ForOp::isDefinedOutsideOfLoop(Value *value) {
return !region().isAncestor(value->getParentRegion());
}
LogicalResult ForOp::moveOutOfLoop(ArrayRef<Operation *> ops) {
for (auto *op : ops)
op->moveBefore(this->getOperation());
return success();
}
ForOp mlir::loop::getForInductionVarOwner(Value *val) {
auto *ivArg = dyn_cast<BlockArgument>(val);
if (!ivArg)

View File

@@ -0,0 +1,248 @@
//===- AffineLoopInvariantCodeMotion.cpp - Loop invariant code motion -----===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements loop invariant code motion.
//
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/Utils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "licm"
using namespace mlir;
namespace {
/// Loop invariant code motion (LICM) pass.
/// TODO(asabne) : The pass is missing zero-trip tests.
/// TODO(asabne) : Check for the presence of side effects before hoisting.
/// TODO: This code should be removed once the new LICM pass can handle its
/// uses.
struct LoopInvariantCodeMotion : public FunctionPass<LoopInvariantCodeMotion> {
void runOnFunction() override;
void runOnAffineForOp(AffineForOp forOp);
};
} // end anonymous namespace
static bool
checkInvarianceOfNestedIfOps(Operation *op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist);
static bool isOpLoopInvariant(Operation &op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist);
static bool
areAllOpsInTheBlockListInvariant(Region &blockList, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist);
static bool isMemRefDereferencingOp(Operation &op) {
// TODO(asabne): Support DMA Ops.
if (isa<AffineLoadOp>(op) || isa<AffineStoreOp>(op)) {
return true;
}
return false;
}
// Returns true if the individual op is loop invariant.
bool isOpLoopInvariant(Operation &op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist) {
LLVM_DEBUG(llvm::dbgs() << "iterating on op: " << op;);
if (isa<AffineIfOp>(op)) {
if (!checkInvarianceOfNestedIfOps(&op, indVar, definedOps, opsToHoist)) {
return false;
}
} else if (isa<AffineForOp>(op)) {
// If the body of a predicated region has a for loop, we don't hoist the
// 'affine.if'.
return false;
} else if (isa<AffineDmaStartOp>(op) || isa<AffineDmaWaitOp>(op)) {
// TODO(asabne): Support DMA ops.
return false;
} else if (!isa<ConstantOp>(op)) {
if (isMemRefDereferencingOp(op)) {
Value *memref = isa<AffineLoadOp>(op)
? cast<AffineLoadOp>(op).getMemRef()
: cast<AffineStoreOp>(op).getMemRef();
for (auto *user : memref->getUsers()) {
// If this memref has a user that is a DMA, give up because these
// operations write to this memref.
if (isa<AffineDmaStartOp>(user) || isa<AffineDmaWaitOp>(user)) {
return false;
}
// If the memref used by the load/store is used in a store elsewhere in
// the loop nest, we do not hoist. Similarly, if the memref used in a
// load is also being stored too, we do not hoist the load.
if (isa<AffineStoreOp>(user) ||
(isa<AffineLoadOp>(user) && isa<AffineStoreOp>(op))) {
if (&op != user) {
SmallVector<AffineForOp, 8> userIVs;
getLoopIVs(*user, &userIVs);
// Check that userIVs don't contain the for loop around the op.
if (llvm::is_contained(userIVs, getForInductionVarOwner(indVar))) {
return false;
}
}
}
}
}
// Insert this op in the defined ops list.
definedOps.insert(&op);
if (op.getNumOperands() == 0 && !isa<AffineTerminatorOp>(op)) {
LLVM_DEBUG(llvm::dbgs() << "\nNon-constant op with 0 operands\n");
return false;
}
for (unsigned int i = 0; i < op.getNumOperands(); ++i) {
auto *operandSrc = op.getOperand(i)->getDefiningOp();
LLVM_DEBUG(
op.getOperand(i)->print(llvm::dbgs() << "\nIterating on operand\n"));
// If the loop IV is the operand, this op isn't loop invariant.
if (indVar == op.getOperand(i)) {
LLVM_DEBUG(llvm::dbgs() << "\nLoop IV is the operand\n");
return false;
}
if (operandSrc != nullptr) {
LLVM_DEBUG(llvm::dbgs()
<< *operandSrc << "\nIterating on operand src\n");
// If the value was defined in the loop (outside of the
// if/else region), and that operation itself wasn't meant to
// be hoisted, then mark this operation loop dependent.
if (definedOps.count(operandSrc) && opsToHoist.count(operandSrc) == 0) {
return false;
}
}
}
}
// If no operand was loop variant, mark this op for motion.
opsToHoist.insert(&op);
return true;
}
// Checks if all ops in a region (i.e. list of blocks) are loop invariant.
bool areAllOpsInTheBlockListInvariant(
Region &blockList, Value *indVar, SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist) {
for (auto &b : blockList) {
for (auto &op : b) {
if (!isOpLoopInvariant(op, indVar, definedOps, opsToHoist)) {
return false;
}
}
}
return true;
}
// Returns true if the affine.if op can be hoisted.
bool checkInvarianceOfNestedIfOps(Operation *op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist) {
assert(isa<AffineIfOp>(op));
auto ifOp = cast<AffineIfOp>(op);
if (!areAllOpsInTheBlockListInvariant(ifOp.thenRegion(), indVar, definedOps,
opsToHoist)) {
return false;
}
if (!areAllOpsInTheBlockListInvariant(ifOp.elseRegion(), indVar, definedOps,
opsToHoist)) {
return false;
}
return true;
}
void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
auto *loopBody = forOp.getBody();
auto *indVar = forOp.getInductionVar();
SmallPtrSet<Operation *, 8> definedOps;
// This is the place where hoisted instructions would reside.
OpBuilder b(forOp.getOperation());
SmallPtrSet<Operation *, 8> opsToHoist;
SmallVector<Operation *, 8> opsToMove;
for (auto &op : *loopBody) {
// We don't hoist for loops.
if (!isa<AffineForOp>(op)) {
if (!isa<AffineTerminatorOp>(op)) {
if (isOpLoopInvariant(op, indVar, definedOps, opsToHoist)) {
opsToMove.push_back(&op);
}
}
}
}
// For all instructions that we found to be invariant, place them
// sequentially right before the for loop.
for (auto *op : opsToMove) {
op->moveBefore(forOp);
}
LLVM_DEBUG(forOp.getOperation()->print(llvm::dbgs() << "Modified loop\n"));
}
void LoopInvariantCodeMotion::runOnFunction() {
// Walk through all loops in a function in innermost-loop-first order. This
// way, we first LICM from the inner loop, and place the ops in
// the outer loop, which in turn can be further LICM'ed.
getFunction().walk([&](AffineForOp op) {
LLVM_DEBUG(op.getOperation()->print(llvm::dbgs() << "\nOriginal loop\n"));
runOnAffineForOp(op);
});
}
std::unique_ptr<OpPassBase<FuncOp>>
mlir::createAffineLoopInvariantCodeMotionPass() {
return std::make_unique<LoopInvariantCodeMotion>();
}
static PassRegistration<LoopInvariantCodeMotion>
pass("affine-loop-invariant-code-motion",
"Hoist loop invariant instructions outside of the loop");

View File

@@ -19,26 +19,16 @@
//
//===----------------------------------------------------------------------===//
#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/LoopAnalysis.h"
#include "mlir/Analysis/SliceAnalysis.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopUtils.h"
#include "mlir/Transforms/Passes.h"
#include "mlir/Transforms/Utils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/LoopLikeInterface.h"
#include "mlir/Transforms/SideEffectsInterface.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "licm"
@@ -46,200 +36,114 @@ using namespace mlir;
namespace {
using SideEffecting = SideEffectsInterface::SideEffecting;
/// Loop invariant code motion (LICM) pass.
/// TODO(asabne) : The pass is missing zero-trip tests.
/// TODO(asabne) : Check for the presence of side effects before hoisting.
struct LoopInvariantCodeMotion : public FunctionPass<LoopInvariantCodeMotion> {
void runOnFunction() override;
void runOnAffineForOp(AffineForOp forOp);
struct LoopInvariantCodeMotion : public OperationPass<LoopInvariantCodeMotion> {
public:
void runOnOperation() override;
};
} // end anonymous namespace
static bool
checkInvarianceOfNestedIfOps(Operation *op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist);
static bool isOpLoopInvariant(Operation &op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist);
static bool
areAllOpsInTheBlockListInvariant(Region &blockList, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist);
static bool isMemRefDereferencingOp(Operation &op) {
// TODO(asabne): Support DMA Ops.
if (isa<AffineLoadOp>(op) || isa<AffineStoreOp>(op)) {
return true;
}
return false;
}
std::unique_ptr<OpPassBase<FuncOp>> mlir::createLoopInvariantCodeMotionPass() {
return std::make_unique<LoopInvariantCodeMotion>();
}
// Returns true if the individual op is loop invariant.
bool isOpLoopInvariant(Operation &op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist) {
LLVM_DEBUG(llvm::dbgs() << "iterating on op: " << op;);
if (isa<AffineIfOp>(op)) {
if (!checkInvarianceOfNestedIfOps(&op, indVar, definedOps, opsToHoist)) {
// Checks whether the given op can be hoisted by checking that
// - the op and any of its contained operations do not depend on SSA values
// defined inside of the loop (by means of calling definedOutside).
// - the op has no side-effects. If sideEffecting is Never, side-effects of this
// op and its nested ops are ignored.
static bool canBeHoisted(Operation *op,
llvm::function_ref<bool(Value *)> definedOutside,
SideEffecting sideEffecting,
SideEffectsInterface &interface) {
// Check that dependencies are defined outside of loop.
if (!llvm::all_of(op->getOperands(), definedOutside))
return false;
// Check whether this op is side-effect free. If we already know that there
// can be no side-effects because the surrounding op has claimed so, we can
// (and have to) skip this step.
auto thisOpIsSideEffecting = sideEffecting;
if (thisOpIsSideEffecting != SideEffecting::Never) {
thisOpIsSideEffecting = interface.isSideEffecting(op);
// If the op always has side-effects, we cannot hoist.
if (thisOpIsSideEffecting == SideEffecting::Always)
return false;
}
} else if (isa<AffineForOp>(op)) {
// If the body of a predicated region has a for loop, we don't hoist the
// 'affine.if'.
return false;
} else if (isa<AffineDmaStartOp>(op) || isa<AffineDmaWaitOp>(op)) {
// TODO(asabne): Support DMA ops.
return false;
} else if (!isa<ConstantOp>(op)) {
if (isMemRefDereferencingOp(op)) {
Value *memref = isa<AffineLoadOp>(op)
? cast<AffineLoadOp>(op).getMemRef()
: cast<AffineStoreOp>(op).getMemRef();
for (auto *user : memref->getUsers()) {
// If this memref has a user that is a DMA, give up because these
// operations write to this memref.
if (isa<AffineDmaStartOp>(op) || isa<AffineDmaWaitOp>(op)) {
}
// Recurse into the regions for this op and check whether the contained ops
// can be hoisted.
for (auto &region : op->getRegions()) {
for (auto &block : region.getBlocks()) {
for (auto &innerOp : block) {
if (innerOp.isKnownTerminator())
continue;
if (!canBeHoisted(&innerOp, definedOutside, thisOpIsSideEffecting,
interface))
return false;
}
// If the memref used by the load/store is used in a store elsewhere in
// the loop nest, we do not hoist. Similarly, if the memref used in a
// load is also being stored too, we do not hoist the load.
if (isa<AffineStoreOp>(user) ||
(isa<AffineLoadOp>(user) && isa<AffineStoreOp>(op))) {
if (&op != user) {
SmallVector<AffineForOp, 8> userIVs;
getLoopIVs(*user, &userIVs);
// Check that userIVs don't contain the for loop around the op.
if (llvm::is_contained(userIVs, getForInductionVarOwner(indVar))) {
return false;
}
}
}
}
}
// Insert this op in the defined ops list.
definedOps.insert(&op);
if (op.getNumOperands() == 0 && !isa<AffineTerminatorOp>(op)) {
LLVM_DEBUG(llvm::dbgs() << "\nNon-constant op with 0 operands\n");
return false;
}
for (unsigned int i = 0; i < op.getNumOperands(); ++i) {
auto *operandSrc = op.getOperand(i)->getDefiningOp();
LLVM_DEBUG(
op.getOperand(i)->print(llvm::dbgs() << "\nIterating on operand\n"));
// If the loop IV is the operand, this op isn't loop invariant.
if (indVar == op.getOperand(i)) {
LLVM_DEBUG(llvm::dbgs() << "\nLoop IV is the operand\n");
return false;
}
if (operandSrc != nullptr) {
LLVM_DEBUG(llvm::dbgs()
<< *operandSrc << "\nIterating on operand src\n");
// If the value was defined in the loop (outside of the
// if/else region), and that operation itself wasn't meant to
// be hoisted, then mark this operation loop dependent.
if (definedOps.count(operandSrc) && opsToHoist.count(operandSrc) == 0) {
return false;
}
}
}
}
// If no operand was loop variant, mark this op for motion.
opsToHoist.insert(&op);
return true;
}
// Checks if all ops in a region (i.e. list of blocks) are loop invariant.
bool areAllOpsInTheBlockListInvariant(
Region &blockList, Value *indVar, SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist) {
static LogicalResult moveLoopInvariantCode(LoopLikeOpInterface looplike,
SideEffectsInterface &interface) {
auto &loopBody = looplike.getLoopBody();
for (auto &b : blockList) {
for (auto &op : b) {
if (!isOpLoopInvariant(op, indVar, definedOps, opsToHoist)) {
return false;
}
}
}
return true;
}
// Returns true if the affine.if op can be hoisted.
bool checkInvarianceOfNestedIfOps(Operation *op, Value *indVar,
SmallPtrSetImpl<Operation *> &definedOps,
SmallPtrSetImpl<Operation *> &opsToHoist) {
assert(isa<AffineIfOp>(op));
auto ifOp = cast<AffineIfOp>(op);
if (!areAllOpsInTheBlockListInvariant(ifOp.thenRegion(), indVar, definedOps,
opsToHoist)) {
return false;
}
if (!areAllOpsInTheBlockListInvariant(ifOp.elseRegion(), indVar, definedOps,
opsToHoist)) {
return false;
}
return true;
}
void LoopInvariantCodeMotion::runOnAffineForOp(AffineForOp forOp) {
auto *loopBody = forOp.getBody();
auto *indVar = forOp.getInductionVar();
SmallPtrSet<Operation *, 8> definedOps;
// This is the place where hoisted instructions would reside.
OpBuilder b(forOp.getOperation());
SmallPtrSet<Operation *, 8> opsToHoist;
// We use two collections here as we need to preserve the order for insertion
// and this is easiest.
SmallPtrSet<Operation *, 8> willBeMovedSet;
SmallVector<Operation *, 8> opsToMove;
for (auto &op : *loopBody) {
// We don't hoist for loops.
if (!isa<AffineForOp>(op)) {
if (!isa<AffineTerminatorOp>(op)) {
if (isOpLoopInvariant(op, indVar, definedOps, opsToHoist)) {
opsToMove.push_back(&op);
}
// Helper to check whether an operation is loop invariant wrt. SSA properties.
auto isDefinedOutsideOfBody = [&](Value *value) {
auto definingOp = value->getDefiningOp();
return (definingOp && !!willBeMovedSet.count(definingOp)) ||
looplike.isDefinedOutsideOfLoop(value);
};
// Do not use walk here, as we do not want to go into nested regions and hoist
// operations from there. These regions might have semantics unknown to this
// rewriting. If the nested regions are loops, they will have been processed.
for (auto &block : loopBody) {
for (auto &op : block.without_terminator()) {
if (canBeHoisted(&op, isDefinedOutsideOfBody,
mlir::SideEffectsDialectInterface::Recursive,
interface)) {
opsToMove.push_back(&op);
willBeMovedSet.insert(&op);
}
}
}
// For all instructions that we found to be invariant, place sequentially
// right before the for loop.
for (auto *op : opsToMove) {
op->moveBefore(forOp);
}
LLVM_DEBUG(forOp.getOperation()->print(llvm::dbgs() << "Modified loop\n"));
// For all instructions that we found to be invariant, move them outside
// of the loop.
auto result = looplike.moveOutOfLoop(opsToMove);
LLVM_DEBUG(looplike.print(llvm::dbgs() << "Modified loop\n"));
return result;
}
void LoopInvariantCodeMotion::runOnFunction() {
// Walk through all loops in a function in innermost-loop-first order. This
} // end anonymous namespace
void LoopInvariantCodeMotion::runOnOperation() {
SideEffectsInterface interface(&getContext());
// Walk through all loops in a function in innermost-loop-first order. This
// way, we first LICM from the inner loop, and place the ops in
// the outer loop, which in turn can be further LICM'ed.
getFunction().walk([&](AffineForOp op) {
LLVM_DEBUG(op.getOperation()->print(llvm::dbgs() << "\nOriginal loop\n"));
runOnAffineForOp(op);
getOperation()->walk([&](Operation *op) {
if (auto looplike = dyn_cast<LoopLikeOpInterface>(op)) {
LLVM_DEBUG(op->print(llvm::dbgs() << "\nOriginal loop\n"));
if (failed(moveLoopInvariantCode(looplike, interface)))
signalPassFailure();
}
});
}
// Include the generated code for the loop-like interface here, as it otherwise
// has no compilation unit. This works because loop-invariant code motion is the
// only user of that interface.
#include "mlir/Transforms/LoopLikeInterface.cpp.inc"
std::unique_ptr<Pass> mlir::createLoopInvariantCodeMotionPass() {
return std::make_unique<LoopInvariantCodeMotion>();
}
static PassRegistration<LoopInvariantCodeMotion>
pass("affine-loop-invariant-code-motion",
pass("loop-invariant-code-motion",
"Hoist loop invariant instructions outside of the loop");

View File

@@ -0,0 +1,507 @@
// RUN: mlir-opt %s -affine-loop-invariant-code-motion -split-input-file | FileCheck %s
func @nested_loops_both_having_invariant_code() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
affine.store %v0, %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
return
}
// The store-load forwarding can see through affine.apply ops since it relies on
// dependence information.
// CHECK-LABEL: func @store_affine_apply
func @store_affine_apply() -> memref<10xf32> {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
affine.for %arg0 = 0 to 10 {
%t0 = affine.apply (d1) -> (d1 + 1)(%arg0)
affine.store %cf7, %m[%t0] : memref<10xf32>
}
return %m : memref<10xf32>
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %1 = affine.apply #map3(%arg0)
// CHECK-NEXT: affine.store %cst, %0[%1] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: return %0 : memref<10xf32>
}
func @nested_loops_code_invariant_to_both() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
return
}
func @single_loop_nothing_invariant() {
%m1 = alloc() : memref<10xf32>
%m2 = alloc() : memref<10xf32>
affine.for %arg0 = 0 to 10 {
%v0 = affine.load %m1[%arg0] : memref<10xf32>
%v1 = affine.load %m2[%arg0] : memref<10xf32>
%v2 = addf %v0, %v1 : f32
affine.store %v2, %m1[%arg0] : memref<10xf32>
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %1 = alloc() : memref<10xf32>
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: %3 = affine.load %1[%arg0] : memref<10xf32>
// CHECK-NEXT: %4 = addf %2, %3 : f32
// CHECK-NEXT: affine.store %4, %0[%arg0] : memref<10xf32>
return
}
func @invariant_code_inside_affine_if() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%t0 = affine.apply (d1) -> (d1 + 1)(%arg0)
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %t0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %1 = affine.apply #map3(%arg0)
// CHECK-NEXT: affine.if #set0(%arg0, %1) {
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %2, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: }
return
}
func @dependent_stores() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
affine.store %v1, %m[%arg1] : memref<10xf32>
affine.store %v0, %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %2, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
return
}
func @independent_stores() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
affine.store %v0, %m[%arg0] : memref<10xf32>
affine.store %v1, %m[%arg1] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.store %2, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: }
return
}
func @load_dependent_store() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
affine.store %v0, %m[%arg1] : memref<10xf32>
%v2 = affine.load %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
return
}
func @load_after_load() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
%v3 = affine.load %m[%arg1] : memref<10xf32>
%v2 = affine.load %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: %4 = affine.load %0[%arg1] : memref<10xf32>
return
}
func @invariant_affine_if() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: }
return
}
func @invariant_affine_if2() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg1] : memref<10xf32>
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_affine_nested_if() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
affine.store %cf9, %m[%arg1] : memref<10xf32>
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_affine_nested_if_else() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
affine.store %cf9, %m[%arg0] : memref<10xf32>
} else {
affine.store %cf9, %m[%arg1] : memref<10xf32>
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: } else {
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_affine_nested_if_else2() {
%m = alloc() : memref<10xf32>
%m2 = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
%tload1 = affine.load %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
affine.store %cf9, %m2[%arg0] : memref<10xf32>
} else {
%tload2 = affine.load %m[%arg0] : memref<10xf32>
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %1 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: affine.store %2, %1[%arg0] : memref<10xf32>
// CHECK-NEXT: } else {
// CHECK-NEXT: %4 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_affine_nested_if2() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
%v1 = affine.load %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%v2 = affine.load %m[%arg0] : memref<10xf32>
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_affine_for_inside_affine_if() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
affine.for %arg2 = 0 to 10 {
affine.store %cf9, %m[%arg2] : memref<10xf32>
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg2 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg2] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_constant_and_load() {
%m = alloc() : memref<100xf32>
%m2 = alloc() : memref<100xf32>
affine.for %arg0 = 0 to 5 {
%c0 = constant 0 : index
%v = affine.load %m2[%c0] : memref<100xf32>
affine.store %v, %m[%arg0] : memref<100xf32>
}
// CHECK: %0 = alloc() : memref<100xf32>
// CHECK-NEXT: %1 = alloc() : memref<100xf32>
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: %2 = affine.load %1[%c0] : memref<100xf32>
// CHECK-NEXT: affine.for %arg0 = 0 to 5 {
// CHECK-NEXT: affine.store %2, %0[%arg0] : memref<100xf32>
return
}
func @nested_load_store_same_memref() {
%m = alloc() : memref<10xf32>
%cst = constant 8.0 : f32
%c0 = constant 0 : index
affine.for %arg0 = 0 to 10 {
%v0 = affine.load %m[%c0] : memref<10xf32>
affine.for %arg1 = 0 to 10 {
affine.store %cst, %m[%arg1] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %1 = affine.load %0[%c0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %cst, %0[%arg1] : memref<10xf32>
return
}
func @nested_load_store_same_memref2() {
%m = alloc() : memref<10xf32>
%cst = constant 8.0 : f32
%c0 = constant 0 : index
affine.for %arg0 = 0 to 10 {
affine.store %cst, %m[%c0] : memref<10xf32>
affine.for %arg1 = 0 to 10 {
%v0 = affine.load %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.store %cst, %0[%c0] : memref<10xf32>
// CHECK-NEXT: %1 = affine.load %0[%arg0] : memref<10xf32>
return
}

View File

@@ -1,4 +1,4 @@
// RUN: mlir-opt %s -affine-loop-invariant-code-motion -split-input-file | FileCheck %s
// RUN: mlir-opt %s -loop-invariant-code-motion -split-input-file | FileCheck %s
func @nested_loops_both_having_invariant_code() {
%m = alloc() : memref<10xf32>
@@ -8,40 +8,23 @@ func @nested_loops_both_having_invariant_code() {
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %v0, %cf8 : f32
affine.store %v0, %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: %[[CST0:.*]] = constant 7.000000e+00 : f32
// CHECK-NEXT: %[[CST1:.*]] = constant 8.000000e+00 : f32
// CHECK-NEXT: %[[ADD0:.*]] = addf %[[CST0]], %[[CST1]] : f32
// CHECK-NEXT: addf %[[ADD0]], %[[CST1]] : f32
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.store
return
}
// The store-load forwarding can see through affine.apply ops since it relies on
// dependence information.
// CHECK-LABEL: func @store_affine_apply
func @store_affine_apply() -> memref<10xf32> {
%cf7 = constant 7.0 : f32
%m = alloc() : memref<10xf32>
affine.for %arg0 = 0 to 10 {
%t0 = affine.apply (d1) -> (d1 + 1)(%arg0)
affine.store %cf7, %m[%t0] : memref<10xf32>
}
return %m : memref<10xf32>
// CHECK: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %1 = affine.apply #map3(%arg0)
// CHECK-NEXT: affine.store %cst, %0[%1] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: return %0 : memref<10xf32>
}
func @nested_loops_code_invariant_to_both() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
@@ -108,117 +91,6 @@ func @invariant_code_inside_affine_if() {
return
}
func @dependent_stores() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
affine.store %v1, %m[%arg1] : memref<10xf32>
affine.store %v0, %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %2, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
return
}
func @independent_stores() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
affine.store %v0, %m[%arg0] : memref<10xf32>
affine.store %v1, %m[%arg1] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.store %2, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: }
return
}
func @load_dependent_store() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
affine.store %v0, %m[%arg1] : memref<10xf32>
%v2 = affine.load %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
return
}
func @load_after_load() {
%m = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
%v0 = addf %cf7, %cf8 : f32
affine.for %arg1 = 0 to 10 {
%v1 = addf %cf7, %cf7 : f32
%v3 = affine.load %m[%arg1] : memref<10xf32>
%v2 = affine.load %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: %4 = affine.load %0[%arg1] : memref<10xf32>
return
}
func @invariant_affine_if() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
@@ -226,21 +98,17 @@ func @invariant_affine_if() {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: %[[CST:.*]] = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%[[ARG]], %[[ARG]]) {
// CHECK-NEXT: addf %[[CST]], %[[CST]] : f32
// CHECK-NEXT: }
return
}
@@ -252,22 +120,20 @@ func @invariant_affine_if2() {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg1] : memref<10xf32>
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK: alloc
// CHECK-NEXT: constant
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.if
// CHECK-NEXT: addf
// CHECK-NEXT: affine.store
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
@@ -278,23 +144,21 @@ func @invariant_affine_nested_if() {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
affine.store %cf9, %m[%arg1] : memref<10xf32>
%cf10 = addf %cf9, %cf9 : f32
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK: alloc
// CHECK-NEXT: constant
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.if
// CHECK-NEXT: addf
// CHECK-NEXT: affine.if
// CHECK-NEXT: addf
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -312,7 +176,7 @@ func @invariant_affine_nested_if_else() {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
affine.store %cf9, %m[%arg0] : memref<10xf32>
%cf10 = addf %cf9, %cf9 : f32
} else {
affine.store %cf9, %m[%arg1] : memref<10xf32>
}
@@ -320,17 +184,17 @@ func @invariant_affine_nested_if_else() {
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK: alloc
// CHECK-NEXT: constant
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.for
// CHECK-NEXT: affine.if
// CHECK-NEXT: addf
// CHECK-NEXT: affine.store
// CHECK-NEXT: affine.if
// CHECK-NEXT: addf
// CHECK-NEXT: } else {
// CHECK-NEXT: affine.store %1, %0[%arg1] : memref<10xf32>
// CHECK-NEXT: affine.store
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
@@ -339,169 +203,42 @@ func @invariant_affine_nested_if_else() {
return
}
func @invariant_affine_nested_if_else2() {
func @invariant_loop_dialect() {
%ci0 = constant 0 : index
%ci10 = constant 10 : index
%ci1 = constant 1 : index
%m = alloc() : memref<10xf32>
%m2 = alloc() : memref<10xf32>
%cf7 = constant 7.0 : f32
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
%tload1 = affine.load %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
affine.store %cf9, %m2[%arg0] : memref<10xf32>
} else {
%tload2 = affine.load %m[%arg0] : memref<10xf32>
}
}
loop.for %arg0 = %ci0 to %ci10 step %ci1 {
loop.for %arg1 = %ci0 to %ci10 step %ci1 {
%v0 = addf %cf7, %cf8 : f32
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %1 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %2 = addf %cst, %cst : f32
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: affine.store %2, %1[%arg0] : memref<10xf32>
// CHECK-NEXT: } else {
// CHECK-NEXT: %4 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: %cst = constant 7.000000e+00 : f32
// CHECK-NEXT: %cst_0 = constant 8.000000e+00 : f32
// CHECK-NEXT: %1 = addf %cst, %cst_0 : f32
return
}
func @invariant_affine_nested_if2() {
func @variant_loop_dialect() {
%ci0 = constant 0 : index
%ci10 = constant 10 : index
%ci1 = constant 1 : index
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
%v1 = affine.load %m[%arg0] : memref<10xf32>
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%v2 = affine.load %m[%arg0] : memref<10xf32>
}
}
loop.for %arg0 = %ci0 to %ci10 step %ci1 {
loop.for %arg1 = %ci0 to %ci10 step %ci1 {
%v0 = addi %arg0, %arg1 : index
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %3 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_affine_for_inside_affine_if() {
%m = alloc() : memref<10xf32>
%cf8 = constant 8.0 : f32
affine.for %arg0 = 0 to 10 {
affine.for %arg1 = 0 to 10 {
affine.if (d0, d1) : (d1 - d0 >= 0) (%arg0, %arg0) {
%cf9 = addf %cf8, %cf8 : f32
affine.store %cf9, %m[%arg0] : memref<10xf32>
affine.for %arg2 = 0 to 10 {
affine.store %cf9, %m[%arg2] : memref<10xf32>
}
}
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.if #set0(%arg0, %arg0) {
// CHECK-NEXT: %1 = addf %cst, %cst : f32
// CHECK-NEXT: affine.store %1, %0[%arg0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg2 = 0 to 10 {
// CHECK-NEXT: affine.store %1, %0[%arg2] : memref<10xf32>
// CHECK-NEXT: }
// CHECK-NEXT: }
// CHECK-NEXT: }
return
}
func @invariant_constant_and_load() {
%m = alloc() : memref<100xf32>
%m2 = alloc() : memref<100xf32>
affine.for %arg0 = 0 to 5 {
%c0 = constant 0 : index
%v = affine.load %m2[%c0] : memref<100xf32>
affine.store %v, %m[%arg0] : memref<100xf32>
}
// CHECK: %0 = alloc() : memref<100xf32>
// CHECK-NEXT: %1 = alloc() : memref<100xf32>
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: %2 = affine.load %1[%c0] : memref<100xf32>
// CHECK-NEXT: affine.for %arg0 = 0 to 5 {
// CHECK-NEXT: affine.store %2, %0[%arg0] : memref<100xf32>
return
}
func @nested_load_store_same_memref() {
%m = alloc() : memref<10xf32>
%cst = constant 8.0 : f32
%c0 = constant 0 : index
affine.for %arg0 = 0 to 10 {
%v0 = affine.load %m[%c0] : memref<10xf32>
affine.for %arg1 = 0 to 10 {
affine.store %cst, %m[%arg1] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: %1 = affine.load %0[%c0] : memref<10xf32>
// CHECK-NEXT: affine.for %arg1 = 0 to 10 {
// CHECK-NEXT: affine.store %cst, %0[%arg1] : memref<10xf32>
return
}
func @nested_load_store_same_memref2() {
%m = alloc() : memref<10xf32>
%cst = constant 8.0 : f32
%c0 = constant 0 : index
affine.for %arg0 = 0 to 10 {
affine.store %cst, %m[%c0] : memref<10xf32>
affine.for %arg1 = 0 to 10 {
%v0 = affine.load %m[%arg0] : memref<10xf32>
}
}
// CHECK: %0 = alloc() : memref<10xf32>
// CHECK-NEXT: %cst = constant 8.000000e+00 : f32
// CHECK-NEXT: %c0 = constant 0 : index
// CHECK-NEXT: affine.for %arg0 = 0 to 10 {
// CHECK-NEXT: affine.store %cst, %0[%c0] : memref<10xf32>
// CHECK-NEXT: %1 = affine.load %0[%arg0] : memref<10xf32>
// CHECK-NEXT: loop.for
// CHECK-NEXT: loop.for
// CHECK-NEXT: addi
return
}