//===- BufferPlacement.cpp - the impl for buffer placement ---------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements logic for computing correct alloc and dealloc positions. // Furthermore, buffer placement also adds required new alloc and copy // operations to ensure that all buffers are deallocated.The main class is the // BufferPlacementPass class that implements the underlying algorithm. In order // to put allocations and deallocations at safe positions, it is significantly // important to put them into the correct blocks. However, the liveness analysis // does not pay attention to aliases, which can occur due to branches (and their // associated block arguments) in general. For this purpose, BufferPlacement // firstly finds all possible aliases for a single value (using the // BufferPlacementAliasAnalysis class). Consider the following example: // // ^bb0(%arg0): // cond_br %cond, ^bb1, ^bb2 // ^bb1: // br ^exit(%arg0) // ^bb2: // %new_value = ... // br ^exit(%new_value) // ^exit(%arg1): // return %arg1; // // Using liveness information on its own would cause us to place the allocs and // deallocs in the wrong block. This is due to the fact that %new_value will not // be liveOut of its block. Instead, we can place the alloc for %new_value // in bb0 and its associated dealloc in exit. Alternatively, the alloc can stay // (or even has to stay due to additional dependencies) at this location and we // have to free the buffer in the same block, because it cannot be freed in the // post dominator. However, this requires a new copy buffer for %arg1 that will // contain the actual contents. Using the class BufferPlacementAliasAnalysis, we // will find out that %new_value has a potential alias %arg1. In order to find // the dealloc position we have to find all potential aliases, iterate over // their uses and find the common post-dominator block (note that additional // copies and buffers remove potential aliases and will influence the placement // of the deallocs). In all cases, the computed block can be safely used to free // the %new_value buffer (may be exit or bb2) as it will die and we can use // liveness information to determine the exact operation after which we have to // insert the dealloc. Finding the alloc position is similar and non-obvious. // However, the algorithm supports moving allocs to other places and introducing // copy buffers and placing deallocs in safe places to ensure that all buffers // will be freed in the end. // // TODO: // The current implementation does not support loops and the resulting code will // be invalid with respect to program semantics. The only thing that is // currently missing is a high-level loop analysis that allows us to move allocs // and deallocs outside of the loop blocks. Furthermore, it doesn't also accept // functions which return buffers already. // //===----------------------------------------------------------------------===// #include "mlir/Transforms/BufferPlacement.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/IR/Operation.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/Passes.h" #include "llvm/ADT/SetOperations.h" using namespace mlir; /// Walks over all immediate return-like terminators in the given region. template static void walkReturnOperations(Region *region, const FuncT &func) { for (Block &block : *region) for (Operation &operation : block) { // Skip non-return-like terminators. if (operation.hasTrait()) func(&operation); } } namespace { //===----------------------------------------------------------------------===// // BufferPlacementAliasAnalysis //===----------------------------------------------------------------------===// /// A straight-forward alias analysis which ensures that all aliases of all /// values will be determined. This is a requirement for the BufferPlacement /// class since you need to determine safe positions to place alloc and /// deallocs. class BufferPlacementAliasAnalysis { public: using ValueSetT = SmallPtrSet; using ValueMapT = llvm::DenseMap; public: /// Constructs a new alias analysis using the op provided. BufferPlacementAliasAnalysis(Operation *op) { build(op); } /// Find all immediate aliases this value could potentially have. ValueMapT::const_iterator find(Value value) const { return aliases.find(value); } /// Returns the end iterator that can be used in combination with find. ValueMapT::const_iterator end() const { return aliases.end(); } /// Find all immediate and indirect aliases this value could potentially /// have. Note that the resulting set will also contain the value provided as /// it is an alias of itself. ValueSetT resolve(Value value) const { ValueSetT result; resolveRecursive(value, result); return result; } /// Removes the given values from all alias sets. void remove(const SmallPtrSetImpl &aliasValues) { for (auto &entry : aliases) llvm::set_subtract(entry.second, aliasValues); } private: /// Recursively determines alias information for the given value. It stores /// all newly found potential aliases in the given result set. void resolveRecursive(Value value, ValueSetT &result) const { if (!result.insert(value).second) return; auto it = aliases.find(value); if (it == aliases.end()) return; for (Value alias : it->second) resolveRecursive(alias, result); } /// This function constructs a mapping from values to its immediate aliases. /// It iterates over all blocks, gets their predecessors, determines the /// values that will be passed to the corresponding block arguments and /// inserts them into the underlying map. Furthermore, it wires successor /// regions and branch-like return operations from nested regions. void build(Operation *op) { // Registers all aliases of the given values. auto registerAliases = [&](auto values, auto aliases) { for (auto entry : llvm::zip(values, aliases)) this->aliases[std::get<0>(entry)].insert(std::get<1>(entry)); }; // Add additional aliases created by view changes to the alias list. op->walk([&](ViewLikeOpInterface viewInterface) { aliases[viewInterface.getViewSource()].insert( viewInterface.getOperation()->getResult(0)); }); // Query all branch interfaces to link block argument aliases. op->walk([&](BranchOpInterface branchInterface) { Block *parentBlock = branchInterface.getOperation()->getBlock(); for (auto it = parentBlock->succ_begin(), e = parentBlock->succ_end(); it != e; ++it) { // Query the branch op interface to get the successor operands. auto successorOperands = branchInterface.getSuccessorOperands(it.getIndex()); if (!successorOperands.hasValue()) continue; // Build the actual mapping of values to their immediate aliases. registerAliases(successorOperands.getValue(), (*it)->getArguments()); } }); // Query the RegionBranchOpInterface to find potential successor regions. op->walk([&](RegionBranchOpInterface regionInterface) { // Create an empty attribute for each operand to comply with the // `getSuccessorRegions` interface definition that requires a single // attribute per operand. SmallVector operandAttributes( regionInterface.getOperation()->getNumOperands()); // Extract all entry regions and wire all initial entry successor inputs. SmallVector entrySuccessors; regionInterface.getSuccessorRegions(/*index=*/llvm::None, operandAttributes, entrySuccessors); for (RegionSuccessor &entrySuccessor : entrySuccessors) { // Wire the entry region's successor arguments with the initial // successor inputs. assert(entrySuccessor.getSuccessor() && "Invalid entry region without an attached successor region"); registerAliases(regionInterface.getSuccessorEntryOperands( entrySuccessor.getSuccessor()->getRegionNumber()), entrySuccessor.getSuccessorInputs()); } // Wire flow between regions and from region exits. for (Region ®ion : regionInterface.getOperation()->getRegions()) { // Iterate over all successor region entries that are reachable from the // current region. SmallVector successorRegions; regionInterface.getSuccessorRegions( region.getRegionNumber(), operandAttributes, successorRegions); for (RegionSuccessor &successorRegion : successorRegions) { // Iterate over all immediate terminator operations and wire the // successor inputs with the operands of each terminator. walkReturnOperations(®ion, [&](Operation *terminator) { registerAliases(terminator->getOperands(), successorRegion.getSuccessorInputs()); }); } } }); } /// Maps values to all immediate aliases this value can have. ValueMapT aliases; }; //===----------------------------------------------------------------------===// // BufferPlacement //===----------------------------------------------------------------------===// // The main buffer placement analysis used to place allocs, copies and deallocs. class BufferPlacement { public: using ValueSetT = BufferPlacementAliasAnalysis::ValueSetT; /// An intermediate representation of a single allocation node. struct AllocEntry { /// A reference to the associated allocation node. Value allocValue; /// The associated placement block in which the allocation should be /// performed. Block *placementBlock; /// The associated dealloc operation (if any). Operation *deallocOperation; }; using AllocEntryList = SmallVector; public: BufferPlacement(Operation *op) : operation(op), aliases(op), liveness(op), dominators(op), postDominators(op) { // Gather all allocation nodes initBlockMapping(); } /// Performs the actual placement/creation of all alloc, copy and dealloc /// nodes. void place() { // Place all allocations. placeAllocs(); // Add additional allocations and copies that are required. introduceCopies(); // Find all associated dealloc nodes. findDeallocs(); // Place deallocations for all allocation entries. placeDeallocs(); } private: /// Initializes the internal block mapping by discovering allocation nodes. It /// maps all allocation nodes to their initial block in which they can be /// safely allocated. void initBlockMapping() { operation->walk([&](MemoryEffectOpInterface opInterface) { // Try to find a single allocation result. SmallVector effects; opInterface.getEffects(effects); SmallVector allocateResultEffects; llvm::copy_if(effects, std::back_inserter(allocateResultEffects), [=](MemoryEffects::EffectInstance &it) { Value value = it.getValue(); return isa(it.getEffect()) && value && value.isa(); }); // If there is one result only, we will be able to move the allocation and // (possibly existing) deallocation ops. if (allocateResultEffects.size() != 1) return; // Get allocation result. auto allocResult = allocateResultEffects[0].getValue().cast(); // Find the initial allocation block and register this result. allocs.push_back( {allocResult, getInitialAllocBlock(allocResult), nullptr}); }); } /// Computes a valid allocation position in a dominator (if possible) for the /// given allocation result. Block *getInitialAllocBlock(OpResult result) { // Get all allocation operands as these operands are important for the // allocation operation. Operation *owner = result.getOwner(); auto operands = owner->getOperands(); Block *dominator; if (operands.size() < 1) dominator = findCommonDominator(result, aliases.resolve(result), dominators); else { // If this node has dependencies, check all dependent nodes with respect // to a common post dominator in which all values are available. ValueSetT dependencies(++operands.begin(), operands.end()); dominator = findCommonDominator(*operands.begin(), dependencies, postDominators); } // Do not move allocs out of their parent regions to keep them local. if (dominator->getParent() != owner->getParentRegion()) return &owner->getParentRegion()->front(); return dominator; } /// Finds correct alloc positions according to the algorithm described at /// the top of the file for all alloc nodes that can be handled by this /// analysis. void placeAllocs() const { for (const AllocEntry &entry : allocs) { Value alloc = entry.allocValue; // Get the actual block to place the alloc and get liveness information // for the placement block. Block *placementBlock = entry.placementBlock; // We have to ensure that we place the alloc before its first use in this // block. const LivenessBlockInfo *livenessInfo = liveness.getLiveness(placementBlock); Operation *startOperation = livenessInfo->getStartOperation(alloc); // Check whether the start operation lies in the desired placement block. // If not, we will use the terminator as this is the last operation in // this block. if (startOperation->getBlock() != placementBlock) startOperation = placementBlock->getTerminator(); // Move the alloc in front of the start operation. Operation *allocOperation = alloc.getDefiningOp(); allocOperation->moveBefore(startOperation); } } /// Introduces required allocs and copy operations to avoid memory leaks. void introduceCopies() { // Initialize the set of values that require a dedicated memory free // operation since their operands cannot be safely deallocated in a post // dominator. SmallPtrSet valuesToFree; llvm::SmallDenseSet> visitedValues; SmallVector, 8> toProcess; // Check dominance relation for proper dominance properties. If the given // value node does not dominate an alias, we will have to create a copy in // order to free all buffers that can potentially leak into a post // dominator. auto findUnsafeValues = [&](Value source, Block *definingBlock) { auto it = aliases.find(source); if (it == aliases.end()) return; for (Value value : it->second) { if (valuesToFree.count(value) > 0) continue; // Check whether we have to free this particular block argument. if (!dominators.dominates(definingBlock, value.getParentBlock())) { toProcess.emplace_back(value, value.getParentBlock()); valuesToFree.insert(value); } else if (visitedValues.insert(std::make_tuple(value, definingBlock)) .second) toProcess.emplace_back(value, definingBlock); } }; // Detect possibly unsafe aliases starting from all allocations. for (auto &entry : allocs) findUnsafeValues(entry.allocValue, entry.placementBlock); // Try to find block arguments that require an explicit free operation // until we reach a fix point. while (!toProcess.empty()) { auto current = toProcess.pop_back_val(); findUnsafeValues(std::get<0>(current), std::get<1>(current)); } // Update buffer aliases to ensure that we free all buffers and block // arguments at the correct locations. aliases.remove(valuesToFree); // Add new allocs and additional copy operations. for (Value value : valuesToFree) { if (auto blockArg = value.dyn_cast()) introduceBlockArgCopy(blockArg); else introduceValueCopyForRegionResult(value); // Register the value to require a final dealloc. Note that we do not have // to assign a block here since we do not want to move the allocation node // to another location. allocs.push_back({value, nullptr, nullptr}); } } /// Introduces temporary allocs in all predecessors and copies the source /// values into the newly allocated buffers. void introduceBlockArgCopy(BlockArgument blockArg) { // Allocate a buffer for the current block argument in the block of // the associated value (which will be a predecessor block by // definition). Block *block = blockArg.getOwner(); for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) { // Get the terminator and the value that will be passed to our // argument. Operation *terminator = (*it)->getTerminator(); auto branchInterface = cast(terminator); // Query the associated source value. Value sourceValue = branchInterface.getSuccessorOperands(it.getSuccessorIndex()) .getValue()[blockArg.getArgNumber()]; // Create a new alloc and copy at the current location of the terminator. Value alloc = introduceBufferCopy(sourceValue, terminator); // Wire new alloc and successor operand. auto mutableOperands = branchInterface.getMutableSuccessorOperands(it.getSuccessorIndex()); if (!mutableOperands.hasValue()) terminator->emitError() << "terminators with immutable successor " "operands are not supported"; else mutableOperands.getValue() .slice(blockArg.getArgNumber(), 1) .assign(alloc); } // Check whether the block argument has implicitly defined predecessors via // the RegionBranchOpInterface. This can be the case if the current block // argument belongs to the first block in a region and the parent operation // implements the RegionBranchOpInterface. Region *argRegion = block->getParent(); RegionBranchOpInterface regionInterface; if (!argRegion || &argRegion->front() != block || !(regionInterface = dyn_cast(argRegion->getParentOp()))) return; introduceCopiesForRegionSuccessors( regionInterface, argRegion->getParentOp()->getRegions(), [&](RegionSuccessor &successorRegion) { // Find a predecessor of our argRegion. return successorRegion.getSuccessor() == argRegion; }, [&](RegionSuccessor &successorRegion) { // The operand index will be the argument number. return blockArg.getArgNumber(); }); } /// Introduces temporary allocs in front of all associated nested-region /// terminators and copies the source values into the newly allocated buffers. void introduceValueCopyForRegionResult(Value value) { // Get the actual result index in the scope of the parent terminator. Operation *operation = value.getDefiningOp(); auto regionInterface = cast(operation); introduceCopiesForRegionSuccessors( regionInterface, operation->getRegions(), [&](RegionSuccessor &successorRegion) { // Determine whether this region has a successor entry that leaves // this region by returning to its parent operation. return !successorRegion.getSuccessor(); }, [&](RegionSuccessor &successorRegion) { // Find the associated success input index. return llvm::find(successorRegion.getSuccessorInputs(), value) .getIndex(); }); } /// Introduces buffer copies for all terminators in the given regions. The /// regionPredicate is applied to every successor region in order to restrict /// the copies to specific regions. Thereby, the operandProvider is invoked /// for each matching region successor and determines the operand index that /// requires a buffer copy. template void introduceCopiesForRegionSuccessors(RegionBranchOpInterface regionInterface, MutableArrayRef regions, const TPredicate ®ionPredicate, const TOperandProvider &operandProvider) { // Create an empty attribute for each operand to comply with the // `getSuccessorRegions` interface definition that requires a single // attribute per operand. SmallVector operandAttributes( regionInterface.getOperation()->getNumOperands()); for (Region ®ion : regions) { // Query the regionInterface to get all successor regions of the current // one. SmallVector successorRegions; regionInterface.getSuccessorRegions(region.getRegionNumber(), operandAttributes, successorRegions); // Try to find a matching region successor. RegionSuccessor *regionSuccessor = llvm::find_if(successorRegions, regionPredicate); if (regionSuccessor == successorRegions.end()) continue; // Get the operand index in the context of the current successor input // bindings. auto operandIndex = operandProvider(*regionSuccessor); // Iterate over all immediate terminator operations to introduce // new buffer allocations. Thereby, the appropriate terminator operand // will be adjusted to point to the newly allocated buffer instead. walkReturnOperations(®ion, [&](Operation *terminator) { // Extract the source value from the current terminator. Value sourceValue = terminator->getOperand(operandIndex); // Create a new alloc at the current location of the terminator. Value alloc = introduceBufferCopy(sourceValue, terminator); // Wire alloc and terminator operand. terminator->setOperand(operandIndex, alloc); }); } } /// Creates a new memory allocation for the given source value and copies /// its content into the newly allocated buffer. The terminator operation is /// used to insert the alloc and copy operations at the right places. Value introduceBufferCopy(Value sourceValue, Operation *terminator) { // Create a new alloc at the current location of the terminator. auto memRefType = sourceValue.getType().cast(); OpBuilder builder(terminator); // Extract information about dynamically shaped types by // extracting their dynamic dimensions. SmallVector dynamicOperands; for (auto shapeElement : llvm::enumerate(memRefType.getShape())) { if (!ShapedType::isDynamic(shapeElement.value())) continue; dynamicOperands.push_back(builder.create( terminator->getLoc(), sourceValue, shapeElement.index())); } // TODO: provide a generic interface to create dialect-specific // Alloc and CopyOp nodes. auto alloc = builder.create(terminator->getLoc(), memRefType, dynamicOperands); // Create a new copy operation that copies to contents of the old // allocation to the new one. builder.create(terminator->getLoc(), sourceValue, alloc); return alloc; } /// Finds associated deallocs that can be linked to our allocation nodes (if /// any). void findDeallocs() { for (auto &entry : allocs) { auto userIt = llvm::find_if(entry.allocValue.getUsers(), [&](Operation *user) { auto effectInterface = dyn_cast(user); if (!effectInterface) return false; // Try to find a free effect that is applied to one of our values // that will be automatically freed by our pass. SmallVector effects; effectInterface.getEffectsOnValue(entry.allocValue, effects); return llvm::any_of( effects, [&](MemoryEffects::EffectInstance &it) { return isa(it.getEffect()); }); }); // Assign the associated dealloc operation (if any). if (userIt != entry.allocValue.user_end()) entry.deallocOperation = *userIt; } } /// Finds correct dealloc positions according to the algorithm described at /// the top of the file for all alloc nodes and block arguments that can be /// handled by this analysis. void placeDeallocs() const { // Move or insert deallocs using the previously computed information. // These deallocations will be linked to their associated allocation nodes // since they don't have any aliases that can (potentially) increase their // liveness. for (const AllocEntry &entry : allocs) { Value alloc = entry.allocValue; auto aliasesSet = aliases.resolve(alloc); assert(aliasesSet.size() > 0 && "must contain at least one alias"); // Determine the actual block to place the dealloc and get liveness // information. Block *placementBlock = findCommonDominator(alloc, aliasesSet, postDominators); const LivenessBlockInfo *livenessInfo = liveness.getLiveness(placementBlock); // We have to ensure that the dealloc will be after the last use of all // aliases of the given value. We first assume that there are no uses in // the placementBlock and that we can safely place the dealloc at the // beginning. Operation *endOperation = &placementBlock->front(); // Iterate over all aliases and ensure that the endOperation will point // to the last operation of all potential aliases in the placementBlock. for (Value alias : aliasesSet) { Operation *aliasEndOperation = livenessInfo->getEndOperation(alias, endOperation); // Check whether the aliasEndOperation lies in the desired block and // whether it is behind the current endOperation. If yes, this will be // the new endOperation. if (aliasEndOperation->getBlock() == placementBlock && endOperation->isBeforeInBlock(aliasEndOperation)) endOperation = aliasEndOperation; } // endOperation is the last operation behind which we can safely store // the dealloc taking all potential aliases into account. // If there is an existing dealloc, move it to the right place. if (entry.deallocOperation) { entry.deallocOperation->moveAfter(endOperation); } else { // If the Dealloc position is at the terminator operation of the // block, then the value should escape from a deallocation. Operation *nextOp = endOperation->getNextNode(); if (!nextOp) continue; // If there is no dealloc node, insert one in the right place. OpBuilder builder(nextOp); builder.create(alloc.getLoc(), alloc); } } } /// Finds a common dominator for the given value while taking the positions /// of the values in the value set into account. It supports dominator and /// post-dominator analyses via template arguments. template Block *findCommonDominator(Value value, const ValueSetT &values, const DominatorT &doms) const { // Start with the current block the value is defined in. Block *dom = value.getParentBlock(); // Iterate over all aliases and their uses to find a safe placement block // according to the given dominator information. for (Value childValue : values) for (Operation *user : childValue.getUsers()) { // Move upwards in the dominator tree to find an appropriate // dominator block that takes the current use into account. dom = doms.findNearestCommonDominator(dom, user->getBlock()); } return dom; } /// The operation this transformation was constructed from. Operation *operation; /// Alias information that can be updated during the insertion of copies. BufferPlacementAliasAnalysis aliases; /// Maps allocation nodes to their associated blocks. AllocEntryList allocs; /// The underlying liveness analysis to compute fine grained information /// about alloc and dealloc positions. Liveness liveness; /// The dominator analysis to place deallocs in the appropriate blocks. DominanceInfo dominators; /// The post dominator analysis to place deallocs in the appropriate blocks. PostDominanceInfo postDominators; }; //===----------------------------------------------------------------------===// // BufferPlacementPass //===----------------------------------------------------------------------===// /// The actual buffer placement pass that moves alloc and dealloc nodes into /// the right positions. It uses the algorithm described at the top of the /// file. struct BufferPlacementPass : mlir::PassWrapper { void runOnFunction() override { // Place all required alloc, copy and dealloc nodes. BufferPlacement placement(getFunction()); placement.place(); } }; } // end anonymous namespace //===----------------------------------------------------------------------===// // BufferAssignmentPlacer //===----------------------------------------------------------------------===// /// Creates a new assignment placer. BufferAssignmentPlacer::BufferAssignmentPlacer(Operation *op) : operation(op) {} /// Computes the actual position to place allocs for the given value. OpBuilder::InsertPoint BufferAssignmentPlacer::computeAllocPosition(OpResult result) { Operation *owner = result.getOwner(); return OpBuilder::InsertPoint(owner->getBlock(), Block::iterator(owner)); } //===----------------------------------------------------------------------===// // BufferAssignmentTypeConverter //===----------------------------------------------------------------------===// /// Registers conversions into BufferAssignmentTypeConverter BufferAssignmentTypeConverter::BufferAssignmentTypeConverter() { // Keep all types unchanged. addConversion([](Type type) { return type; }); // Convert RankedTensorType to MemRefType. addConversion([](RankedTensorType type) { return (Type)MemRefType::get(type.getShape(), type.getElementType()); }); // Convert UnrankedTensorType to UnrankedMemRefType. addConversion([](UnrankedTensorType type) { return (Type)UnrankedMemRefType::get(type.getElementType(), 0); }); } /// Checks if `type` has been converted from non-memref type to memref. bool BufferAssignmentTypeConverter::isConvertedMemref(Type type, Type before) { return type.isa() && !before.isa(); } //===----------------------------------------------------------------------===// // BufferPlacementPass construction //===----------------------------------------------------------------------===// std::unique_ptr mlir::createBufferPlacementPass() { return std::make_unique(); }