[mlir] [VectorOps] Improve scatter/gather CPU performance

Replaced the linearized address computation with the proper LLVM way of defining a vector of base + indices in SIMD style. This yields much better code. Some prototype results from microbenchmarking sparse matrix x vector with 50% sparsity (about 2-3x faster), in GFLOPS:

| Size    | Linearized sdot | Linearized saxpy | Improved sdot | Improved saxpy |
|---------|-----------------|------------------|---------------|----------------|
| 16x16   | 1.6             | 1.4              | 4.4           | 2.1            |
| 32x32   | 1.7             | 1.6              | 5.8           | 5.9            |
| 64x64   | 1.7             | 1.7              | 6.4           | 6.4            |
| 128x128 | 1.7             | 1.7              | 5.9           | 5.9            |
| 256x256 | 1.6             | 1.6              | 6.1           | 6.0            |
| 512x512 | 1.4             | 1.4              | 4.9           | 4.7            |

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D84368
2020-07-22 23:47:33 -07:00
parent dab898f9ab
commit 1485fd295b
3 changed files with 7 additions and 20 deletions
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -147,28 +147,13 @@ LogicalResult getIndexedPtrs(ConversionPatternRewriter &rewriter,
      offset != 0 || memRefType.getMemorySpace() != 0)
    return failure();

-  // Base pointer.
+  // Create a vector of pointers from base and indices.
  MemRefDescriptor memRefDescriptor(memref);
  Value base = memRefDescriptor.alignedPtr(rewriter, loc);
-
-  // Create a vector of pointers from base and indices.
-  //
-  // TODO: this step serializes the address computations unfortunately,
-  //       ideally we would like to add splat(base) + index_vector
-  //       in SIMD form, but this does not match well with current
-  //       constraints of the standard and vector dialect....
-  //
  int64_t size = vType.getDimSize(0);
  auto pType = memRefDescriptor.getElementType();
  auto ptrsType = LLVM::LLVMType::getVectorTy(pType, size);
-  auto idxType = typeConverter.convertType(iType);
-  ptrs = rewriter.create<LLVM::UndefOp>(loc, ptrsType);
-  for (int64_t i = 0; i < size; i++) {
-    Value off =
-        extractOne(rewriter, typeConverter, loc, indices, idxType, 1, i);
-    Value ptr = rewriter.create<LLVM::GEPOp>(loc, pType, base, off);
-    ptrs = insertOne(rewriter, typeConverter, loc, ptrs, ptr, ptrsType, 1, i);
-  }
+  ptrs = rewriter.create<LLVM::GEPOp>(loc, ptrsType, base, indices);
  return success();
 }