This transform looks for suitable vector transfers from global memory to shared memory and converts them to async device copies. Differential Revision: https://reviews.llvm.org/D155569
24 lines
412 B
CMake
24 lines
412 B
CMake
# Declares the MLIRNVGPUTransforms library through the add_mlir_dialect_library
# helper, which is defined outside this file. Arguments up to the first keyword
# are the library's source files; each keyword (ADDITIONAL_HEADER_DIRS, DEPENDS,
# LINK_LIBS) starts a new argument group.
add_mlir_dialect_library(MLIRNVGPUTransforms
  CreateAsyncGroups.cpp
  OptimizeSharedMemory.cpp
  MmaSyncTF32Transform.cpp
  Utils.cpp

  ADDITIONAL_HEADER_DIRS
  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/NVGPU

  # NOTE(review): presumably the generated pass-declarations target that must
  # be built before these sources compile -- confirm against the NVGPU include
  # directory's CMakeLists.
  DEPENDS
  MLIRNVGPUPassIncGen

  # Libraries linked with PUBLIC visibility.
  LINK_LIBS PUBLIC
  MLIRArithDialect
  MLIRGPUDialect
  MLIRIR
  MLIRMemRefDialect
  MLIRNVGPUDialect
  MLIRPass
  MLIRTransforms
  MLIRVectorDialect
  MLIRVectorUtils
)