Simplify the vector unrolling pattern so that it is more aligned with the rest of the patterns and closer to vector distribution. The new implementation uses ExtractStridedSlice/InsertStridedSlice instead of the Tuple ops. After this change the ops based on Tuple no longer have any uses, so they can be removed. This allows removing a significant amount of dead code and will allow extending the unrolling code going forward. Differential Revision: https://reviews.llvm.org/D105381
121 lines
5.0 KiB
MLIR
121 lines
5.0 KiB
MLIR
// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
// RUN:   -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

// Test entry point. Builds small f32 vectors, matrices and one rank-3 tensor
// from scalar constants, applies vector.transpose with a range of
// permutations, and prints every value with vector.print so that the printed
// text can be matched against the expected-output comments that follow each
// group of prints. Takes no arguments and returns nothing.
func @entry() {
  // Scalar building blocks; %f0 is only used as filler for the initial
  // broadcasts, every element of which is overwritten afterwards.
  %f0 = constant 0.0: f32
  %f1 = constant 1.0: f32
  %f2 = constant 2.0: f32
  %f3 = constant 3.0: f32
  %f4 = constant 4.0: f32
  %f5 = constant 5.0: f32
  %f6 = constant 6.0: f32
  %f7 = constant 7.0: f32
  %f8 = constant 8.0: f32

  // Construct test vectors and matrices.
  // Each 2-element row is made by broadcasting its first value and then
  // overwriting element 1: %a = (1, 2), %b = (3, 4), %c = (5, 6), %d = (7, 8).
  %0 = vector.broadcast %f1 : f32 to vector<2xf32>
  %a = vector.insert %f2, %0[1] : f32 into vector<2xf32>
  %1 = vector.broadcast %f3 : f32 to vector<2xf32>
  %b = vector.insert %f4, %1[1] : f32 into vector<2xf32>
  %2 = vector.broadcast %f5 : f32 to vector<2xf32>
  %c = vector.insert %f6, %2[1] : f32 into vector<2xf32>
  %3 = vector.broadcast %f7 : f32 to vector<2xf32>
  %d = vector.insert %f8, %3[1] : f32 into vector<2xf32>
  // %A (2x2) has rows %a, %b.
  %4 = vector.broadcast %f0 : f32 to vector<2x2xf32>
  %5 = vector.insert %a, %4[0] : vector<2xf32> into vector<2x2xf32>
  %A = vector.insert %b, %5[1] : vector<2xf32> into vector<2x2xf32>
  // %B (2x2) has rows %c, %d.
  %6 = vector.broadcast %f0 : f32 to vector<2x2xf32>
  %7 = vector.insert %c, %6[0] : vector<2xf32> into vector<2x2xf32>
  %B = vector.insert %d, %7[1] : vector<2xf32> into vector<2x2xf32>
  // %C (3x2, non-square) has rows %a, %b, %c.
  %8 = vector.broadcast %f0 : f32 to vector<3x2xf32>
  %9 = vector.insert %a, %8[0] : vector<2xf32> into vector<3x2xf32>
  %10 = vector.insert %b, %9[1] : vector<2xf32> into vector<3x2xf32>
  %C = vector.insert %c, %10[2] : vector<2xf32> into vector<3x2xf32>
  // %D (2x4) places %A in columns 0-1 and %B in columns 2-3 of a zero matrix.
  %cst = constant dense<0.000000e+00> : vector<2x4xf32>
  %11 = vector.insert_strided_slice %A, %cst {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<2x4xf32>
  %D = vector.insert_strided_slice %B, %11 {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<2x4xf32>

  vector.print %A : vector<2x2xf32>
  vector.print %B : vector<2x2xf32>
  vector.print %C : vector<3x2xf32>
  vector.print %D : vector<2x4xf32>
  //
  // test matrices:
  //
  // CHECK: ( ( 1, 2 ), ( 3, 4 ) )
  // CHECK: ( ( 5, 6 ), ( 7, 8 ) )
  // CHECK: ( ( 1, 2 ), ( 3, 4 ), ( 5, 6 ) )
  // CHECK: ( ( 1, 2, 5, 6 ), ( 3, 4, 7, 8 ) )

  // Swap the two dimensions of every matrix; the non-square inputs change
  // shape (3x2 -> 2x3, 2x4 -> 4x2).
  %tA = vector.transpose %A, [1, 0] : vector<2x2xf32> to vector<2x2xf32>
  %tB = vector.transpose %B, [1, 0] : vector<2x2xf32> to vector<2x2xf32>
  %tC = vector.transpose %C, [1, 0] : vector<3x2xf32> to vector<2x3xf32>
  %tD = vector.transpose %D, [1, 0] : vector<2x4xf32> to vector<4x2xf32>

  vector.print %tA : vector<2x2xf32>
  vector.print %tB : vector<2x2xf32>
  vector.print %tC : vector<2x3xf32>
  vector.print %tD : vector<4x2xf32>
  //
  // transposed matrices:
  //
  // CHECK: ( ( 1, 3 ), ( 2, 4 ) )
  // CHECK: ( ( 5, 7 ), ( 6, 8 ) )
  // CHECK: ( ( 1, 3, 5 ), ( 2, 4, 6 ) )
  // CHECK: ( ( 1, 3 ), ( 2, 4 ), ( 5, 7 ), ( 6, 8 ) )

  // Two ways of getting %D back: the identity permutation on %D, and
  // transposing the already-transposed %tD a second time.
  %idD = vector.transpose %D, [0, 1] : vector<2x4xf32> to vector<2x4xf32>
  %ttD = vector.transpose %tD, [1, 0] : vector<4x2xf32> to vector<2x4xf32>

  vector.print %idD : vector<2x4xf32>
  vector.print %ttD : vector<2x4xf32>
  //
  // back to original after transpose matrices:
  //
  // CHECK: ( ( 1, 2, 5, 6 ), ( 3, 4, 7, 8 ) )
  // CHECK: ( ( 1, 2, 5, 6 ), ( 3, 4, 7, 8 ) )

  // Construct test tensor.
  // Start from an all-ones 2x2x2 vector (so element [0, 0, 0] is already 1)
  // and overwrite the remaining seven elements with 2..8 in row-major order.
  %p = vector.broadcast %f1 : f32 to vector<2x2x2xf32>
  %q = vector.insert %f2, %p[0, 0, 1] : f32 into vector<2x2x2xf32>
  %r = vector.insert %f3, %q[0, 1, 0] : f32 into vector<2x2x2xf32>
  %s = vector.insert %f4, %r[0, 1, 1] : f32 into vector<2x2x2xf32>
  %t = vector.insert %f5, %s[1, 0, 0] : f32 into vector<2x2x2xf32>
  %u = vector.insert %f6, %t[1, 0, 1] : f32 into vector<2x2x2xf32>
  %v = vector.insert %f7, %u[1, 1, 0] : f32 into vector<2x2x2xf32>
  %w = vector.insert %f8, %v[1, 1, 1] : f32 into vector<2x2x2xf32>

  vector.print %w : vector<2x2x2xf32>
  //
  // test tensors:
  //
  // CHECK: ( ( ( 1, 2 ), ( 3, 4 ) ), ( ( 5, 6 ), ( 7, 8 ) ) )

  // Exercise all six permutations of three dimensions. As the expected
  // output below confirms, result dimension i is taken from source dimension
  // perm[i]; e.g. [2, 0, 1] yields result[i][j][k] = %w[j][k][i].
  %tP = vector.transpose %w, [0, 1, 2] : vector<2x2x2xf32> to vector<2x2x2xf32>
  %tQ = vector.transpose %w, [0, 2, 1] : vector<2x2x2xf32> to vector<2x2x2xf32>
  %tR = vector.transpose %w, [1, 0, 2] : vector<2x2x2xf32> to vector<2x2x2xf32>
  %tS = vector.transpose %w, [2, 0, 1] : vector<2x2x2xf32> to vector<2x2x2xf32>
  %tT = vector.transpose %w, [1, 2, 0] : vector<2x2x2xf32> to vector<2x2x2xf32>
  %tU = vector.transpose %w, [2, 1, 0] : vector<2x2x2xf32> to vector<2x2x2xf32>

  vector.print %tP : vector<2x2x2xf32>
  vector.print %tQ : vector<2x2x2xf32>
  vector.print %tR : vector<2x2x2xf32>
  vector.print %tS : vector<2x2x2xf32>
  vector.print %tT : vector<2x2x2xf32>
  vector.print %tU : vector<2x2x2xf32>
  //
  // transposed tensors:
  //
  // CHECK: ( ( ( 1, 2 ), ( 3, 4 ) ), ( ( 5, 6 ), ( 7, 8 ) ) )
  // CHECK: ( ( ( 1, 3 ), ( 2, 4 ) ), ( ( 5, 7 ), ( 6, 8 ) ) )
  // CHECK: ( ( ( 1, 2 ), ( 5, 6 ) ), ( ( 3, 4 ), ( 7, 8 ) ) )
  // CHECK: ( ( ( 1, 3 ), ( 5, 7 ) ), ( ( 2, 4 ), ( 6, 8 ) ) )
  // CHECK: ( ( ( 1, 5 ), ( 2, 6 ) ), ( ( 3, 7 ), ( 4, 8 ) ) )
  // CHECK: ( ( ( 1, 5 ), ( 3, 7 ) ), ( ( 2, 6 ), ( 4, 8 ) ) )

  return
}
|