[mlir] add support for verification in integration tests

The patch extends the runner utils with verification methods that compare two memrefs. The methods compare the contents of the two memrefs and print success if the data is identical up to a small numerical error. The methods are meant to simplify the development of integration tests that, for example, compare optimized and unoptimized code paths (cf. the updates to the linalg matmul integration tests).

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D96326
This commit is contained in:
Tobias Gysi
2021-02-09 17:43:11 +01:00
parent 56c446a878
commit 5fa893cc38
6 changed files with 235 additions and 25 deletions

View File

@@ -31,6 +31,7 @@
#endif // _WIN32
#include <assert.h>
#include <cmath>
#include <iostream>
#include "mlir/ExecutionEngine/CRunnerUtils.h"
@@ -73,11 +74,13 @@ namespace impl {
template <typename T, int M, int... Dims>
std::ostream &operator<<(std::ostream &os, const Vector<T, M, Dims...> &v);
template <int... Dims> struct StaticSizeMult {
template <int... Dims>
struct StaticSizeMult {
static constexpr int value = 1;
};
template <int N, int... Dims> struct StaticSizeMult<N, Dims...> {
template <int N, int... Dims>
struct StaticSizeMult<N, Dims...> {
static constexpr int value = N * StaticSizeMult<Dims...>::value;
};
@@ -87,7 +90,8 @@ static inline void printSpace(std::ostream &os, int count) {
}
}
template <typename T, int M, int... Dims> struct VectorDataPrinter {
template <typename T, int M, int... Dims>
struct VectorDataPrinter {
static void print(std::ostream &os, const Vector<T, M, Dims...> &val);
};
@@ -211,6 +215,112 @@ void printMemRef(UnrankedMemRefType<T> &M) {
std::cout << "Unranked Memref ";
printMemRef(DynamicMemRefType<T>(M));
}
/// Element-wise comparison helper used to check that two computations
/// produced equivalent data, tolerating a small numerical error for
/// floating-point element types.
template <typename T>
struct MemRefDataVerifier {
  /// Number of mismatches that are reported in detail before the output is
  /// abbreviated.
  static constexpr int errorLimit = 10;

  /// Returns true if both values are finite and their absolute difference does
  /// not exceed `epsilon` times the larger of their magnitudes.
  static bool verifyRelErrorSmallerThan(T actual, T expected, T epsilon);

  /// Returns true if the two elements match: exact equality by default, a
  /// relative-error comparison for the float and double specializations.
  static bool verifyElem(T actual, T expected);

  /// Recursively walks `dim` dimensions described by `sizes` and `strides`,
  /// compares the elements found at each linear offset, writes mismatch
  /// details to `os`, and adds the number of mismatches to `errors`.
  static void verify(std::ostream &os, T *actualBasePtr, T *expectedBasePtr,
                     int64_t dim, int64_t offset, const int64_t *sizes,
                     const int64_t *strides, int64_t &errors);
};

template <typename T>
bool MemRefDataVerifier<T>::verifyRelErrorSmallerThan(T actual, T expected,
                                                      T epsilon) {
  // Infinite or NaN inputs are always reported as a mismatch.
  const bool bothFinite = std::isfinite(actual) && std::isfinite(expected);
  if (!bothFinite)
    return false;

  // Scale the tolerance by the larger magnitude of the two values.
  const T actualMagnitude = std::abs(actual);
  const T expectedMagnitude = std::abs(expected);
  const T largerMagnitude = std::max(actualMagnitude, expectedMagnitude);
  const T difference = std::abs(actual - expected);
  return !(difference > epsilon * largerMagnitude);
}

template <typename T>
bool MemRefDataVerifier<T>::verifyElem(T actual, T expected) {
  // Default policy: the elements must be exactly equal.
  const bool identical = (actual == expected);
  return identical;
}

template <>
inline bool MemRefDataVerifier<double>::verifyElem(double actual,
                                                   double expected) {
  // Relative tolerance applied to double-precision elements.
  constexpr double relTolerance = 1e-12;
  return verifyRelErrorSmallerThan(actual, expected, relTolerance);
}

template <>
inline bool MemRefDataVerifier<float>::verifyElem(float actual,
                                                  float expected) {
  // Relative tolerance applied to single-precision elements.
  constexpr float relTolerance = 1e-6;
  return verifyRelErrorSmallerThan(actual, expected, relTolerance);
}

template <typename T>
void MemRefDataVerifier<T>::verify(std::ostream &os, T *actualBasePtr,
                                   T *expectedBasePtr, int64_t dim,
                                   int64_t offset, const int64_t *sizes,
                                   const int64_t *strides, int64_t &errors) {
  if (dim != 0) {
    // Step through the outermost remaining dimension and recurse into the
    // inner dimensions with the correspondingly advanced linear offset.
    for (int64_t idx = 0; idx < sizes[0]; ++idx) {
      const int64_t innerOffset = offset + idx * strides[0];
      verify(os, actualBasePtr, expectedBasePtr, dim - 1, innerOffset,
             sizes + 1, strides + 1, errors);
    }
    return;
  }

  // All dimensions are consumed: compare the single element at `offset`.
  if (verifyElem(actualBasePtr[offset], expectedBasePtr[offset]))
    return;

  // Report the first `errorLimit` mismatches in detail, print one ellipsis
  // line for the next one, and only count the remaining ones.
  if (errors < errorLimit) {
    os << actualBasePtr[offset] << " != " << expectedBasePtr[offset]
       << " offset = " << offset << "\n";
  } else if (errors == errorLimit) {
    os << "...\n";
  }
  ++errors;
}
/// Verify the equivalence of two dynamic memrefs.
///
/// Returns -1, after printing the metadata of both memrefs to std::cerr, if
/// the ranks, offsets, sizes, or strides of the two memrefs differ. Otherwise
/// returns the number of mismatching elements counted by
/// MemRefDataVerifier<T>::verify, which reports mismatches on std::cerr.
template <typename T>
int64_t verifyMemRef(const DynamicMemRefType<T> &actual,
                     const DynamicMemRefType<T> &expected) {
  // Rank and offset are properties of the whole memref, so compare them
  // before looking at individual dimensions. Doing this outside the loop also
  // catches a mismatch when `actual` has rank 0 and the loop never runs.
  bool layoutsMatch =
      actual.rank == expected.rank && actual.offset == expected.offset;

  // Compare sizes and strides dimension by dimension; stop at the first
  // difference. The ranks are known to be equal here, so indexing `expected`
  // with `i` stays within its rank.
  for (int64_t i = 0; layoutsMatch && i < actual.rank; ++i) {
    layoutsMatch = actual.sizes[i] == expected.sizes[i] &&
                   actual.strides[i] == expected.strides[i];
  }

  if (!layoutsMatch) {
    printMemRefMetaData(std::cerr, actual);
    printMemRefMetaData(std::cerr, expected);
    return -1;
  }

  // Count the errors and print the verification result.
  int64_t errors = 0;
  MemRefDataVerifier<T>::verify(std::cerr, actual.basePtr, expected.basePtr,
                                actual.rank, actual.offset, actual.sizes,
                                actual.strides, errors);
  return errors;
}
/// Verify the equivalence of two unranked memrefs by converting both to
/// dynamic memref descriptors and comparing those.
template <typename T>
int64_t verifyMemRef(UnrankedMemRefType<T> &actual,
                     UnrankedMemRefType<T> &expected) {
  // Build the dynamic views first, then forward to the dynamic overload.
  const DynamicMemRefType<T> actualView(actual);
  const DynamicMemRefType<T> expectedView(expected);
  return verifyMemRef(actualView, expectedView);
}
} // namespace impl
////////////////////////////////////////////////////////////////////////////////
@@ -247,4 +357,21 @@ extern "C" MLIR_RUNNERUTILS_EXPORT void
_mlir_ciface_print_memref_vector_4x4xf32(
StridedMemRefType<Vector2D<4, 4, float>, 2> *M);
// Exported C entry points that compare two unranked memrefs passed by
// descriptor pointer, for i32, f32, and f64 element types. Each returns the
// result of impl::verifyMemRef: -1 if the memref metadata differs, otherwise
// the number of mismatching elements.
extern "C" MLIR_RUNNERUTILS_EXPORT int64_t _mlir_ciface_verifyMemRefI32(
UnrankedMemRefType<int32_t> *actual, UnrankedMemRefType<int32_t> *expected);
extern "C" MLIR_RUNNERUTILS_EXPORT int64_t _mlir_ciface_verifyMemRefF32(
UnrankedMemRefType<float> *actual, UnrankedMemRefType<float> *expected);
extern "C" MLIR_RUNNERUTILS_EXPORT int64_t _mlir_ciface_verifyMemRefF64(
UnrankedMemRefType<double> *actual, UnrankedMemRefType<double> *expected);
// Exported C entry points that take the unranked memrefs unpacked as a shared
// rank plus one opaque descriptor pointer per memref, for i32, f32, and f64
// element types. Each forwards to the matching _mlir_ciface_ variant and
// returns its result.
extern "C" MLIR_RUNNERUTILS_EXPORT int64_t verifyMemRefI32(int64_t rank,
void *actualPtr,
void *expectedPtr);
extern "C" MLIR_RUNNERUTILS_EXPORT int64_t verifyMemRefF32(int64_t rank,
void *actualPtr,
void *expectedPtr);
extern "C" MLIR_RUNNERUTILS_EXPORT int64_t verifyMemRefF64(int64_t rank,
void *actualPtr,
void *expectedPtr);
#endif // EXECUTIONENGINE_RUNNERUTILS_H_

View File

@@ -1,6 +1,6 @@
// RUN: export M=24 && export K=64 && export N=192 && export ITERS=10 && \
// RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.fill register-tile-sizes=4,32 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.copy register-tile-sizes=4,32 vectorize" | \
@@ -9,6 +9,7 @@
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// Use tee to both print to stderr and FileCheck
// RUN: tee -a /dev/stderr | FileCheck %s
@@ -87,10 +88,17 @@ func @main() {
%tmatmul = subf %t_end_matmul, %t_start_matmul: f64
call @print_perf(%iters, %tmatmul) : (index, f64) -> ()
%res = load %C[%c0, %c0]: !row_major_C
// CHECK: 64
vector.print %res: f32
// CHECK: {{^0}}
%C_ref = alloc() : !row_major_C
linalg.fill(%C_ref, %v0) : !row_major_C, !elem_type_c
linalg.matmul ins(%A, %B : !row_major_A, !row_major_B)
outs(%C_ref: !row_major_C)
%act = memref_cast %C : !row_major_C to memref<*xf32>
%exp = memref_cast %C_ref : !row_major_C to memref<*xf32>
%errors = call @verifyMemRefF32(%act, %exp) : (memref<*xf32>, memref<*xf32>) -> i64
vector.print %errors : i64
dealloc %C_ref : !row_major_C
dealloc %A : !row_major_A
dealloc %B : !row_major_B
dealloc %C : !row_major_C
@@ -99,6 +107,7 @@ func @main() {
}
func private @rtclock() -> f64
func private @verifyMemRefF32(memref<*xf32>, memref<*xf32>) -> i64 attributes { llvm.emit_c_interface }
// TODO: init with random, run and check output.
// func private @fill_random_f32(memref<*xf32>)

View File

@@ -1,6 +1,6 @@
// RUN: export M=24 && export K=64 && export N=192 && export ITERS=10 && \
// RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul_column_major anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.fill register-tile-sizes=4,16 vectorize" | \
// TODO: linalg.copy vectorization in the presence of permutation map fails. Enable when addressed.
@@ -11,6 +11,7 @@
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// Use tee to both print to stderr and FileCheck
// RUN: tee -a /dev/stderr | FileCheck %s
@@ -84,9 +85,16 @@ func @main() {
%tmatmul_column_major = subf %t_end_matmul_column_major, %t_start_matmul_column_major: f64
call @print_perf(%iters, %tmatmul_column_major) : (index, f64) -> ()
%res = load %cC[%c0, %c0]: !column_major_C
// CHECK: 64
vector.print %res: !elem_type_c
// CHECK: {{^0}}
%cC_ref = alloc() : !column_major_C
linalg.fill(%cC_ref, %f0) : !column_major_C, !elem_type_c
linalg.matmul_column_major ins(%cA, %cB : !column_major_A, !column_major_B)
outs(%cC_ref: !column_major_C)
%act = memref_cast %cC : !column_major_C to memref<*xf32>
%exp = memref_cast %cC_ref : !column_major_C to memref<*xf32>
%errors = call @verifyMemRefF32(%act, %exp) : (memref<*xf32>, memref<*xf32>) -> i64
vector.print %errors : i64
dealloc %cC_ref : !column_major_C
dealloc %cA : !column_major_A
dealloc %cB : !column_major_B
@@ -96,6 +104,7 @@ func @main() {
}
func private @rtclock() -> f64
func private @verifyMemRefF32(memref<*xf32>, memref<*xf32>) -> i64 attributes { llvm.emit_c_interface }
// TODO: init with random, run and check output.
// func private @fill_random_f32(memref<*xf32>)

View File

@@ -1,7 +1,7 @@
// RUN: export M=24 && export K=64 && export N=192 && export ITERS=10 && \
// RUN: cat %s | sed 's@${M}@'"$M"'@g'| sed 's@${K}@'"$K"'@g' | sed 's@${N}@'"$N"'@g'| sed 's@${ITERS}@'"$ITERS"'@g'| \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul_column_major_as_row_major anchor-op=linalg.matmul_column_major register-tile-sizes=16,0,32 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-func=matmul_column_major_as_row_major anchor-op=linalg.matmul register-tile-sizes=12,32,16 vectorize" | \
// RUN: mlir-opt -test-linalg-codegen-strategy="anchor-op=linalg.fill register-tile-sizes=4,16 vectorize" | \
// TODO: linalg.copy vectorization in the presence of permutation map fails. Enable when addressed.
@@ -12,6 +12,7 @@
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// Use tee to both print to stderr and FileCheck
// RUN: tee -a /dev/stderr | FileCheck %s
@@ -63,11 +64,11 @@ func @print_perf(%iters: index, %total_time: f64) {
func @main() {
%f0 = constant 0.0 : !elem_type_c
%f1 = constant 1.0 : !elem_type_a
%cA = alloc() : !column_major_A
%cB = alloc() : !column_major_B
%cC = alloc() : !column_major_C
linalg.fill(%cA, %f1) : !column_major_A, !elem_type_a
linalg.fill(%cB, %f1) : !column_major_B, !elem_type_b
linalg.fill(%cC, %f0) : !column_major_C, !elem_type_c
@@ -95,13 +96,28 @@ func @main() {
%tmatmul_column_major_as_row_major = subf %t_end_matmul_column_major_as_row_major, %t_start_matmul_column_major_as_row_major: f64
call @print_perf(%iters, %tmatmul_column_major_as_row_major) : (index, f64) -> ()
%res = load %cC[%c0, %c0]: !column_major_C
// CHECK: 64
vector.print %res: !elem_type_c
%res2 = load %C[%c0, %c0]: !row_major_C
// CHECK: 64
vector.print %res2: !elem_type_c
// CHECK: {{^0}}
%cC_ref = alloc() : !column_major_C
linalg.fill(%cC_ref, %f0) : !column_major_C, !elem_type_c
linalg.matmul_column_major ins(%cA, %cB : !column_major_A, !column_major_B)
outs(%cC_ref: !column_major_C)
%act1 = memref_cast %cC : !column_major_C to memref<*xf32>
%exp1 = memref_cast %cC_ref : !column_major_C to memref<*xf32>
%errors1 = call @verifyMemRefF32(%act1, %exp1) : (memref<*xf32>, memref<*xf32>) -> i64
vector.print %errors1 : i64
dealloc %cC_ref : !column_major_C
// CHECK: {{^0}}
%C_ref = alloc() : !row_major_C
linalg.fill(%C_ref, %f0) : !row_major_C, !elem_type_c
linalg.matmul ins(%A, %B : !row_major_A, !row_major_B)
outs(%C_ref: !row_major_C)
%act2 = memref_cast %C : !row_major_C to memref<*xf32>
%exp2 = memref_cast %C_ref : !row_major_C to memref<*xf32>
%errors2 = call @verifyMemRefF32(%act2, %exp2) : (memref<*xf32>, memref<*xf32>) -> i64
vector.print %errors2 : i64
dealloc %C_ref : !row_major_C
dealloc %A : !row_major_A
dealloc %B : !row_major_B
dealloc %C : !row_major_C
@@ -114,6 +130,7 @@ func @main() {
}
func private @rtclock() -> f64
func private @verifyMemRefF32(memref<*xf32>, memref<*xf32>) -> i64 attributes { llvm.emit_c_interface }
// TODO: init with random, run and check output.
// func private @fill_random_f32(memref<*xf32>)

View File

@@ -9,6 +9,7 @@
// RUN: mlir-cpu-runner -O3 -e main -entry-point-result=void \
// Activate to dump assembly
// R_UN: -dump-object-file -object-filename=/tmp/a.o \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
// Use tee to both print to stderr and FileCheck
// RUN: tee -a /dev/stderr | FileCheck %s
@@ -85,9 +86,16 @@ func @main() {
%tmatmul = subf %t_end_matmul, %t_start_matmul: f64
call @print_perf(%iters, %tmatmul) : (index, f64) -> ()
%res = load %C[%c0, %c0]: !row_major_C
// CHECK: 64
vector.print %res: !elem_type_c
// CHECK: {{^0}}
%C_ref = alloc() : !row_major_C
linalg.fill(%C_ref, %v0) : !row_major_C, !elem_type_c
linalg.matmul_i8_i8_i32 ins(%A, %B : !row_major_A, !row_major_B)
outs(%C_ref: !row_major_C)
%res = memref_cast %C : !row_major_C to memref<*xi32>
%exp = memref_cast %C_ref : !row_major_C to memref<*xi32>
%errors = call @verifyMemRefI32(%res, %exp) : (memref<*xi32>, memref<*xi32>) -> i64
vector.print %errors : i64
dealloc %C_ref : !row_major_C
dealloc %A : !row_major_A
dealloc %B : !row_major_B
@@ -97,6 +105,7 @@ func @main() {
}
func private @rtclock() -> f64
func private @verifyMemRefI32(memref<*xi32>, memref<*xi32>) -> i64 attributes { llvm.emit_c_interface }
// TODO: init with random, run and check output.
// func private @fill_random_f32(memref<*xf32>)

View File

@@ -80,3 +80,42 @@ extern "C" void
_mlir_ciface_print_memref_4d_f32(StridedMemRefType<float, 4> *M) {
impl::printMemRef(*M);
}
/// C-interface entry point comparing two unranked i32 memrefs; returns the
/// result of impl::verifyMemRef.
extern "C" int64_t
_mlir_ciface_verifyMemRefI32(UnrankedMemRefType<int32_t> *actual,
                             UnrankedMemRefType<int32_t> *expected) {
  UnrankedMemRefType<int32_t> &actualRef = *actual;
  UnrankedMemRefType<int32_t> &expectedRef = *expected;
  return impl::verifyMemRef(actualRef, expectedRef);
}
/// C-interface entry point comparing two unranked f32 memrefs; returns the
/// result of impl::verifyMemRef.
extern "C" int64_t
_mlir_ciface_verifyMemRefF32(UnrankedMemRefType<float> *actual,
                             UnrankedMemRefType<float> *expected) {
  UnrankedMemRefType<float> &actualRef = *actual;
  UnrankedMemRefType<float> &expectedRef = *expected;
  return impl::verifyMemRef(actualRef, expectedRef);
}
/// C-interface entry point comparing two unranked f64 memrefs; returns the
/// result of impl::verifyMemRef.
extern "C" int64_t
_mlir_ciface_verifyMemRefF64(UnrankedMemRefType<double> *actual,
                             UnrankedMemRefType<double> *expected) {
  UnrankedMemRefType<double> &actualRef = *actual;
  UnrankedMemRefType<double> &expectedRef = *expected;
  return impl::verifyMemRef(actualRef, expectedRef);
}
extern "C" int64_t verifyMemRefI32(int64_t rank, void *actualPtr,
void *expectedPtr) {
UnrankedMemRefType<int32_t> actualDesc = {rank, actualPtr};
UnrankedMemRefType<int32_t> expectedDesc = {rank, expectedPtr};
return _mlir_ciface_verifyMemRefI32(&actualDesc, &expectedDesc);
}
extern "C" int64_t verifyMemRefF32(int64_t rank, void *actualPtr,
void *expectedPtr) {
UnrankedMemRefType<float> actualDesc = {rank, actualPtr};
UnrankedMemRefType<float> expectedDesc = {rank, expectedPtr};
return _mlir_ciface_verifyMemRefF32(&actualDesc, &expectedDesc);
}
extern "C" int64_t verifyMemRefF64(int64_t rank, void *actualPtr,
void *expectedPtr) {
UnrankedMemRefType<double> actualDesc = {rank, actualPtr};
UnrankedMemRefType<double> expectedDesc = {rank, expectedPtr};
return _mlir_ciface_verifyMemRefF64(&actualDesc, &expectedDesc);
}