Files
clang-p2996/llvm/test/CodeGen/SystemZ/vec-zext.ll
Jonas Paulsson ef7aad0db4 [SystemZ] Improve handling of ZERO_EXTEND_VECTOR_INREG.
Instead of doing multiple unpacks when zero extending vectors (e.g. v2i16 ->
v2i64), benchmarks have shown that it is better to do a VPERM (vector
permute) since that is only one sequential instruction on the critical path.

This patch achieves this by

1. Expand ZERO_EXTEND_VECTOR_INREG into a vector shuffle with a zero vector
   instead of (multiple) unpacks.

2. Improve SystemZ::GeneralShuffle to perform a single unpack as the last
   operation if Bytes matches it.

Review: Ulrich Weigand
Differential Revision: https://reviews.llvm.org/D78486
2020-06-30 09:08:10 +02:00

95 lines
2.2 KiB
LLVM

; Test that vector zexts are done efficiently also in case of fewer elements
; than allowed, e.g. <2 x i32>.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
define <2 x i16> @fun1(<2 x i8> %val1) {
; CHECK-LABEL: fun1:
; CHECK: vuplhb %v24, %v24
; CHECK-NEXT: br %r14
%z = zext <2 x i8> %val1 to <2 x i16>
ret <2 x i16> %z
}
; fun2: zero-extend <2 x i8> to <2 x i32>.
; The expected output is a larl/vl pair referencing .LCPI1_0 followed by one
; vperm, rather than a chain of unpacks.
define <2 x i32> @fun2(<2 x i8> %val1) {
; CHECK-LABEL: fun2:
; CHECK: larl %r1, .LCPI1_0
; CHECK-NEXT: vl %v0, 0(%r1), 3
; CHECK-NEXT: vperm %v24, %v0, %v24, %v0
; CHECK-NEXT: br %r14
  %ext = zext <2 x i8> %val1 to <2 x i32>
  ret <2 x i32> %ext
}
; fun3: zero-extend <2 x i8> to <2 x i64>.
; The expected output is a larl/vl pair referencing .LCPI2_0 followed by one
; vperm and the return.
define <2 x i64> @fun3(<2 x i8> %val1) {
; CHECK-LABEL: fun3:
; CHECK: larl %r1, .LCPI2_0
; CHECK-NEXT: vl %v0, 0(%r1), 3
; CHECK-NEXT: vperm %v24, %v0, %v24, %v0
; CHECK-NEXT: br %r14
  %ext = zext <2 x i8> %val1 to <2 x i64>
  ret <2 x i64> %ext
}
; fun4: zero-extend <2 x i16> to <2 x i32>.
; The expected output is a single vuplhh unpack followed directly by the return.
define <2 x i32> @fun4(<2 x i16> %val1) {
; CHECK-LABEL: fun4:
; CHECK: vuplhh %v24, %v24
; CHECK-NEXT: br %r14
  %ext = zext <2 x i16> %val1 to <2 x i32>
  ret <2 x i32> %ext
}
; fun5: zero-extend <2 x i16> to <2 x i64>.
; The expected output is a larl/vl pair referencing .LCPI4_0 followed by one
; vperm and the return.
define <2 x i64> @fun5(<2 x i16> %val1) {
; CHECK-LABEL: fun5:
; CHECK: larl %r1, .LCPI4_0
; CHECK-NEXT: vl %v0, 0(%r1), 3
; CHECK-NEXT: vperm %v24, %v0, %v24, %v0
; CHECK-NEXT: br %r14
  %ext = zext <2 x i16> %val1 to <2 x i64>
  ret <2 x i64> %ext
}
; fun6: zero-extend <2 x i32> to <2 x i64>.
; The expected output is a single vuplhf unpack followed directly by the return.
define <2 x i64> @fun6(<2 x i32> %val1) {
; CHECK-LABEL: fun6:
; CHECK: vuplhf %v24, %v24
; CHECK-NEXT: br %r14
  %ext = zext <2 x i32> %val1 to <2 x i64>
  ret <2 x i64> %ext
}
; fun7: zero-extend <4 x i8> to <4 x i16>.
; The expected output is a single vuplhb unpack followed directly by the return.
define <4 x i16> @fun7(<4 x i8> %val1) {
; CHECK-LABEL: fun7:
; CHECK: vuplhb %v24, %v24
; CHECK-NEXT: br %r14
  %ext = zext <4 x i8> %val1 to <4 x i16>
  ret <4 x i16> %ext
}
; fun8: zero-extend <4 x i8> to <4 x i32>.
; The expected output is a larl/vl pair referencing .LCPI7_0 followed by one
; vperm and the return.
define <4 x i32> @fun8(<4 x i8> %val1) {
; CHECK-LABEL: fun8:
; CHECK: larl %r1, .LCPI7_0
; CHECK-NEXT: vl %v0, 0(%r1), 3
; CHECK-NEXT: vperm %v24, %v0, %v24, %v0
; CHECK-NEXT: br %r14
  %ext = zext <4 x i8> %val1 to <4 x i32>
  ret <4 x i32> %ext
}
; fun9: zero-extend <4 x i16> to <4 x i32>.
; The expected output is a single vuplhh unpack followed directly by the return.
define <4 x i32> @fun9(<4 x i16> %val1) {
; CHECK-LABEL: fun9:
; CHECK: vuplhh %v24, %v24
; CHECK-NEXT: br %r14
  %ext = zext <4 x i16> %val1 to <4 x i32>
  ret <4 x i32> %ext
}
; fun10: zero-extend <8 x i8> to <8 x i16>.
; The expected output is a single vuplhb unpack followed directly by the return.
define <8 x i16> @fun10(<8 x i8> %val1) {
; CHECK-LABEL: fun10:
; CHECK: vuplhb %v24, %v24
; CHECK-NEXT: br %r14
  %ext = zext <8 x i8> %val1 to <8 x i16>
  ret <8 x i16> %ext
}