Files
clang-p2996/llvm/test/CodeGen/SystemZ/vec-unpack-04.ll
Ulrich Weigand 4a4987be36 [SystemZ] Optimize vector zero/sign extensions
Generate more efficient code for zero or sign extensions where
the source is a subvector generated via SHUFFLE_VECTOR.

Specifically, recognize patterns corresponding to (series of)
VECTOR UNPACK instructions, or the VECTOR SIGN EXTEND TO
DOUBLEWORD instruction.

As a special case, also handle zero or sign extensions of a
vector element to i128.

Fixes: https://github.com/llvm/llvm-project/issues/129576
Fixes: https://github.com/llvm/llvm-project/issues/129899
2025-03-15 18:28:44 +01:00

80 lines
1.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
; f1: take lane 0 of a <2 x i64> vector and zero-extend it to i128.
; The expected code is a single unpack-logical-high of %v24 into %v0,
; followed by a store of %v0 at 0(%r2).
define i128 @f1(<2 x i64> %a) {
; CHECK-LABEL: f1:
; CHECK: # %bb.0: # %start
; CHECK-NEXT: vuplhg %v0, %v24
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
start:
  %lane = extractelement <2 x i64> %a, i32 0
  %wide = zext i64 %lane to i128
  ret i128 %wide
}
; f2: take lane 1 of a <2 x i64> vector and zero-extend it to i128.
; The expected code is a single unpack-logical-low of %v24 into %v0,
; followed by a store of %v0 at 0(%r2).
define i128 @f2(<2 x i64> %a) {
; CHECK-LABEL: f2:
; CHECK: # %bb.0: # %start
; CHECK-NEXT: vupllg %v0, %v24
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
start:
  %elt = extractelement <2 x i64> %a, i32 1
  %ext = zext i64 %elt to i128
  ret i128 %ext
}
; f3: take lane 0 of a <4 x i32> vector and zero-extend it to i128.
; Two unpack steps are expected (word -> doubleword -> quadword); lane 0
; is reached by using the "high" form in both steps.
define i128 @f3(<4 x i32> %a) {
; CHECK-LABEL: f3:
; CHECK: # %bb.0: # %start
; CHECK-NEXT: vuplhf %v0, %v24
; CHECK-NEXT: vuplhg %v0, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
start:
  %word = extractelement <4 x i32> %a, i32 0
  %quad = zext i32 %word to i128
  ret i128 %quad
}
; f4: take lane 1 of a <4 x i32> vector and zero-extend it to i128.
; Two unpack steps are expected (word -> doubleword -> quadword).
; Lane 1 sits in the high word pair {0,1}, so the first step must be the
; "high" form; it is then the second (low) doubleword of that result, so
; the second step must be the "low" form. A low/high sequence would select
; lane 2 rather than lane 1.
define i128 @f4(<4 x i32> %a) {
; CHECK-LABEL: f4:
; CHECK: # %bb.0: # %start
; CHECK-NEXT: vuplhf %v0, %v24
; CHECK-NEXT: vupllg %v0, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
start:
  %0 = extractelement <4 x i32> %a, i32 1
  %1 = zext i32 %0 to i128
  ret i128 %1
}
; f5: take lane 2 of a <4 x i32> vector and zero-extend it to i128.
; Two unpack steps are expected (word -> doubleword -> quadword).
; Lane 2 sits in the low word pair {2,3}, so the first step must be the
; "low" form; it is then the first (high) doubleword of that result, so
; the second step must be the "high" form. A high/low sequence would select
; lane 1 rather than lane 2.
define i128 @f5(<4 x i32> %a) {
; CHECK-LABEL: f5:
; CHECK: # %bb.0: # %start
; CHECK-NEXT: vupllf %v0, %v24
; CHECK-NEXT: vuplhg %v0, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
start:
  %0 = extractelement <4 x i32> %a, i32 2
  %1 = zext i32 %0 to i128
  ret i128 %1
}
; f6: take lane 3 of a <4 x i32> vector and zero-extend it to i128.
; Two unpack steps are expected (word -> doubleword -> quadword); lane 3
; is reached by using the "low" form in both steps.
define i128 @f6(<4 x i32> %a) {
; CHECK-LABEL: f6:
; CHECK: # %bb.0: # %start
; CHECK-NEXT: vupllf %v0, %v24
; CHECK-NEXT: vupllg %v0, %v0
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
start:
  %src = extractelement <4 x i32> %a, i32 3
  %res = zext i32 %src to i128
  ret i128 %res
}