Generate more efficient code for zero or sign extensions where the source is a subvector generated via SHUFFLE_VECTOR. Specifically, recognize patterns corresponding to a VECTOR UNPACK instruction (or a series of them), or to the VECTOR SIGN EXTEND TO DOUBLEWORD instruction. As a special case, also handle zero or sign extensions of a vector element to i128.

Fixes: https://github.com/llvm/llvm-project/issues/129576
Fixes: https://github.com/llvm/llvm-project/issues/129899
80 lines
1.9 KiB
LLVM
80 lines
1.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
|
|
|
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
|
|
|
|
; Zero-extend element 0 of a <2 x i64> vector to i128.
; The expected code is a single VUPLHG followed by the store of the i128
; result through %r2.
define i128 @f1(<2 x i64> %a) {
; CHECK-LABEL: f1:
; CHECK:       # %bb.0: # %start
; CHECK-NEXT:    vuplhg %v0, %v24
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    br %r14
start:
  %elt = extractelement <2 x i64> %a, i32 0
  %wide = zext i64 %elt to i128
  ret i128 %wide
}
|
|
|
|
; Zero-extend element 1 of a <2 x i64> vector to i128.
; The expected code is a single VUPLLG followed by the store of the i128
; result through %r2.
define i128 @f2(<2 x i64> %a) {
; CHECK-LABEL: f2:
; CHECK:       # %bb.0: # %start
; CHECK-NEXT:    vupllg %v0, %v24
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    br %r14
start:
  %elt = extractelement <2 x i64> %a, i32 1
  %wide = zext i64 %elt to i128
  ret i128 %wide
}
|
|
|
|
; Zero-extend element 0 of a <4 x i32> vector to i128.
; The expected code is a two-step unpack: VUPLHF (word -> doubleword)
; followed by VUPLHG (doubleword -> quadword).
define i128 @f3(<4 x i32> %a) {
; CHECK-LABEL: f3:
; CHECK:       # %bb.0: # %start
; CHECK-NEXT:    vuplhf %v0, %v24
; CHECK-NEXT:    vuplhg %v0, %v0
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    br %r14
start:
  %elt = extractelement <4 x i32> %a, i32 0
  %wide = zext i32 %elt to i128
  ret i128 %wide
}
|
|
|
|
; Zero-extend element 1 of a <4 x i32> vector to i128.
; Element 1 lives in the high (leftmost) pair of words, so the first step must
; be VUPLHF, producing the doublewords [elt0, elt1]. Within that pair element 1
; is the low (rightmost) doubleword, so the second step must be VUPLLG.
; The opposite pairing (VUPLLF then VUPLHG) would extend element 2 instead.
define i128 @f4(<4 x i32> %a) {
; CHECK-LABEL: f4:
; CHECK:       # %bb.0: # %start
; CHECK-NEXT:    vuplhf %v0, %v24
; CHECK-NEXT:    vupllg %v0, %v0
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    br %r14
start:
  %elt = extractelement <4 x i32> %a, i32 1
  %wide = zext i32 %elt to i128
  ret i128 %wide
}
|
|
|
|
; Zero-extend element 2 of a <4 x i32> vector to i128.
; Element 2 lives in the low (rightmost) pair of words, so the first step must
; be VUPLLF, producing the doublewords [elt2, elt3]. Within that pair element 2
; is the high (leftmost) doubleword, so the second step must be VUPLHG.
; The opposite pairing (VUPLHF then VUPLLG) would extend element 1 instead.
define i128 @f5(<4 x i32> %a) {
; CHECK-LABEL: f5:
; CHECK:       # %bb.0: # %start
; CHECK-NEXT:    vupllf %v0, %v24
; CHECK-NEXT:    vuplhg %v0, %v0
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    br %r14
start:
  %elt = extractelement <4 x i32> %a, i32 2
  %wide = zext i32 %elt to i128
  ret i128 %wide
}
|
|
|
|
; Zero-extend element 3 of a <4 x i32> vector to i128.
; The expected code is a two-step unpack: VUPLLF (word -> doubleword)
; followed by VUPLLG (doubleword -> quadword).
define i128 @f6(<4 x i32> %a) {
; CHECK-LABEL: f6:
; CHECK:       # %bb.0: # %start
; CHECK-NEXT:    vupllf %v0, %v24
; CHECK-NEXT:    vupllg %v0, %v0
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    br %r14
start:
  %elt = extractelement <4 x i32> %a, i32 3
  %wide = zext i32 %elt to i128
  ret i128 %wide
}
|