On processors supporting vector registers and SIMD instructions, enable i128 as a legal type in VRs. This allows many operations to be implemented via native instructions directly in VRs (including add, subtract, logical operations and shifts). For a few other operations (e.g. multiply and divide, as well as atomic operations), we need to move the i128 value back to a GPR pair to use the corresponding instruction there. Overall, this is still beneficial.

The patch includes the following LLVM changes:
- Enable i128 as a legal type
- Set up legal operations (in SystemZInstrVector.td)
- Custom expansion for i128 add/subtract with carry
- Custom expansion for i128 comparisons and selects
- Support for moving i128 to/from GPR pairs when required
- Handle 128-bit integer constant values everywhere
- Use i128 as the intrinsic operand type where appropriate
- Updated and new test cases

In addition, clang builtins are updated to reflect the intrinsic operand type changes (which also improves compatibility with GCC).
378 lines
10 KiB
LLVM
378 lines
10 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
;
; Test storing of replicated values using vector replicate type instructions.
|
|
|
|
;; Replicated registers
|
|
|
|
; Replicate one loaded byte into both bytes of an i16 (multiply by
; 257 = 0x0101) and store the result. The expected code is a replicating
; byte load (vlrepb) followed by a 2-byte element store (vsteh).
define void @fun_2x1b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_2x1b:
; CHECK: # %bb.0:
; CHECK-NEXT: vlrepb %v0, 0(%r2)
; CHECK-NEXT: vsteh %v0, 0(%r3), 0
; CHECK-NEXT: br %r14
  %i = load i8, ptr %Src
  %ZE = zext i8 %i to i16
  %Val = mul i16 %ZE, 257
  store i16 %Val, ptr %Dst
  ret void
}
|
|
|
|
; Test multiple stores of same value.
; One loaded byte is replicated into all four bytes of an i32 (multiply by
; 16843009 = 0x01010101) and stored to two destinations. Both 4-byte element
; stores (vstef) are expected to reuse the single vlrepb result.
define void @fun_4x1b(ptr %Src, ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_4x1b:
; CHECK: # %bb.0:
; CHECK-NEXT: vlrepb %v0, 0(%r2)
; CHECK-NEXT: vstef %v0, 0(%r3), 0
; CHECK-NEXT: vstef %v0, 0(%r4), 0
; CHECK-NEXT: br %r14
  %i = load i8, ptr %Src
  %ZE = zext i8 %i to i32
  %Val = mul i32 %ZE, 16843009
  store i32 %Val, ptr %Dst
  store i32 %Val, ptr %Dst2
  ret void
}
|
|
|
|
; Replicate one loaded byte into all eight bytes of an i64 (multiply by
; 72340172838076673 = 0x0101010101010101) and store the result. The expected
; code is vlrepb followed by an 8-byte element store (vsteg).
define void @fun_8x1b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_8x1b:
; CHECK: # %bb.0:
; CHECK-NEXT: vlrepb %v0, 0(%r2)
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
; CHECK-NEXT: br %r14
  %i = load i8, ptr %Src
  %ZE = zext i8 %i to i64
  %Val = mul i64 %ZE, 72340172838076673
  store i64 %Val, ptr %Dst
  ret void
}
|
|
|
|
; A second truncated store of same value.
; The byte-replicated i64 is stored in full to %Dst, then truncated to i32
; and stored to %Dst2. Both stores (vsteg and vstef) are expected to read
; the same vlrepb result register.
define void @fun_8x1b_4x1b(ptr %Src, ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_8x1b_4x1b:
; CHECK: # %bb.0:
; CHECK-NEXT: vlrepb %v0, 0(%r2)
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
; CHECK-NEXT: vstef %v0, 0(%r4), 0
; CHECK-NEXT: br %r14
  %i = load i8, ptr %Src
  %ZE = zext i8 %i to i64
  %Val = mul i64 %ZE, 72340172838076673
  store i64 %Val, ptr %Dst
  %TrVal = trunc i64 %Val to i32
  store i32 %TrVal, ptr %Dst2
  ret void
}
|
|
|
|
; Replicate one loaded i16 into both halves of an i32 (multiply by
; 65537 = 0x00010001) and store the result. The expected code is a
; replicating halfword load (vlreph) followed by a 4-byte element store
; (vstef).
define void @fun_2x2b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_2x2b:
; CHECK: # %bb.0:
; CHECK-NEXT: vlreph %v0, 0(%r2)
; CHECK-NEXT: vstef %v0, 0(%r3), 0
; CHECK-NEXT: br %r14
  %i = load i16, ptr %Src
  %ZE = zext i16 %i to i32
  %Val = mul i32 %ZE, 65537
  store i32 %Val, ptr %Dst
  ret void
}
|
|
|
|
; Replicate one loaded i16 into all four halfwords of an i64 (multiply by
; 281479271743489 = 0x0001000100010001) and store the result. The expected
; code is vlreph followed by an 8-byte element store (vsteg).
define void @fun_4x2b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_4x2b:
; CHECK: # %bb.0:
; CHECK-NEXT: vlreph %v0, 0(%r2)
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
; CHECK-NEXT: br %r14
  %i = load i16, ptr %Src
  %ZE = zext i16 %i to i64
  %Val = mul i64 %ZE, 281479271743489
  store i64 %Val, ptr %Dst
  ret void
}
|
|
|
|
; Replicate one loaded i32 into both words of an i64 (multiply by
; 4294967297 = 0x0000000100000001) and store the result. The expected code
; is a replicating word load (vlrepf) followed by an 8-byte element store
; (vsteg).
define void @fun_2x4b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_2x4b:
; CHECK: # %bb.0:
; CHECK-NEXT: vlrepf %v0, 0(%r2)
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
; CHECK-NEXT: br %r14
  %i = load i32, ptr %Src
  %ZE = zext i32 %i to i64
  %Val = mul i64 %ZE, 4294967297
  store i64 %Val, ptr %Dst
  ret void
}
|
|
|
|
;; Replicated registers already in a vector.
|
|
|
|
; Test multiple stores of same value.
; A byte-replicated i64 is splatted into a <2 x i64> vector, which is stored
; to two destinations. Both full vector stores (vst) are expected to reuse
; the single vlrepb result.
define void @fun_2Eltsx8x1b(ptr %Src, ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_2Eltsx8x1b:
; CHECK: # %bb.0:
; CHECK-NEXT: vlrepb %v0, 0(%r2)
; CHECK-NEXT: vst %v0, 0(%r3), 3
; CHECK-NEXT: vst %v0, 0(%r4), 3
; CHECK-NEXT: br %r14
  %i = load i8, ptr %Src
  %ZE = zext i8 %i to i64
  %Mul = mul i64 %ZE, 72340172838076673
  %tmp = insertelement <2 x i64> undef, i64 %Mul, i32 0
  %Val = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %Val, ptr %Dst
  store <2 x i64> %Val, ptr %Dst2
  ret void
}
|
|
|
|
; A halfword-replicated i32 (multiply by 65537 = 0x00010001) is splatted
; into a <4 x i32> vector and stored. The expected code is vlreph followed
; by one full vector store (vst).
define void @fun_4Eltsx2x2b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_4Eltsx2x2b:
; CHECK: # %bb.0:
; CHECK-NEXT: vlreph %v0, 0(%r2)
; CHECK-NEXT: vst %v0, 0(%r3), 3
; CHECK-NEXT: br %r14
  %i = load i16, ptr %Src
  %ZE = zext i16 %i to i32
  %Mul = mul i32 %ZE, 65537
  %tmp = insertelement <4 x i32> undef, i32 %Mul, i32 0
  %Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %Val, ptr %Dst
  ret void
}
|
|
|
|
; A halfword-replicated i32 is splatted into a <6 x i32> vector (24 bytes),
; which does not fit one vector register. The store is expected to be split
; into a full vector store (vst) at offset 0 and an 8-byte element store
; (vsteg) at offset 16, both reading the same vlreph result.
define void @fun_6Eltsx2x2b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_6Eltsx2x2b:
; CHECK: # %bb.0:
; CHECK-NEXT: vlreph %v0, 0(%r2)
; CHECK-NEXT: vsteg %v0, 16(%r3), 0
; CHECK-NEXT: vst %v0, 0(%r3), 4
; CHECK-NEXT: br %r14
  %i = load i16, ptr %Src
  %ZE = zext i16 %i to i32
  %Mul = mul i32 %ZE, 65537
  %tmp = insertelement <6 x i32> undef, i32 %Mul, i32 0
  %Val = shufflevector <6 x i32> %tmp, <6 x i32> undef, <6 x i32> zeroinitializer
  store <6 x i32> %Val, ptr %Dst
  ret void
}
|
|
|
|
; A word-replicated i64 (multiply by 4294967297 = 0x0000000100000001) is
; splatted into a <2 x i64> vector and stored. The expected code is vlrepf
; followed by one full vector store (vst).
define void @fun_2Eltsx2x4b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_2Eltsx2x4b:
; CHECK: # %bb.0:
; CHECK-NEXT: vlrepf %v0, 0(%r2)
; CHECK-NEXT: vst %v0, 0(%r3), 3
; CHECK-NEXT: br %r14
  %i = load i32, ptr %Src
  %ZE = zext i32 %i to i64
  %Mul = mul i64 %ZE, 4294967297
  %tmp = insertelement <2 x i64> undef, i64 %Mul, i32 0
  %Val = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %Val, ptr %Dst
  ret void
}
|
|
|
|
; A word-replicated i64 is splatted into a <5 x i64> vector (40 bytes). The
; store is expected to be split into two full vector stores (vst) at offsets
; 0 and 16 plus an 8-byte element store (vsteg) at offset 32, all reading
; the same vlrepf result.
define void @fun_5Eltsx2x4b(ptr %Src, ptr %Dst) {
; CHECK-LABEL: fun_5Eltsx2x4b:
; CHECK: # %bb.0:
; CHECK-NEXT: vlrepf %v0, 0(%r2)
; CHECK-NEXT: vsteg %v0, 32(%r3), 0
; CHECK-NEXT: vst %v0, 16(%r3), 4
; CHECK-NEXT: vst %v0, 0(%r3), 4
; CHECK-NEXT: br %r14
  %i = load i32, ptr %Src
  %ZE = zext i32 %i to i64
  %Mul = mul i64 %ZE, 4294967297
  %tmp = insertelement <5 x i64> undef, i64 %Mul, i32 0
  %Val = shufflevector <5 x i64> %tmp, <5 x i64> undef, <5 x i32> zeroinitializer
  store <5 x i64> %Val, ptr %Dst
  ret void
}
|
|
|
|
; Test replicating an incoming argument.
; The byte arrives in a register rather than from memory, so no replicating
; load is possible. The expected code moves the GPR into a vector register
; (vlvgp), replicates byte element 7 (vrepb) and stores 8 bytes (vsteg).
define void @fun_8x1b_arg(i8 %Arg, ptr %Dst) {
; CHECK-LABEL: fun_8x1b_arg:
; CHECK: # %bb.0:
; CHECK-NEXT: vlvgp %v0, %r2, %r2
; CHECK-NEXT: vrepb %v0, %v0, 7
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
; CHECK-NEXT: br %r14
  %ZE = zext i8 %Arg to i64
  %Val = mul i64 %ZE, 72340172838076673
  store i64 %Val, ptr %Dst
  ret void
}
|
|
|
|
; A replication of a non-local value (ISD::AssertZext case).
; In the IR the i32 is loaded in the entry block, while the zero-extension,
; replication (multiply by 4294967297 = 0x0000000100000001), splat and store
; all happen in %bb2, so the replicated value is defined in another block.
; NOTE(review): the undef pointers and the undef branch condition are kept
; unchanged on purpose; the autogenerated assertions below depend on them.
define void @fun_nonlocalval() {
; CHECK-LABEL: fun_nonlocalval:
; CHECK: # %bb.0:
; CHECK-NEXT: lhi %r0, 0
; CHECK-NEXT: ciblh %r0, 0, 0(%r14)
; CHECK-NEXT: .LBB13_1: # %bb2
; CHECK-NEXT: llgf %r0, 0(%r1)
; CHECK-NEXT: vlvgp %v0, %r0, %r0
; CHECK-NEXT: vrepf %v0, %v0, 1
; CHECK-NEXT: vst %v0, 0(%r1), 3
; CHECK-NEXT: br %r14
  %i = load i32, ptr undef, align 4
  br i1 undef, label %bb2, label %bb7

bb2:                                              ; preds = %0
  %i3 = zext i32 %i to i64
  %i4 = mul nuw i64 %i3, 4294967297
  %i5 = insertelement <2 x i64> poison, i64 %i4, i64 0
  %i6 = shufflevector <2 x i64> %i5, <2 x i64> poison, <2 x i32> zeroinitializer
  store <2 x i64> %i6, ptr undef, align 8
  ret void

bb7:                                              ; preds = %0
  ret void
}
|
|
|
|
;; Replicated immediates
|
|
|
|
; Some cases where scalar instruction is better
|
|
; Storing an i64 zero: every byte is the same, but the expected code is a
; single scalar store of an immediate (mvghi) and no vector instruction.
define void @fun_8x1i_zero(ptr %Dst) {
; CHECK-LABEL: fun_8x1i_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: mvghi 0(%r2), 0
; CHECK-NEXT: br %r14
  store i64 0, ptr %Dst
  ret void
}
|
|
|
|
; Storing i32 -1 (all bytes 0xff): the expected code is a single scalar
; store of an immediate (mvhi) and no vector instruction.
define void @fun_4x1i_minus1(ptr %Dst) {
; CHECK-LABEL: fun_4x1i_minus1:
; CHECK: # %bb.0:
; CHECK-NEXT: mvhi 0(%r2), -1
; CHECK-NEXT: br %r14
  store i32 -1, ptr %Dst
  ret void
}
|
|
|
|
; Same bit pattern as i32 -1, but written as the unsigned literal 4294967295
; (0xffffffff). The expected code is the same scalar store, mvhi with -1.
define void @fun_4x1i_allones(ptr %Dst) {
; CHECK-LABEL: fun_4x1i_allones:
; CHECK: # %bb.0:
; CHECK-NEXT: mvhi 0(%r2), -1
; CHECK-NEXT: br %r14
  store i32 4294967295, ptr %Dst
  ret void
}
|
|
|
|
; Storing the plain i16 constant 1: the expected code is a single scalar
; store of an immediate (mvhhi) and no vector instruction.
define void @fun_2i(ptr %Dst) {
; CHECK-LABEL: fun_2i:
; CHECK: # %bb.0:
; CHECK-NEXT: mvhhi 0(%r2), 1
; CHECK-NEXT: br %r14
  store i16 1, ptr %Dst
  ret void
}
|
|
|
|
; Storing the i32 constant 65537 (0x00010001), i.e. the halfword 1 repeated
; twice. The expected code replicates the immediate per halfword (vrepih)
; and stores 4 bytes of the vector (vstef).
define void @fun_2x2i(ptr %Dst) {
; CHECK-LABEL: fun_2x2i:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepih %v0, 1
; CHECK-NEXT: vstef %v0, 0(%r2), 0
; CHECK-NEXT: br %r14
  store i32 65537, ptr %Dst
  ret void
}
|
|
|
|
; Storing the i64 constant 281479271743489 (0x0001000100010001), i.e. the
; halfword 1 repeated four times. The expected code is vrepih followed by an
; 8-byte element store (vsteg).
define void @fun_4x2i(ptr %Dst) {
; CHECK-LABEL: fun_4x2i:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepih %v0, 1
; CHECK-NEXT: vsteg %v0, 0(%r2), 0
; CHECK-NEXT: br %r14
  store i64 281479271743489, ptr %Dst
  ret void
}
|
|
|
|
; Storing the i64 constant 4294967297 (0x0000000100000001), i.e. the word 1
; repeated twice. The expected code replicates the immediate per word
; (vrepif) and stores 8 bytes of the vector (vsteg).
define void @fun_2x4i(ptr %Dst) {
; CHECK-LABEL: fun_2x4i:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepif %v0, 1
; CHECK-NEXT: vsteg %v0, 0(%r2), 0
; CHECK-NEXT: br %r14
  store i64 4294967297, ptr %Dst
  ret void
}
|
|
|
|
; Store replicated immediate twice using the same vector.
|
|
; The i32 constant 50529027 (0x03030303, the byte 3 repeated four times) is
; stored to two destinations. The expected code builds the value once with a
; per-byte immediate replicate (vrepib) and issues two 4-byte element stores
; (vstef) from the same register.
define void @fun_4x1i(ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_4x1i:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepib %v0, 3
; CHECK-NEXT: vstef %v0, 0(%r2), 0
; CHECK-NEXT: vstef %v0, 0(%r3), 0
; CHECK-NEXT: br %r14
  store i32 50529027, ptr %Dst
  store i32 50529027, ptr %Dst2
  ret void
}
|
|
|
|
; The i64 constant 72340172838076673 (0x0101010101010101, the byte 1 repeated
; eight times) is stored to two destinations. The expected code builds the
; value once with vrepib and issues two 8-byte element stores (vsteg) from
; the same register.
define void @fun_8x1i(ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_8x1i:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepib %v0, 1
; CHECK-NEXT: vsteg %v0, 0(%r2), 0
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
; CHECK-NEXT: br %r14
  store i64 72340172838076673, ptr %Dst
  store i64 72340172838076673, ptr %Dst2
  ret void
}
|
|
|
|
; Similar, but with vectors.
; A <4 x i32> splat and a <2 x i32> splat of the same constant 50529027
; (0x03030303) are stored to different destinations. The expected code
; builds one vrepib register and uses it for both the full vector store
; (vst) and the 8-byte element store (vsteg).
define void @fun_4Eltsx4x1i_2Eltsx4x1i(ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_4Eltsx4x1i_2Eltsx4x1i:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepib %v0, 3
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
; CHECK-NEXT: br %r14
  %tmp = insertelement <4 x i32> undef, i32 50529027, i32 0
  %Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %Val, ptr %Dst
  %tmp2 = insertelement <2 x i32> undef, i32 50529027, i32 0
  %Val2 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
  store <2 x i32> %Val2, ptr %Dst2
  ret void
}
|
|
|
|
; Same, but 64-bit store is scalar.
; The second store writes the scalar i64 constant 217020518514230019
; (0x0303030303030303), which has the same byte pattern as the <4 x i32>
; splat of 50529027 (0x03030303). The expected code reuses the one vrepib
; register for both the vst and the vsteg.
define void @fun_4Eltsx4x1i_8x1i(ptr %Dst, ptr %Dst2) {
; CHECK-LABEL: fun_4Eltsx4x1i_8x1i:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepib %v0, 3
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: vsteg %v0, 0(%r3), 0
; CHECK-NEXT: br %r14
  %tmp = insertelement <4 x i32> undef, i32 50529027, i32 0
  %Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %Val, ptr %Dst
  store i64 217020518514230019, ptr %Dst2
  ret void
}
|
|
|
|
; A <3 x i64> splat (24 bytes) of the constant 4294967297
; (0x0000000100000001) is stored. The expected code builds the value with
; vrepif and splits the store into a full vector store (vst) at offset 0
; and an 8-byte element store (vsteg) at offset 16.
define void @fun_3Eltsx2x4i(ptr %Dst) {
; CHECK-LABEL: fun_3Eltsx2x4i:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepif %v0, 1
; CHECK-NEXT: vsteg %v0, 16(%r2), 0
; CHECK-NEXT: vst %v0, 0(%r2), 4
; CHECK-NEXT: br %r14
  %tmp = insertelement <3 x i64> undef, i64 4294967297, i32 0
  %Val = shufflevector <3 x i64> %tmp, <3 x i64> undef, <3 x i32> zeroinitializer
  store <3 x i64> %Val, ptr %Dst
  ret void
}
|
|
|
|
; Storing an i128 constant whose sixteen bytes are all 0x01
; (1334440654591915542993625911497130241 = (2^128 - 1) / 255). The expected
; code builds the value with a per-byte immediate replicate (vrepib) and
; writes it with one full vector store (vst).
define void @fun_16x1i(ptr %Dst) {
; CHECK-LABEL: fun_16x1i:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepib %v0, 1
; CHECK-NEXT: vst %v0, 0(%r2), 3
; CHECK-NEXT: br %r14
  store i128 1334440654591915542993625911497130241, ptr %Dst
  ret void
}
|