The vperm instruction requires its data to be in Altivec registers; if one of the vector operands is not used after the vperm, it can be substituted with xxperm, which doubles the number of available registers. Reviewed By: stefanp. Differential Revision: https://reviews.llvm.org/D133700
1751 lines
68 KiB
LLVM
1751 lines
68 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9,CHECK-PWR9-LE
|
|
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9,CHECK-PWR9-BE
|
|
; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-PWR78,CHECK-PWR8 -implicit-check-not vabsdu
|
|
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s -check-prefixes=CHECK,CHECK-PWR78,CHECK-PWR7 -implicit-check-not vmaxsd
|
|
|
|
; Absolute value of v4i32 written as vmaxsw(a, 0 - a) via the Altivec
; intrinsic. pwr9 folds the negation into vnegw; pwr7/pwr8 must
; materialize zero (xxlxor) and subtract before the vmaxsw.
define <4 x i32> @simple_absv_32(<4 x i32> %a) local_unnamed_addr {
; CHECK-PWR9-LABEL: simple_absv_32:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vnegw v3, v2
; CHECK-PWR9-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: simple_absv_32:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: xxlxor v3, v3, v3
; CHECK-PWR78-NEXT: vsubuwm v3, v3, v2
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: blr
entry:
  %sub.i = sub <4 x i32> zeroinitializer, %a
  %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %a, <4 x i32> %sub.i)
  ret <4 x i32> %0
}
|
|
|
|
; Same abs pattern as simple_absv_32 but with the vmaxsw intrinsic
; operands swapped (max(0 - a, a)). pwr9 still canonicalizes to
; vnegw + vmaxsw; pwr7/pwr8 keep the swapped vmaxsw operand order.
define <4 x i32> @simple_absv_32_swap(<4 x i32> %a) local_unnamed_addr {
; CHECK-PWR9-LABEL: simple_absv_32_swap:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vnegw v3, v2
; CHECK-PWR9-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: simple_absv_32_swap:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: xxlxor v3, v3, v3
; CHECK-PWR78-NEXT: vsubuwm v3, v3, v2
; CHECK-PWR78-NEXT: vmaxsw v2, v3, v2
; CHECK-PWR78-NEXT: blr
entry:
  %sub.i = sub <4 x i32> zeroinitializer, %a
  %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub.i, <4 x i32> %a)
  ret <4 x i32> %0
}
|
|
|
|
; Absolute value of v8i16 via vmaxsh(a, 0 - a). All tested CPUs emit the
; same sequence (xxlxor to get zero, vsubuhm, vmaxsh) — there is no
; halfword equivalent of vnegw, so pwr9 gets no shorter form here.
define <8 x i16> @simple_absv_16(<8 x i16> %a) local_unnamed_addr {
; CHECK-LABEL: simple_absv_16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vsubuhm v3, v3, v2
; CHECK-NEXT: vmaxsh v2, v2, v3
; CHECK-NEXT: blr
entry:
  %sub.i = sub <8 x i16> zeroinitializer, %a
  %0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %a, <8 x i16> %sub.i)
  ret <8 x i16> %0
}
|
|
|
|
; Absolute value of v16i8 via vmaxsb(a, 0 - a). As with the halfword
; case, all tested CPUs share one sequence: zero, vsububm, vmaxsb.
define <16 x i8> @simple_absv_8(<16 x i8> %a) local_unnamed_addr {
; CHECK-LABEL: simple_absv_8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xxlxor v3, v3, v3
; CHECK-NEXT: vsububm v3, v3, v2
; CHECK-NEXT: vmaxsb v2, v2, v3
; CHECK-NEXT: blr
entry:
  %sub.i = sub <16 x i8> zeroinitializer, %a
  %0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %a, <16 x i8> %sub.i)
  ret <16 x i8> %0
}
|
|
|
|
; v2i64 vmax isn't available on pwr7
|
|
; abs(a - b) for v2i64, expressed with icmp/select. pwr9 uses
; vnegd + vmaxsd; pwr8 has vmaxsd but no vnegd, so it subtracts from
; zero first. pwr7 lacks v2i64 max entirely and scalarizes: both
; vectors are spilled to the stack, each lane is abs'ed with the
; classic sradi/xor/sub branchless-abs idiom, and the results are
; stored back and reloaded as a vector.
define <2 x i64> @sub_absv_64(<2 x i64> %a, <2 x i64> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_64:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vsubudm v2, v2, v3
; CHECK-PWR9-NEXT: vnegd v3, v2
; CHECK-PWR9-NEXT: vmaxsd v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR8-LABEL: sub_absv_64:
; CHECK-PWR8: # %bb.0: # %entry
; CHECK-PWR8-NEXT: xxlxor v4, v4, v4
; CHECK-PWR8-NEXT: vsubudm v2, v2, v3
; CHECK-PWR8-NEXT: vsubudm v3, v4, v2
; CHECK-PWR8-NEXT: vmaxsd v2, v2, v3
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR7-LABEL: sub_absv_64:
; CHECK-PWR7: # %bb.0: # %entry
; CHECK-PWR7-NEXT: addi r3, r1, -48
; CHECK-PWR7-NEXT: addi r4, r1, -32
; CHECK-PWR7-NEXT: stxvd2x v2, 0, r3
; CHECK-PWR7-NEXT: stxvd2x v3, 0, r4
; CHECK-PWR7-NEXT: ld r3, -40(r1)
; CHECK-PWR7-NEXT: ld r4, -24(r1)
; CHECK-PWR7-NEXT: ld r5, -48(r1)
; CHECK-PWR7-NEXT: ld r6, -32(r1)
; CHECK-PWR7-NEXT: sub r3, r3, r4
; CHECK-PWR7-NEXT: sub r4, r5, r6
; CHECK-PWR7-NEXT: sradi r5, r3, 63
; CHECK-PWR7-NEXT: sradi r6, r4, 63
; CHECK-PWR7-NEXT: xor r3, r3, r5
; CHECK-PWR7-NEXT: xor r4, r4, r6
; CHECK-PWR7-NEXT: sub r3, r3, r5
; CHECK-PWR7-NEXT: sub r4, r4, r6
; CHECK-PWR7-NEXT: std r3, -8(r1)
; CHECK-PWR7-NEXT: addi r3, r1, -16
; CHECK-PWR7-NEXT: std r4, -16(r1)
; CHECK-PWR7-NEXT: lxvd2x v2, 0, r3
; CHECK-PWR7-NEXT: blr
entry:
  %0 = sub nsw <2 x i64> %a, %b
  %1 = icmp sgt <2 x i64> %0, <i64 -1, i64 -1>
  %2 = sub <2 x i64> zeroinitializer, %0
  %3 = select <2 x i1> %1, <2 x i64> %0, <2 x i64> %2
  ret <2 x i64> %3
}
|
|
|
|
; The select pattern can only be detected for v4i32.
|
|
; abs(a - b) for v4i32 via icmp/select. pwr9 recognizes the whole
; pattern as an absolute difference: both inputs are biased with
; xvnegsp so a single vabsduw (unsigned absolute difference) gives
; the signed result. pwr7/pwr8 fall back to sub, negate, vmaxsw.
define <4 x i32> @sub_absv_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_32:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: xvnegsp v3, v3
; CHECK-PWR9-NEXT: xvnegsp v2, v2
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: sub_absv_32:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: xxlxor v4, v4, v4
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v3, v4, v2
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: blr
entry:
  %0 = sub nsw <4 x i32> %a, %b
  %1 = icmp sgt <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %2 = sub <4 x i32> zeroinitializer, %0
  %3 = select <4 x i1> %1, <4 x i32> %0, <4 x i32> %2
  ret <4 x i32> %3
}
|
|
|
|
; abs(a - b) for v8i16 via icmp/select. Unlike sub_absv_32, no absolute
; difference instruction is formed here (see the comment above
; sub_absv_32: the select pattern is only detected for v4i32); every CPU
; emits sub, negate-via-zero, vmaxsh, differing only in schedule.
define <8 x i16> @sub_absv_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_16:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vsubuhm v2, v2, v3
; CHECK-PWR9-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-NEXT: vsubuhm v3, v3, v2
; CHECK-PWR9-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: sub_absv_16:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: xxlxor v4, v4, v4
; CHECK-PWR78-NEXT: vsubuhm v2, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v3, v4, v2
; CHECK-PWR78-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR78-NEXT: blr
entry:
  %0 = sub nsw <8 x i16> %a, %b
  %1 = icmp sgt <8 x i16> %0, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %2 = sub <8 x i16> zeroinitializer, %0
  %3 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> %2
  ret <8 x i16> %3
}
|
|
|
|
; abs(a - b) for v16i8 via icmp/select. Same shape as sub_absv_16:
; no vabsdub is formed; sub, negate-via-zero, vmaxsb on all CPUs.
define <16 x i8> @sub_absv_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_8:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vsububm v2, v2, v3
; CHECK-PWR9-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-NEXT: vsububm v3, v3, v2
; CHECK-PWR9-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: sub_absv_8:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: xxlxor v4, v4, v4
; CHECK-PWR78-NEXT: vsububm v2, v2, v3
; CHECK-PWR78-NEXT: vsububm v3, v4, v2
; CHECK-PWR78-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR78-NEXT: blr
entry:
  %0 = sub nsw <16 x i8> %a, %b
  %1 = icmp sgt <16 x i8> %0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %2 = sub <16 x i8> zeroinitializer, %0
  %3 = select <16 x i1> %1, <16 x i8> %0, <16 x i8> %2
  ret <16 x i8> %3
}
|
|
|
|
; FIXME: This does not produce the ISD::ABS that we are looking for.
|
|
; We should fix the missing canonicalization.
|
|
; We do manage to find the word version of ABS but not the halfword.
|
|
; Therefore, we end up doing more work than is required with a pair of abs for word
|
|
; instead of just one for the halfword.
|
|
; abs of the i32-widened difference of two v8i16 inputs, truncated back
; to v8i16 (see the FIXME above: canonicalization to a halfword ISD::ABS
; is missed, so two word-sized abs operations are done instead of one).
; pwr9: merge + vextsh2w to widen each half, then the xvnegsp/vabsduw
; absolute-difference trick per half, repacked with vpkuwum.
; pwr7/pwr8: widen by merge + shift-left/shift-right-algebraic (vslw/vsraw
; by 16, built from vspltisw 8 doubled), then sub/negate/vmaxsw per half
; and repack. pwr8 differs from pwr7 only in instruction schedule.
define <8 x i16> @sub_absv_16_ext(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_16_ext:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vmrghh v4, v2, v2
; CHECK-PWR9-NEXT: vmrglh v2, v2, v2
; CHECK-PWR9-NEXT: vmrghh v5, v3, v3
; CHECK-PWR9-NEXT: vmrglh v3, v3, v3
; CHECK-PWR9-NEXT: vextsh2w v2, v2
; CHECK-PWR9-NEXT: vextsh2w v3, v3
; CHECK-PWR9-NEXT: vextsh2w v4, v4
; CHECK-PWR9-NEXT: vextsh2w v5, v5
; CHECK-PWR9-NEXT: xvnegsp v3, v3
; CHECK-PWR9-NEXT: xvnegsp v2, v2
; CHECK-PWR9-NEXT: xvnegsp v4, v4
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: xvnegsp v3, v5
; CHECK-PWR9-NEXT: vabsduw v3, v4, v3
; CHECK-PWR9-NEXT: vpkuwum v2, v3, v2
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR8-LABEL: sub_absv_16_ext:
; CHECK-PWR8: # %bb.0: # %entry
; CHECK-PWR8-NEXT: vmrglh v5, v2, v2
; CHECK-PWR8-NEXT: vspltisw v4, 8
; CHECK-PWR8-NEXT: vmrghh v2, v2, v2
; CHECK-PWR8-NEXT: vmrglh v0, v3, v3
; CHECK-PWR8-NEXT: vmrghh v3, v3, v3
; CHECK-PWR8-NEXT: vadduwm v4, v4, v4
; CHECK-PWR8-NEXT: vslw v5, v5, v4
; CHECK-PWR8-NEXT: vslw v2, v2, v4
; CHECK-PWR8-NEXT: vslw v0, v0, v4
; CHECK-PWR8-NEXT: vslw v3, v3, v4
; CHECK-PWR8-NEXT: vsraw v5, v5, v4
; CHECK-PWR8-NEXT: vsraw v2, v2, v4
; CHECK-PWR8-NEXT: vsraw v0, v0, v4
; CHECK-PWR8-NEXT: vsraw v3, v3, v4
; CHECK-PWR8-NEXT: xxlxor v4, v4, v4
; CHECK-PWR8-NEXT: vsubuwm v5, v5, v0
; CHECK-PWR8-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR8-NEXT: vsubuwm v3, v4, v5
; CHECK-PWR8-NEXT: vsubuwm v4, v4, v2
; CHECK-PWR8-NEXT: vmaxsw v3, v5, v3
; CHECK-PWR8-NEXT: vmaxsw v2, v2, v4
; CHECK-PWR8-NEXT: vpkuwum v2, v2, v3
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR7-LABEL: sub_absv_16_ext:
; CHECK-PWR7: # %bb.0: # %entry
; CHECK-PWR7-NEXT: vmrglh v5, v2, v2
; CHECK-PWR7-NEXT: vmrghh v2, v2, v2
; CHECK-PWR7-NEXT: vmrglh v0, v3, v3
; CHECK-PWR7-NEXT: vmrghh v3, v3, v3
; CHECK-PWR7-NEXT: vspltisw v4, 8
; CHECK-PWR7-NEXT: vadduwm v4, v4, v4
; CHECK-PWR7-NEXT: vslw v5, v5, v4
; CHECK-PWR7-NEXT: vslw v2, v2, v4
; CHECK-PWR7-NEXT: vslw v0, v0, v4
; CHECK-PWR7-NEXT: vslw v3, v3, v4
; CHECK-PWR7-NEXT: vsraw v5, v5, v4
; CHECK-PWR7-NEXT: vsraw v2, v2, v4
; CHECK-PWR7-NEXT: vsraw v0, v0, v4
; CHECK-PWR7-NEXT: vsraw v3, v3, v4
; CHECK-PWR7-NEXT: xxlxor v4, v4, v4
; CHECK-PWR7-NEXT: vsubuwm v5, v5, v0
; CHECK-PWR7-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR7-NEXT: vsubuwm v3, v4, v5
; CHECK-PWR7-NEXT: vsubuwm v4, v4, v2
; CHECK-PWR7-NEXT: vmaxsw v3, v5, v3
; CHECK-PWR7-NEXT: vmaxsw v2, v2, v4
; CHECK-PWR7-NEXT: vpkuwum v2, v2, v3
; CHECK-PWR7-NEXT: blr
entry:
  %0 = sext <8 x i16> %a to <8 x i32>
  %1 = sext <8 x i16> %b to <8 x i32>
  %2 = sub nsw <8 x i32> %0, %1
  %3 = icmp sgt <8 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %4 = sub nsw <8 x i32> zeroinitializer, %2
  %5 = select <8 x i1> %3, <8 x i32> %2, <8 x i32> %4
  %6 = trunc <8 x i32> %5 to <8 x i16>
  ret <8 x i16> %6
}
|
|
|
|
; FIXME: This does not produce ISD::ABS. This does not even vectorize correctly!
|
|
; This function should look like sub_absv_32 and sub_absv_16 except that the type is v16i8.
|
|
; Function Attrs: norecurse nounwind readnone
|
|
define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
|
|
; CHECK-PWR9-LE-LABEL: sub_absv_8_ext:
|
|
; CHECK-PWR9-LE: # %bb.0: # %entry
|
|
; CHECK-PWR9-LE-NEXT: li r3, 0
|
|
; CHECK-PWR9-LE-NEXT: li r5, 2
|
|
; CHECK-PWR9-LE-NEXT: li r4, 1
|
|
; CHECK-PWR9-LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r6, r3, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r3, r3, v3
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r8, r5, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r5, r5, v3
|
|
; CHECK-PWR9-LE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR9-LE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR9-LE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR9-LE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR9-LE-NEXT: std r25, -56(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r6, r6, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r3, r3, 24
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r7, r4, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r4, r4, v3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r8, r8, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r3, r6, r3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r5, r5, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r7, r7, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r4, r4, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r5, r8, r5
|
|
; CHECK-PWR9-LE-NEXT: sub r4, r7, r4
|
|
; CHECK-PWR9-LE-NEXT: srawi r6, r3, 31
|
|
; CHECK-PWR9-LE-NEXT: srawi r7, r4, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r3, r3, r6
|
|
; CHECK-PWR9-LE-NEXT: xor r4, r4, r7
|
|
; CHECK-PWR9-LE-NEXT: sub r6, r3, r6
|
|
; CHECK-PWR9-LE-NEXT: srawi r3, r5, 31
|
|
; CHECK-PWR9-LE-NEXT: sub r4, r4, r7
|
|
; CHECK-PWR9-LE-NEXT: xor r5, r5, r3
|
|
; CHECK-PWR9-LE-NEXT: sub r3, r5, r3
|
|
; CHECK-PWR9-LE-NEXT: li r5, 3
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r7, r5, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r5, r5, v3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r7, r7, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r5, r5, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r5, r7, r5
|
|
; CHECK-PWR9-LE-NEXT: srawi r7, r5, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r5, r5, r7
|
|
; CHECK-PWR9-LE-NEXT: sub r5, r5, r7
|
|
; CHECK-PWR9-LE-NEXT: li r7, 4
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r8, r7, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r7, r7, v3
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v4, r5
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r8, r8, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r7, r7, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r7, r8, r7
|
|
; CHECK-PWR9-LE-NEXT: srawi r8, r7, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r7, r7, r8
|
|
; CHECK-PWR9-LE-NEXT: sub r7, r7, r8
|
|
; CHECK-PWR9-LE-NEXT: li r8, 5
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r9, r8, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r8, r8, v3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r9, r9, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r8, r8, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r8, r9, r8
|
|
; CHECK-PWR9-LE-NEXT: srawi r9, r8, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r8, r8, r9
|
|
; CHECK-PWR9-LE-NEXT: sub r8, r8, r9
|
|
; CHECK-PWR9-LE-NEXT: li r9, 6
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r10, r9, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r9, r9, v3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r10, r10, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r9, r9, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r9, r10, r9
|
|
; CHECK-PWR9-LE-NEXT: srawi r10, r9, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r9, r9, r10
|
|
; CHECK-PWR9-LE-NEXT: sub r9, r9, r10
|
|
; CHECK-PWR9-LE-NEXT: li r10, 7
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r11, r10, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r10, r10, v3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r11, r11, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r10, r10, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r10, r11, r10
|
|
; CHECK-PWR9-LE-NEXT: srawi r11, r10, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r10, r10, r11
|
|
; CHECK-PWR9-LE-NEXT: sub r10, r10, r11
|
|
; CHECK-PWR9-LE-NEXT: li r11, 8
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r12, r11, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r11, r11, v3
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v5, r10
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r12, r12, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r11, r11, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r11, r12, r11
|
|
; CHECK-PWR9-LE-NEXT: srawi r12, r11, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r11, r11, r12
|
|
; CHECK-PWR9-LE-NEXT: sub r11, r11, r12
|
|
; CHECK-PWR9-LE-NEXT: li r12, 9
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r0, r12, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r12, r12, v3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r0, r0, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r12, r12, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r12, r0, r12
|
|
; CHECK-PWR9-LE-NEXT: srawi r0, r12, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r12, r12, r0
|
|
; CHECK-PWR9-LE-NEXT: sub r12, r12, r0
|
|
; CHECK-PWR9-LE-NEXT: li r0, 10
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r30, r0, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r0, r0, v3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r30, r30, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r0, r0, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r0, r30, r0
|
|
; CHECK-PWR9-LE-NEXT: srawi r30, r0, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r0, r0, r30
|
|
; CHECK-PWR9-LE-NEXT: sub r0, r0, r30
|
|
; CHECK-PWR9-LE-NEXT: li r30, 11
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r29, r30, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r30, r30, v3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r29, r29, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r30, r30, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r30, r29, r30
|
|
; CHECK-PWR9-LE-NEXT: srawi r29, r30, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r30, r30, r29
|
|
; CHECK-PWR9-LE-NEXT: sub r30, r30, r29
|
|
; CHECK-PWR9-LE-NEXT: li r29, 12
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r28, r29, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r29, r29, v3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r28, r28, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r29, r29, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r29, r28, r29
|
|
; CHECK-PWR9-LE-NEXT: srawi r28, r29, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r29, r29, r28
|
|
; CHECK-PWR9-LE-NEXT: sub r29, r29, r28
|
|
; CHECK-PWR9-LE-NEXT: li r28, 13
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r27, r28, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r28, r28, v3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r27, r27, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r28, r28, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r28, r27, r28
|
|
; CHECK-PWR9-LE-NEXT: srawi r27, r28, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r28, r28, r27
|
|
; CHECK-PWR9-LE-NEXT: sub r28, r28, r27
|
|
; CHECK-PWR9-LE-NEXT: li r27, 14
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r26, r27, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r27, r27, v3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r26, r26, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r27, r27, 24
|
|
; CHECK-PWR9-LE-NEXT: sub r27, r26, r27
|
|
; CHECK-PWR9-LE-NEXT: srawi r26, r27, 31
|
|
; CHECK-PWR9-LE-NEXT: xor r27, r27, r26
|
|
; CHECK-PWR9-LE-NEXT: sub r27, r27, r26
|
|
; CHECK-PWR9-LE-NEXT: li r26, 15
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r25, r26, v2
|
|
; CHECK-PWR9-LE-NEXT: vextubrx r26, r26, v3
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v2, r6
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v3, r4
|
|
; CHECK-PWR9-LE-NEXT: vmrghb v2, v3, v2
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v3, r3
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r25, r25, 24
|
|
; CHECK-PWR9-LE-NEXT: clrlwi r26, r26, 24
|
|
; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v4, r8
|
|
; CHECK-PWR9-LE-NEXT: sub r26, r25, r26
|
|
; CHECK-PWR9-LE-NEXT: vmrglh v2, v3, v2
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v3, r7
|
|
; CHECK-PWR9-LE-NEXT: srawi r25, r26, 31
|
|
; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v4, r9
|
|
; CHECK-PWR9-LE-NEXT: xor r26, r26, r25
|
|
; CHECK-PWR9-LE-NEXT: vmrghb v4, v5, v4
|
|
; CHECK-PWR9-LE-NEXT: sub r26, r26, r25
|
|
; CHECK-PWR9-LE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v5, r26
|
|
; CHECK-PWR9-LE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR9-LE-NEXT: vmrglh v3, v4, v3
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v4, r30
|
|
; CHECK-PWR9-LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR9-LE-NEXT: xxmrglw vs0, v3, v2
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v2, r11
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v3, r12
|
|
; CHECK-PWR9-LE-NEXT: vmrghb v2, v3, v2
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v3, r0
|
|
; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v4, r28
|
|
; CHECK-PWR9-LE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR9-LE-NEXT: vmrglh v2, v3, v2
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v3, r29
|
|
; CHECK-PWR9-LE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3
|
|
; CHECK-PWR9-LE-NEXT: mtvsrd v4, r27
|
|
; CHECK-PWR9-LE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR9-LE-NEXT: vmrghb v4, v5, v4
|
|
; CHECK-PWR9-LE-NEXT: vmrglh v3, v4, v3
|
|
; CHECK-PWR9-LE-NEXT: xxmrglw vs1, v3, v2
|
|
; CHECK-PWR9-LE-NEXT: xxmrgld v2, vs1, vs0
|
|
; CHECK-PWR9-LE-NEXT: blr
|
|
;
|
|
; CHECK-PWR9-BE-LABEL: sub_absv_8_ext:
|
|
; CHECK-PWR9-BE: # %bb.0: # %entry
|
|
; CHECK-PWR9-BE-NEXT: li r3, 0
|
|
; CHECK-PWR9-BE-NEXT: li r4, 1
|
|
; CHECK-PWR9-BE-NEXT: li r5, 2
|
|
; CHECK-PWR9-BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR9-BE-NEXT: vextublx r6, r3, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r3, r3, v3
|
|
; CHECK-PWR9-BE-NEXT: vextublx r7, r4, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r4, r4, v3
|
|
; CHECK-PWR9-BE-NEXT: std r29, -24(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR9-BE-NEXT: std r28, -32(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR9-BE-NEXT: std r27, -40(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR9-BE-NEXT: std r26, -48(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR9-BE-NEXT: std r25, -56(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r6, r6, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r3, r3, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r7, r7, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r4, r4, 24
|
|
; CHECK-PWR9-BE-NEXT: vextublx r8, r5, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r5, r5, v3
|
|
; CHECK-PWR9-BE-NEXT: sub r3, r6, r3
|
|
; CHECK-PWR9-BE-NEXT: sub r4, r7, r4
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r8, r8, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r5, r5, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r5, r8, r5
|
|
; CHECK-PWR9-BE-NEXT: srawi r6, r3, 31
|
|
; CHECK-PWR9-BE-NEXT: srawi r7, r4, 31
|
|
; CHECK-PWR9-BE-NEXT: srawi r8, r5, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r3, r3, r6
|
|
; CHECK-PWR9-BE-NEXT: xor r4, r4, r7
|
|
; CHECK-PWR9-BE-NEXT: xor r5, r5, r8
|
|
; CHECK-PWR9-BE-NEXT: sub r3, r3, r6
|
|
; CHECK-PWR9-BE-NEXT: li r6, 3
|
|
; CHECK-PWR9-BE-NEXT: sub r4, r4, r7
|
|
; CHECK-PWR9-BE-NEXT: sub r5, r5, r8
|
|
; CHECK-PWR9-BE-NEXT: vextublx r7, r6, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r6, r6, v3
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r7, r7, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r6, r6, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r6, r7, r6
|
|
; CHECK-PWR9-BE-NEXT: srawi r7, r6, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r6, r6, r7
|
|
; CHECK-PWR9-BE-NEXT: sub r6, r6, r7
|
|
; CHECK-PWR9-BE-NEXT: li r7, 4
|
|
; CHECK-PWR9-BE-NEXT: vextublx r8, r7, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r7, r7, v3
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r8, r8, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r7, r7, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r7, r8, r7
|
|
; CHECK-PWR9-BE-NEXT: srawi r8, r7, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r7, r7, r8
|
|
; CHECK-PWR9-BE-NEXT: sub r7, r7, r8
|
|
; CHECK-PWR9-BE-NEXT: li r8, 5
|
|
; CHECK-PWR9-BE-NEXT: vextublx r9, r8, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r8, r8, v3
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r9, r9, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r8, r8, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r8, r9, r8
|
|
; CHECK-PWR9-BE-NEXT: srawi r9, r8, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r8, r8, r9
|
|
; CHECK-PWR9-BE-NEXT: sub r8, r8, r9
|
|
; CHECK-PWR9-BE-NEXT: li r9, 6
|
|
; CHECK-PWR9-BE-NEXT: vextublx r10, r9, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r9, r9, v3
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r10, r10, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r9, r9, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r9, r10, r9
|
|
; CHECK-PWR9-BE-NEXT: srawi r10, r9, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r9, r9, r10
|
|
; CHECK-PWR9-BE-NEXT: sub r9, r9, r10
|
|
; CHECK-PWR9-BE-NEXT: li r10, 7
|
|
; CHECK-PWR9-BE-NEXT: vextublx r11, r10, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r10, r10, v3
|
|
; CHECK-PWR9-BE-NEXT: mtfprwz f2, r9
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r11, r11, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r10, r10, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r10, r11, r10
|
|
; CHECK-PWR9-BE-NEXT: srawi r11, r10, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r10, r10, r11
|
|
; CHECK-PWR9-BE-NEXT: sub r10, r10, r11
|
|
; CHECK-PWR9-BE-NEXT: li r11, 8
|
|
; CHECK-PWR9-BE-NEXT: vextublx r12, r11, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r11, r11, v3
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r12, r12, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r11, r11, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r11, r12, r11
|
|
; CHECK-PWR9-BE-NEXT: srawi r12, r11, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r11, r11, r12
|
|
; CHECK-PWR9-BE-NEXT: sub r11, r11, r12
|
|
; CHECK-PWR9-BE-NEXT: li r12, 9
|
|
; CHECK-PWR9-BE-NEXT: vextublx r0, r12, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r12, r12, v3
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r0, r0, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r12, r12, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r12, r0, r12
|
|
; CHECK-PWR9-BE-NEXT: srawi r0, r12, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r12, r12, r0
|
|
; CHECK-PWR9-BE-NEXT: sub r12, r12, r0
|
|
; CHECK-PWR9-BE-NEXT: li r0, 10
|
|
; CHECK-PWR9-BE-NEXT: vextublx r30, r0, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r0, r0, v3
|
|
; CHECK-PWR9-BE-NEXT: mtvsrwz v4, r12
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r30, r30, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r0, r0, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r0, r30, r0
|
|
; CHECK-PWR9-BE-NEXT: srawi r30, r0, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r0, r0, r30
|
|
; CHECK-PWR9-BE-NEXT: sub r0, r0, r30
|
|
; CHECK-PWR9-BE-NEXT: li r30, 11
|
|
; CHECK-PWR9-BE-NEXT: vextublx r29, r30, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r30, r30, v3
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r29, r29, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r30, r30, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r30, r29, r30
|
|
; CHECK-PWR9-BE-NEXT: srawi r29, r30, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r30, r30, r29
|
|
; CHECK-PWR9-BE-NEXT: sub r30, r30, r29
|
|
; CHECK-PWR9-BE-NEXT: li r29, 12
|
|
; CHECK-PWR9-BE-NEXT: vextublx r28, r29, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r29, r29, v3
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r28, r28, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r29, r29, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r29, r28, r29
|
|
; CHECK-PWR9-BE-NEXT: srawi r28, r29, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r29, r29, r28
|
|
; CHECK-PWR9-BE-NEXT: sub r29, r29, r28
|
|
; CHECK-PWR9-BE-NEXT: li r28, 13
|
|
; CHECK-PWR9-BE-NEXT: vextublx r27, r28, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r28, r28, v3
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r27, r27, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r28, r28, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r28, r27, r28
|
|
; CHECK-PWR9-BE-NEXT: srawi r27, r28, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r28, r28, r27
|
|
; CHECK-PWR9-BE-NEXT: sub r28, r28, r27
|
|
; CHECK-PWR9-BE-NEXT: li r27, 14
|
|
; CHECK-PWR9-BE-NEXT: vextublx r26, r27, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r27, r27, v3
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r26, r26, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r27, r27, 24
|
|
; CHECK-PWR9-BE-NEXT: sub r27, r26, r27
|
|
; CHECK-PWR9-BE-NEXT: srawi r26, r27, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r27, r27, r26
|
|
; CHECK-PWR9-BE-NEXT: sub r27, r27, r26
|
|
; CHECK-PWR9-BE-NEXT: li r26, 15
|
|
; CHECK-PWR9-BE-NEXT: vextublx r25, r26, v2
|
|
; CHECK-PWR9-BE-NEXT: vextublx r26, r26, v3
|
|
; CHECK-PWR9-BE-NEXT: mtfprwz f0, r27
|
|
; CHECK-PWR9-BE-NEXT: addis r27, r2, .LCPI9_0@toc@ha
|
|
; CHECK-PWR9-BE-NEXT: mtvsrwz v3, r28
|
|
; CHECK-PWR9-BE-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR9-BE-NEXT: addi r27, r27, .LCPI9_0@toc@l
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r25, r25, 24
|
|
; CHECK-PWR9-BE-NEXT: clrlwi r26, r26, 24
|
|
; CHECK-PWR9-BE-NEXT: lxv vs1, 0(r27)
|
|
; CHECK-PWR9-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR9-BE-NEXT: sub r26, r25, r26
|
|
; CHECK-PWR9-BE-NEXT: srawi r25, r26, 31
|
|
; CHECK-PWR9-BE-NEXT: xor r26, r26, r25
|
|
; CHECK-PWR9-BE-NEXT: sub r26, r26, r25
|
|
; CHECK-PWR9-BE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR9-BE-NEXT: mtvsrwz v2, r26
|
|
; CHECK-PWR9-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR9-BE-NEXT: xxperm v2, vs0, vs1
|
|
; CHECK-PWR9-BE-NEXT: mtfprwz f0, r29
|
|
; CHECK-PWR9-BE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR9-BE-NEXT: xxperm v3, vs0, vs1
|
|
; CHECK-PWR9-BE-NEXT: mtfprwz f0, r0
|
|
; CHECK-PWR9-BE-NEXT: vmrghh v2, v3, v2
|
|
; CHECK-PWR9-BE-NEXT: mtvsrwz v3, r30
|
|
; CHECK-PWR9-BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR9-BE-NEXT: xxperm v3, vs0, vs1
|
|
; CHECK-PWR9-BE-NEXT: mtfprwz f0, r11
|
|
; CHECK-PWR9-BE-NEXT: xxperm v4, vs0, vs1
|
|
; CHECK-PWR9-BE-NEXT: vmrghh v3, v4, v3
|
|
; CHECK-PWR9-BE-NEXT: mtvsrwz v4, r4
|
|
; CHECK-PWR9-BE-NEXT: xxmrghw vs0, v3, v2
|
|
; CHECK-PWR9-BE-NEXT: mtvsrwz v2, r10
|
|
; CHECK-PWR9-BE-NEXT: mtvsrwz v3, r8
|
|
; CHECK-PWR9-BE-NEXT: xxperm v2, vs2, vs1
|
|
; CHECK-PWR9-BE-NEXT: mtfprwz f2, r7
|
|
; CHECK-PWR9-BE-NEXT: xxperm v3, vs2, vs1
|
|
; CHECK-PWR9-BE-NEXT: mtfprwz f2, r5
|
|
; CHECK-PWR9-BE-NEXT: vmrghh v2, v3, v2
|
|
; CHECK-PWR9-BE-NEXT: mtvsrwz v3, r6
|
|
; CHECK-PWR9-BE-NEXT: xxperm v3, vs2, vs1
|
|
; CHECK-PWR9-BE-NEXT: mtfprwz f2, r3
|
|
; CHECK-PWR9-BE-NEXT: xxperm v4, vs2, vs1
|
|
; CHECK-PWR9-BE-NEXT: vmrghh v3, v4, v3
|
|
; CHECK-PWR9-BE-NEXT: xxmrghw vs1, v3, v2
|
|
; CHECK-PWR9-BE-NEXT: xxmrghd v2, vs1, vs0
|
|
; CHECK-PWR9-BE-NEXT: blr
|
|
;
|
|
; CHECK-PWR8-LABEL: sub_absv_8_ext:
|
|
; CHECK-PWR8: # %bb.0: # %entry
|
|
; CHECK-PWR8-NEXT: xxswapd vs0, v2
|
|
; CHECK-PWR8-NEXT: mfvsrd r5, v2
|
|
; CHECK-PWR8-NEXT: std r26, -48(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: std r25, -56(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: std r27, -40(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: mfvsrd r6, v3
|
|
; CHECK-PWR8-NEXT: xxswapd vs1, v3
|
|
; CHECK-PWR8-NEXT: clrldi r3, r5, 56
|
|
; CHECK-PWR8-NEXT: rldicl r7, r5, 56, 56
|
|
; CHECK-PWR8-NEXT: clrldi r4, r6, 56
|
|
; CHECK-PWR8-NEXT: rldicl r8, r6, 56, 56
|
|
; CHECK-PWR8-NEXT: mffprd r26, f0
|
|
; CHECK-PWR8-NEXT: clrlwi r3, r3, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r7, r7, 24
|
|
; CHECK-PWR8-NEXT: std r28, -32(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: std r29, -24(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: rldicl r11, r5, 40, 56
|
|
; CHECK-PWR8-NEXT: rldicl r12, r6, 40, 56
|
|
; CHECK-PWR8-NEXT: clrlwi r4, r4, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r8, r8, 24
|
|
; CHECK-PWR8-NEXT: rldicl r9, r5, 48, 56
|
|
; CHECK-PWR8-NEXT: rldicl r10, r6, 48, 56
|
|
; CHECK-PWR8-NEXT: sub r4, r3, r4
|
|
; CHECK-PWR8-NEXT: clrlwi r11, r11, 24
|
|
; CHECK-PWR8-NEXT: rldicl r3, r26, 16, 56
|
|
; CHECK-PWR8-NEXT: clrlwi r12, r12, 24
|
|
; CHECK-PWR8-NEXT: sub r7, r7, r8
|
|
; CHECK-PWR8-NEXT: clrlwi r9, r9, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r10, r10, 24
|
|
; CHECK-PWR8-NEXT: std r24, -64(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: mffprd r24, f1
|
|
; CHECK-PWR8-NEXT: rldicl r0, r5, 32, 56
|
|
; CHECK-PWR8-NEXT: rldicl r30, r6, 32, 56
|
|
; CHECK-PWR8-NEXT: std r3, -160(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: sub r11, r11, r12
|
|
; CHECK-PWR8-NEXT: sub r9, r9, r10
|
|
; CHECK-PWR8-NEXT: srawi r3, r4, 31
|
|
; CHECK-PWR8-NEXT: srawi r12, r7, 31
|
|
; CHECK-PWR8-NEXT: clrlwi r10, r0, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r0, r30, 24
|
|
; CHECK-PWR8-NEXT: xor r4, r4, r3
|
|
; CHECK-PWR8-NEXT: xor r7, r7, r12
|
|
; CHECK-PWR8-NEXT: sub r10, r10, r0
|
|
; CHECK-PWR8-NEXT: std r20, -96(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: std r21, -88(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: sub r3, r4, r3
|
|
; CHECK-PWR8-NEXT: srawi r4, r9, 31
|
|
; CHECK-PWR8-NEXT: sub r7, r7, r12
|
|
; CHECK-PWR8-NEXT: std r22, -80(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: rldicl r29, r5, 24, 56
|
|
; CHECK-PWR8-NEXT: rldicl r28, r6, 24, 56
|
|
; CHECK-PWR8-NEXT: xor r9, r9, r4
|
|
; CHECK-PWR8-NEXT: mtvsrd v3, r7
|
|
; CHECK-PWR8-NEXT: rldicl r27, r5, 16, 56
|
|
; CHECK-PWR8-NEXT: rldicl r25, r6, 16, 56
|
|
; CHECK-PWR8-NEXT: clrlwi r30, r29, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r29, r28, 24
|
|
; CHECK-PWR8-NEXT: mtvsrd v2, r3
|
|
; CHECK-PWR8-NEXT: sub r4, r9, r4
|
|
; CHECK-PWR8-NEXT: srawi r7, r10, 31
|
|
; CHECK-PWR8-NEXT: srawi r3, r11, 31
|
|
; CHECK-PWR8-NEXT: clrlwi r9, r27, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r12, r25, 24
|
|
; CHECK-PWR8-NEXT: sub r0, r30, r29
|
|
; CHECK-PWR8-NEXT: mtvsrd v4, r4
|
|
; CHECK-PWR8-NEXT: std r23, -72(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: xor r10, r10, r7
|
|
; CHECK-PWR8-NEXT: xor r11, r11, r3
|
|
; CHECK-PWR8-NEXT: sub r9, r9, r12
|
|
; CHECK-PWR8-NEXT: std r18, -112(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: std r19, -104(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: vmrghb v2, v3, v2
|
|
; CHECK-PWR8-NEXT: sub r7, r10, r7
|
|
; CHECK-PWR8-NEXT: rldicl r5, r5, 8, 56
|
|
; CHECK-PWR8-NEXT: sub r3, r11, r3
|
|
; CHECK-PWR8-NEXT: rldicl r6, r6, 8, 56
|
|
; CHECK-PWR8-NEXT: srawi r4, r0, 31
|
|
; CHECK-PWR8-NEXT: mtvsrd v0, r7
|
|
; CHECK-PWR8-NEXT: std r16, -128(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: std r17, -120(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: srawi r7, r9, 31
|
|
; CHECK-PWR8-NEXT: clrldi r23, r26, 56
|
|
; CHECK-PWR8-NEXT: mtvsrd v5, r3
|
|
; CHECK-PWR8-NEXT: clrlwi r3, r5, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r5, r6, 24
|
|
; CHECK-PWR8-NEXT: clrldi r22, r24, 56
|
|
; CHECK-PWR8-NEXT: rldicl r21, r26, 56, 56
|
|
; CHECK-PWR8-NEXT: xor r10, r0, r4
|
|
; CHECK-PWR8-NEXT: xor r9, r9, r7
|
|
; CHECK-PWR8-NEXT: rldicl r20, r24, 56, 56
|
|
; CHECK-PWR8-NEXT: rldicl r19, r26, 48, 56
|
|
; CHECK-PWR8-NEXT: sub r3, r3, r5
|
|
; CHECK-PWR8-NEXT: sub r4, r10, r4
|
|
; CHECK-PWR8-NEXT: sub r7, r9, r7
|
|
; CHECK-PWR8-NEXT: clrlwi r9, r23, 24
|
|
; CHECK-PWR8-NEXT: rldicl r18, r24, 48, 56
|
|
; CHECK-PWR8-NEXT: clrlwi r10, r22, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r11, r21, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r12, r20, 24
|
|
; CHECK-PWR8-NEXT: mtvsrd v1, r4
|
|
; CHECK-PWR8-NEXT: std r14, -144(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: std r15, -136(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: rldicl r17, r26, 40, 56
|
|
; CHECK-PWR8-NEXT: rldicl r16, r24, 40, 56
|
|
; CHECK-PWR8-NEXT: sub r9, r9, r10
|
|
; CHECK-PWR8-NEXT: sub r10, r11, r12
|
|
; CHECK-PWR8-NEXT: mtvsrd v3, r7
|
|
; CHECK-PWR8-NEXT: srawi r4, r3, 31
|
|
; CHECK-PWR8-NEXT: clrlwi r11, r19, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r12, r18, 24
|
|
; CHECK-PWR8-NEXT: vmrghb v4, v5, v4
|
|
; CHECK-PWR8-NEXT: std r31, -8(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: xor r3, r3, r4
|
|
; CHECK-PWR8-NEXT: sub r7, r11, r12
|
|
; CHECK-PWR8-NEXT: clrlwi r11, r17, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r12, r16, 24
|
|
; CHECK-PWR8-NEXT: vmrghb v0, v1, v0
|
|
; CHECK-PWR8-NEXT: std r2, -152(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR8-NEXT: rldicl r15, r26, 32, 56
|
|
; CHECK-PWR8-NEXT: rldicl r14, r24, 32, 56
|
|
; CHECK-PWR8-NEXT: sub r3, r3, r4
|
|
; CHECK-PWR8-NEXT: sub r11, r11, r12
|
|
; CHECK-PWR8-NEXT: srawi r4, r9, 31
|
|
; CHECK-PWR8-NEXT: srawi r12, r10, 31
|
|
; CHECK-PWR8-NEXT: clrlwi r0, r15, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r30, r14, 24
|
|
; CHECK-PWR8-NEXT: mtvsrd v5, r3
|
|
; CHECK-PWR8-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: xor r9, r9, r4
|
|
; CHECK-PWR8-NEXT: xor r10, r10, r12
|
|
; CHECK-PWR8-NEXT: sub r3, r0, r30
|
|
; CHECK-PWR8-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: ld r23, -72(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: ld r22, -80(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: srawi r28, r11, 31
|
|
; CHECK-PWR8-NEXT: sub r4, r9, r4
|
|
; CHECK-PWR8-NEXT: sub r10, r10, r12
|
|
; CHECK-PWR8-NEXT: vmrghb v3, v5, v3
|
|
; CHECK-PWR8-NEXT: ld r21, -88(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: ld r20, -96(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: srawi r29, r7, 31
|
|
; CHECK-PWR8-NEXT: srawi r9, r3, 31
|
|
; CHECK-PWR8-NEXT: mtvsrd v5, r4
|
|
; CHECK-PWR8-NEXT: xor r4, r11, r28
|
|
; CHECK-PWR8-NEXT: ld r19, -104(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: ld r18, -112(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: mtvsrd v1, r10
|
|
; CHECK-PWR8-NEXT: ld r10, -160(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: rldicl r31, r26, 24, 56
|
|
; CHECK-PWR8-NEXT: rldicl r2, r24, 24, 56
|
|
; CHECK-PWR8-NEXT: xor r7, r7, r29
|
|
; CHECK-PWR8-NEXT: xor r3, r3, r9
|
|
; CHECK-PWR8-NEXT: rldicl r8, r24, 16, 56
|
|
; CHECK-PWR8-NEXT: rldicl r6, r26, 8, 56
|
|
; CHECK-PWR8-NEXT: sub r4, r4, r28
|
|
; CHECK-PWR8-NEXT: clrlwi r0, r31, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r30, r2, 24
|
|
; CHECK-PWR8-NEXT: sub r7, r7, r29
|
|
; CHECK-PWR8-NEXT: rldicl r5, r24, 8, 56
|
|
; CHECK-PWR8-NEXT: clrlwi r10, r10, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r8, r8, 24
|
|
; CHECK-PWR8-NEXT: sub r3, r3, r9
|
|
; CHECK-PWR8-NEXT: mtvsrd v7, r4
|
|
; CHECK-PWR8-NEXT: clrlwi r4, r6, 24
|
|
; CHECK-PWR8-NEXT: clrlwi r5, r5, 24
|
|
; CHECK-PWR8-NEXT: sub r0, r0, r30
|
|
; CHECK-PWR8-NEXT: mtvsrd v6, r7
|
|
; CHECK-PWR8-NEXT: sub r7, r10, r8
|
|
; CHECK-PWR8-NEXT: ld r2, -152(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: ld r31, -8(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: mtvsrd v8, r3
|
|
; CHECK-PWR8-NEXT: sub r3, r4, r5
|
|
; CHECK-PWR8-NEXT: srawi r12, r0, 31
|
|
; CHECK-PWR8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: srawi r6, r7, 31
|
|
; CHECK-PWR8-NEXT: srawi r5, r3, 31
|
|
; CHECK-PWR8-NEXT: xor r8, r0, r12
|
|
; CHECK-PWR8-NEXT: vmrghb v5, v1, v5
|
|
; CHECK-PWR8-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: ld r24, -64(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: xor r4, r7, r6
|
|
; CHECK-PWR8-NEXT: xor r3, r3, r5
|
|
; CHECK-PWR8-NEXT: sub r8, r8, r12
|
|
; CHECK-PWR8-NEXT: vmrghb v6, v7, v6
|
|
; CHECK-PWR8-NEXT: ld r17, -120(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: ld r16, -128(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: sub r4, r4, r6
|
|
; CHECK-PWR8-NEXT: sub r3, r3, r5
|
|
; CHECK-PWR8-NEXT: mtvsrd v9, r8
|
|
; CHECK-PWR8-NEXT: ld r15, -136(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: ld r14, -144(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR8-NEXT: mtvsrd v1, r4
|
|
; CHECK-PWR8-NEXT: mtvsrd v7, r3
|
|
; CHECK-PWR8-NEXT: vmrghb v8, v9, v8
|
|
; CHECK-PWR8-NEXT: vmrghb v1, v7, v1
|
|
; CHECK-PWR8-NEXT: vmrglh v2, v4, v2
|
|
; CHECK-PWR8-NEXT: vmrglh v3, v3, v0
|
|
; CHECK-PWR8-NEXT: vmrglh v4, v6, v5
|
|
; CHECK-PWR8-NEXT: vmrglh v5, v1, v8
|
|
; CHECK-PWR8-NEXT: xxmrglw vs0, v3, v2
|
|
; CHECK-PWR8-NEXT: xxmrglw vs1, v5, v4
|
|
; CHECK-PWR8-NEXT: xxmrgld v2, vs0, vs1
|
|
; CHECK-PWR8-NEXT: blr
|
|
;
|
|
; CHECK-PWR7-LABEL: sub_absv_8_ext:
|
|
; CHECK-PWR7: # %bb.0: # %entry
|
|
; CHECK-PWR7-NEXT: stdu r1, -416(r1)
|
|
; CHECK-PWR7-NEXT: .cfi_def_cfa_offset 416
|
|
; CHECK-PWR7-NEXT: .cfi_offset r23, -72
|
|
; CHECK-PWR7-NEXT: .cfi_offset r24, -64
|
|
; CHECK-PWR7-NEXT: .cfi_offset r25, -56
|
|
; CHECK-PWR7-NEXT: .cfi_offset r26, -48
|
|
; CHECK-PWR7-NEXT: .cfi_offset r27, -40
|
|
; CHECK-PWR7-NEXT: .cfi_offset r28, -32
|
|
; CHECK-PWR7-NEXT: .cfi_offset r29, -24
|
|
; CHECK-PWR7-NEXT: .cfi_offset r30, -16
|
|
; CHECK-PWR7-NEXT: addi r3, r1, 304
|
|
; CHECK-PWR7-NEXT: std r23, 344(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR7-NEXT: addi r4, r1, 320
|
|
; CHECK-PWR7-NEXT: std r24, 352(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR7-NEXT: std r25, 360(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR7-NEXT: std r26, 368(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR7-NEXT: std r27, 376(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR7-NEXT: std r28, 384(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR7-NEXT: std r29, 392(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR7-NEXT: std r30, 400(r1) # 8-byte Folded Spill
|
|
; CHECK-PWR7-NEXT: stxvw4x v2, 0, r3
|
|
; CHECK-PWR7-NEXT: lbz r3, 304(r1)
|
|
; CHECK-PWR7-NEXT: stxvw4x v3, 0, r4
|
|
; CHECK-PWR7-NEXT: lbz r9, 307(r1)
|
|
; CHECK-PWR7-NEXT: lbz r10, 323(r1)
|
|
; CHECK-PWR7-NEXT: lbz r11, 308(r1)
|
|
; CHECK-PWR7-NEXT: lbz r12, 324(r1)
|
|
; CHECK-PWR7-NEXT: lbz r0, 309(r1)
|
|
; CHECK-PWR7-NEXT: lbz r30, 325(r1)
|
|
; CHECK-PWR7-NEXT: sub r9, r9, r10
|
|
; CHECK-PWR7-NEXT: lbz r29, 310(r1)
|
|
; CHECK-PWR7-NEXT: lbz r28, 326(r1)
|
|
; CHECK-PWR7-NEXT: sub r11, r11, r12
|
|
; CHECK-PWR7-NEXT: lbz r27, 311(r1)
|
|
; CHECK-PWR7-NEXT: lbz r26, 327(r1)
|
|
; CHECK-PWR7-NEXT: sub r0, r0, r30
|
|
; CHECK-PWR7-NEXT: lbz r25, 312(r1)
|
|
; CHECK-PWR7-NEXT: lbz r24, 328(r1)
|
|
; CHECK-PWR7-NEXT: sub r29, r29, r28
|
|
; CHECK-PWR7-NEXT: lbz r10, 315(r1)
|
|
; CHECK-PWR7-NEXT: lbz r12, 331(r1)
|
|
; CHECK-PWR7-NEXT: sub r27, r27, r26
|
|
; CHECK-PWR7-NEXT: lbz r30, 316(r1)
|
|
; CHECK-PWR7-NEXT: lbz r28, 332(r1)
|
|
; CHECK-PWR7-NEXT: sub r25, r25, r24
|
|
; CHECK-PWR7-NEXT: lbz r4, 320(r1)
|
|
; CHECK-PWR7-NEXT: lbz r5, 305(r1)
|
|
; CHECK-PWR7-NEXT: sub r10, r10, r12
|
|
; CHECK-PWR7-NEXT: lbz r6, 321(r1)
|
|
; CHECK-PWR7-NEXT: lbz r26, 317(r1)
|
|
; CHECK-PWR7-NEXT: sub r30, r30, r28
|
|
; CHECK-PWR7-NEXT: lbz r24, 333(r1)
|
|
; CHECK-PWR7-NEXT: lbz r12, 319(r1)
|
|
; CHECK-PWR7-NEXT: sub r3, r3, r4
|
|
; CHECK-PWR7-NEXT: lbz r28, 335(r1)
|
|
; CHECK-PWR7-NEXT: lbz r7, 306(r1)
|
|
; CHECK-PWR7-NEXT: sub r5, r5, r6
|
|
; CHECK-PWR7-NEXT: lbz r8, 322(r1)
|
|
; CHECK-PWR7-NEXT: sub r26, r26, r24
|
|
; CHECK-PWR7-NEXT: srawi r24, r5, 31
|
|
; CHECK-PWR7-NEXT: lbz r23, 313(r1)
|
|
; CHECK-PWR7-NEXT: sub r12, r12, r28
|
|
; CHECK-PWR7-NEXT: srawi r28, r3, 31
|
|
; CHECK-PWR7-NEXT: xor r5, r5, r24
|
|
; CHECK-PWR7-NEXT: lbz r4, 329(r1)
|
|
; CHECK-PWR7-NEXT: sub r7, r7, r8
|
|
; CHECK-PWR7-NEXT: xor r3, r3, r28
|
|
; CHECK-PWR7-NEXT: lbz r6, 314(r1)
|
|
; CHECK-PWR7-NEXT: lbz r8, 330(r1)
|
|
; CHECK-PWR7-NEXT: sub r3, r3, r28
|
|
; CHECK-PWR7-NEXT: srawi r28, r7, 31
|
|
; CHECK-PWR7-NEXT: sub r5, r5, r24
|
|
; CHECK-PWR7-NEXT: srawi r24, r9, 31
|
|
; CHECK-PWR7-NEXT: xor r7, r7, r28
|
|
; CHECK-PWR7-NEXT: xor r9, r9, r24
|
|
; CHECK-PWR7-NEXT: sub r7, r7, r28
|
|
; CHECK-PWR7-NEXT: srawi r28, r11, 31
|
|
; CHECK-PWR7-NEXT: sub r9, r9, r24
|
|
; CHECK-PWR7-NEXT: srawi r24, r0, 31
|
|
; CHECK-PWR7-NEXT: xor r11, r11, r28
|
|
; CHECK-PWR7-NEXT: xor r0, r0, r24
|
|
; CHECK-PWR7-NEXT: sub r11, r11, r28
|
|
; CHECK-PWR7-NEXT: srawi r28, r29, 31
|
|
; CHECK-PWR7-NEXT: sub r0, r0, r24
|
|
; CHECK-PWR7-NEXT: srawi r24, r27, 31
|
|
; CHECK-PWR7-NEXT: sub r4, r23, r4
|
|
; CHECK-PWR7-NEXT: xor r29, r29, r28
|
|
; CHECK-PWR7-NEXT: lbz r23, 318(r1)
|
|
; CHECK-PWR7-NEXT: xor r27, r27, r24
|
|
; CHECK-PWR7-NEXT: sub r29, r29, r28
|
|
; CHECK-PWR7-NEXT: srawi r28, r25, 31
|
|
; CHECK-PWR7-NEXT: sub r27, r27, r24
|
|
; CHECK-PWR7-NEXT: srawi r24, r4, 31
|
|
; CHECK-PWR7-NEXT: sub r6, r6, r8
|
|
; CHECK-PWR7-NEXT: xor r25, r25, r28
|
|
; CHECK-PWR7-NEXT: lbz r8, 334(r1)
|
|
; CHECK-PWR7-NEXT: xor r4, r4, r24
|
|
; CHECK-PWR7-NEXT: sub r28, r25, r28
|
|
; CHECK-PWR7-NEXT: srawi r25, r6, 31
|
|
; CHECK-PWR7-NEXT: sub r4, r4, r24
|
|
; CHECK-PWR7-NEXT: srawi r24, r10, 31
|
|
; CHECK-PWR7-NEXT: xor r6, r6, r25
|
|
; CHECK-PWR7-NEXT: xor r10, r10, r24
|
|
; CHECK-PWR7-NEXT: sub r6, r6, r25
|
|
; CHECK-PWR7-NEXT: srawi r25, r30, 31
|
|
; CHECK-PWR7-NEXT: sub r10, r10, r24
|
|
; CHECK-PWR7-NEXT: srawi r24, r26, 31
|
|
; CHECK-PWR7-NEXT: sub r8, r23, r8
|
|
; CHECK-PWR7-NEXT: xor r30, r30, r25
|
|
; CHECK-PWR7-NEXT: ld r23, 344(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR7-NEXT: xor r26, r26, r24
|
|
; CHECK-PWR7-NEXT: sub r30, r30, r25
|
|
; CHECK-PWR7-NEXT: srawi r25, r12, 31
|
|
; CHECK-PWR7-NEXT: sub r26, r26, r24
|
|
; CHECK-PWR7-NEXT: srawi r24, r8, 31
|
|
; CHECK-PWR7-NEXT: xor r12, r12, r25
|
|
; CHECK-PWR7-NEXT: xor r8, r8, r24
|
|
; CHECK-PWR7-NEXT: sub r12, r12, r25
|
|
; CHECK-PWR7-NEXT: addi r25, r1, 272
|
|
; CHECK-PWR7-NEXT: sub r8, r8, r24
|
|
; CHECK-PWR7-NEXT: stb r12, 288(r1)
|
|
; CHECK-PWR7-NEXT: addi r12, r1, 288
|
|
; CHECK-PWR7-NEXT: stb r8, 272(r1)
|
|
; CHECK-PWR7-NEXT: stb r26, 256(r1)
|
|
; CHECK-PWR7-NEXT: stb r30, 240(r1)
|
|
; CHECK-PWR7-NEXT: stb r10, 224(r1)
|
|
; CHECK-PWR7-NEXT: stb r6, 208(r1)
|
|
; CHECK-PWR7-NEXT: stb r4, 192(r1)
|
|
; CHECK-PWR7-NEXT: stb r28, 176(r1)
|
|
; CHECK-PWR7-NEXT: stb r27, 160(r1)
|
|
; CHECK-PWR7-NEXT: stb r29, 144(r1)
|
|
; CHECK-PWR7-NEXT: stb r0, 128(r1)
|
|
; CHECK-PWR7-NEXT: stb r11, 112(r1)
|
|
; CHECK-PWR7-NEXT: stb r9, 96(r1)
|
|
; CHECK-PWR7-NEXT: stb r7, 80(r1)
|
|
; CHECK-PWR7-NEXT: stb r5, 64(r1)
|
|
; CHECK-PWR7-NEXT: stb r3, 48(r1)
|
|
; CHECK-PWR7-NEXT: addi r8, r1, 256
|
|
; CHECK-PWR7-NEXT: addi r26, r1, 240
|
|
; CHECK-PWR7-NEXT: lxvw4x v2, 0, r12
|
|
; CHECK-PWR7-NEXT: lxvw4x v3, 0, r25
|
|
; CHECK-PWR7-NEXT: addi r10, r1, 224
|
|
; CHECK-PWR7-NEXT: addi r30, r1, 208
|
|
; CHECK-PWR7-NEXT: addi r3, r1, 192
|
|
; CHECK-PWR7-NEXT: addi r4, r1, 176
|
|
; CHECK-PWR7-NEXT: addi r5, r1, 160
|
|
; CHECK-PWR7-NEXT: addi r6, r1, 144
|
|
; CHECK-PWR7-NEXT: lxvw4x v4, 0, r8
|
|
; CHECK-PWR7-NEXT: lxvw4x v5, 0, r26
|
|
; CHECK-PWR7-NEXT: addi r7, r1, 128
|
|
; CHECK-PWR7-NEXT: addi r8, r1, 112
|
|
; CHECK-PWR7-NEXT: lxvw4x v0, 0, r10
|
|
; CHECK-PWR7-NEXT: lxvw4x v1, 0, r30
|
|
; CHECK-PWR7-NEXT: vmrghb v2, v3, v2
|
|
; CHECK-PWR7-NEXT: addi r9, r1, 96
|
|
; CHECK-PWR7-NEXT: lxvw4x v6, 0, r3
|
|
; CHECK-PWR7-NEXT: lxvw4x v7, 0, r4
|
|
; CHECK-PWR7-NEXT: addi r3, r1, 80
|
|
; CHECK-PWR7-NEXT: addi r4, r1, 64
|
|
; CHECK-PWR7-NEXT: lxvw4x v3, 0, r5
|
|
; CHECK-PWR7-NEXT: lxvw4x v8, 0, r6
|
|
; CHECK-PWR7-NEXT: addi r5, r1, 48
|
|
; CHECK-PWR7-NEXT: vmrghb v4, v5, v4
|
|
; CHECK-PWR7-NEXT: lxvw4x v5, 0, r7
|
|
; CHECK-PWR7-NEXT: lxvw4x v9, 0, r8
|
|
; CHECK-PWR7-NEXT: vmrghb v0, v1, v0
|
|
; CHECK-PWR7-NEXT: lxvw4x v1, 0, r9
|
|
; CHECK-PWR7-NEXT: lxvw4x v10, 0, r3
|
|
; CHECK-PWR7-NEXT: vmrghb v6, v7, v6
|
|
; CHECK-PWR7-NEXT: lxvw4x v7, 0, r4
|
|
; CHECK-PWR7-NEXT: vmrghb v3, v8, v3
|
|
; CHECK-PWR7-NEXT: lxvw4x v8, 0, r5
|
|
; CHECK-PWR7-NEXT: vmrghb v5, v9, v5
|
|
; CHECK-PWR7-NEXT: ld r30, 400(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR7-NEXT: ld r29, 392(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR7-NEXT: vmrghb v1, v10, v1
|
|
; CHECK-PWR7-NEXT: ld r28, 384(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR7-NEXT: ld r27, 376(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR7-NEXT: vmrghb v7, v8, v7
|
|
; CHECK-PWR7-NEXT: ld r26, 368(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR7-NEXT: ld r25, 360(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR7-NEXT: vmrghh v2, v4, v2
|
|
; CHECK-PWR7-NEXT: ld r24, 352(r1) # 8-byte Folded Reload
|
|
; CHECK-PWR7-NEXT: vmrghh v4, v6, v0
|
|
; CHECK-PWR7-NEXT: vmrghh v3, v5, v3
|
|
; CHECK-PWR7-NEXT: vmrghh v5, v7, v1
|
|
; CHECK-PWR7-NEXT: xxmrghw vs0, v4, v2
|
|
; CHECK-PWR7-NEXT: xxmrghw vs1, v5, v3
|
|
; CHECK-PWR7-NEXT: xxmrghd v2, vs1, vs0
|
|
; CHECK-PWR7-NEXT: addi r1, r1, 416
|
|
; CHECK-PWR7-NEXT: blr
|
|
entry:
|
|
%vecext = extractelement <16 x i8> %a, i32 0
|
|
%conv = zext i8 %vecext to i32
|
|
%vecext1 = extractelement <16 x i8> %b, i32 0
|
|
%conv2 = zext i8 %vecext1 to i32
|
|
%sub = sub nsw i32 %conv, %conv2
|
|
%ispos = icmp sgt i32 %sub, -1
|
|
%neg = sub nsw i32 0, %sub
|
|
%0 = select i1 %ispos, i32 %sub, i32 %neg
|
|
%conv3 = trunc i32 %0 to i8
|
|
%vecins = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, i8 %conv3, i32 0
|
|
%vecext4 = extractelement <16 x i8> %a, i32 1
|
|
%conv5 = zext i8 %vecext4 to i32
|
|
%vecext6 = extractelement <16 x i8> %b, i32 1
|
|
%conv7 = zext i8 %vecext6 to i32
|
|
%sub8 = sub nsw i32 %conv5, %conv7
|
|
%ispos171 = icmp sgt i32 %sub8, -1
|
|
%neg172 = sub nsw i32 0, %sub8
|
|
%1 = select i1 %ispos171, i32 %sub8, i32 %neg172
|
|
%conv10 = trunc i32 %1 to i8
|
|
%vecins11 = insertelement <16 x i8> %vecins, i8 %conv10, i32 1
|
|
%vecext12 = extractelement <16 x i8> %a, i32 2
|
|
%conv13 = zext i8 %vecext12 to i32
|
|
%vecext14 = extractelement <16 x i8> %b, i32 2
|
|
%conv15 = zext i8 %vecext14 to i32
|
|
%sub16 = sub nsw i32 %conv13, %conv15
|
|
%ispos173 = icmp sgt i32 %sub16, -1
|
|
%neg174 = sub nsw i32 0, %sub16
|
|
%2 = select i1 %ispos173, i32 %sub16, i32 %neg174
|
|
%conv18 = trunc i32 %2 to i8
|
|
%vecins19 = insertelement <16 x i8> %vecins11, i8 %conv18, i32 2
|
|
%vecext20 = extractelement <16 x i8> %a, i32 3
|
|
%conv21 = zext i8 %vecext20 to i32
|
|
%vecext22 = extractelement <16 x i8> %b, i32 3
|
|
%conv23 = zext i8 %vecext22 to i32
|
|
%sub24 = sub nsw i32 %conv21, %conv23
|
|
%ispos175 = icmp sgt i32 %sub24, -1
|
|
%neg176 = sub nsw i32 0, %sub24
|
|
%3 = select i1 %ispos175, i32 %sub24, i32 %neg176
|
|
%conv26 = trunc i32 %3 to i8
|
|
%vecins27 = insertelement <16 x i8> %vecins19, i8 %conv26, i32 3
|
|
%vecext28 = extractelement <16 x i8> %a, i32 4
|
|
%conv29 = zext i8 %vecext28 to i32
|
|
%vecext30 = extractelement <16 x i8> %b, i32 4
|
|
%conv31 = zext i8 %vecext30 to i32
|
|
%sub32 = sub nsw i32 %conv29, %conv31
|
|
%ispos177 = icmp sgt i32 %sub32, -1
|
|
%neg178 = sub nsw i32 0, %sub32
|
|
%4 = select i1 %ispos177, i32 %sub32, i32 %neg178
|
|
%conv34 = trunc i32 %4 to i8
|
|
%vecins35 = insertelement <16 x i8> %vecins27, i8 %conv34, i32 4
|
|
%vecext36 = extractelement <16 x i8> %a, i32 5
|
|
%conv37 = zext i8 %vecext36 to i32
|
|
%vecext38 = extractelement <16 x i8> %b, i32 5
|
|
%conv39 = zext i8 %vecext38 to i32
|
|
%sub40 = sub nsw i32 %conv37, %conv39
|
|
%ispos179 = icmp sgt i32 %sub40, -1
|
|
%neg180 = sub nsw i32 0, %sub40
|
|
%5 = select i1 %ispos179, i32 %sub40, i32 %neg180
|
|
%conv42 = trunc i32 %5 to i8
|
|
%vecins43 = insertelement <16 x i8> %vecins35, i8 %conv42, i32 5
|
|
%vecext44 = extractelement <16 x i8> %a, i32 6
|
|
%conv45 = zext i8 %vecext44 to i32
|
|
%vecext46 = extractelement <16 x i8> %b, i32 6
|
|
%conv47 = zext i8 %vecext46 to i32
|
|
%sub48 = sub nsw i32 %conv45, %conv47
|
|
%ispos181 = icmp sgt i32 %sub48, -1
|
|
%neg182 = sub nsw i32 0, %sub48
|
|
%6 = select i1 %ispos181, i32 %sub48, i32 %neg182
|
|
%conv50 = trunc i32 %6 to i8
|
|
%vecins51 = insertelement <16 x i8> %vecins43, i8 %conv50, i32 6
|
|
%vecext52 = extractelement <16 x i8> %a, i32 7
|
|
%conv53 = zext i8 %vecext52 to i32
|
|
%vecext54 = extractelement <16 x i8> %b, i32 7
|
|
%conv55 = zext i8 %vecext54 to i32
|
|
%sub56 = sub nsw i32 %conv53, %conv55
|
|
%ispos183 = icmp sgt i32 %sub56, -1
|
|
%neg184 = sub nsw i32 0, %sub56
|
|
%7 = select i1 %ispos183, i32 %sub56, i32 %neg184
|
|
%conv58 = trunc i32 %7 to i8
|
|
%vecins59 = insertelement <16 x i8> %vecins51, i8 %conv58, i32 7
|
|
%vecext60 = extractelement <16 x i8> %a, i32 8
|
|
%conv61 = zext i8 %vecext60 to i32
|
|
%vecext62 = extractelement <16 x i8> %b, i32 8
|
|
%conv63 = zext i8 %vecext62 to i32
|
|
%sub64 = sub nsw i32 %conv61, %conv63
|
|
%ispos185 = icmp sgt i32 %sub64, -1
|
|
%neg186 = sub nsw i32 0, %sub64
|
|
%8 = select i1 %ispos185, i32 %sub64, i32 %neg186
|
|
%conv66 = trunc i32 %8 to i8
|
|
%vecins67 = insertelement <16 x i8> %vecins59, i8 %conv66, i32 8
|
|
%vecext68 = extractelement <16 x i8> %a, i32 9
|
|
%conv69 = zext i8 %vecext68 to i32
|
|
%vecext70 = extractelement <16 x i8> %b, i32 9
|
|
%conv71 = zext i8 %vecext70 to i32
|
|
%sub72 = sub nsw i32 %conv69, %conv71
|
|
%ispos187 = icmp sgt i32 %sub72, -1
|
|
%neg188 = sub nsw i32 0, %sub72
|
|
%9 = select i1 %ispos187, i32 %sub72, i32 %neg188
|
|
%conv74 = trunc i32 %9 to i8
|
|
%vecins75 = insertelement <16 x i8> %vecins67, i8 %conv74, i32 9
|
|
%vecext76 = extractelement <16 x i8> %a, i32 10
|
|
%conv77 = zext i8 %vecext76 to i32
|
|
%vecext78 = extractelement <16 x i8> %b, i32 10
|
|
%conv79 = zext i8 %vecext78 to i32
|
|
%sub80 = sub nsw i32 %conv77, %conv79
|
|
%ispos189 = icmp sgt i32 %sub80, -1
|
|
%neg190 = sub nsw i32 0, %sub80
|
|
%10 = select i1 %ispos189, i32 %sub80, i32 %neg190
|
|
%conv82 = trunc i32 %10 to i8
|
|
%vecins83 = insertelement <16 x i8> %vecins75, i8 %conv82, i32 10
|
|
%vecext84 = extractelement <16 x i8> %a, i32 11
|
|
%conv85 = zext i8 %vecext84 to i32
|
|
%vecext86 = extractelement <16 x i8> %b, i32 11
|
|
%conv87 = zext i8 %vecext86 to i32
|
|
%sub88 = sub nsw i32 %conv85, %conv87
|
|
%ispos191 = icmp sgt i32 %sub88, -1
|
|
%neg192 = sub nsw i32 0, %sub88
|
|
%11 = select i1 %ispos191, i32 %sub88, i32 %neg192
|
|
%conv90 = trunc i32 %11 to i8
|
|
%vecins91 = insertelement <16 x i8> %vecins83, i8 %conv90, i32 11
|
|
%vecext92 = extractelement <16 x i8> %a, i32 12
|
|
%conv93 = zext i8 %vecext92 to i32
|
|
%vecext94 = extractelement <16 x i8> %b, i32 12
|
|
%conv95 = zext i8 %vecext94 to i32
|
|
%sub96 = sub nsw i32 %conv93, %conv95
|
|
%ispos193 = icmp sgt i32 %sub96, -1
|
|
%neg194 = sub nsw i32 0, %sub96
|
|
%12 = select i1 %ispos193, i32 %sub96, i32 %neg194
|
|
%conv98 = trunc i32 %12 to i8
|
|
%vecins99 = insertelement <16 x i8> %vecins91, i8 %conv98, i32 12
|
|
%vecext100 = extractelement <16 x i8> %a, i32 13
|
|
%conv101 = zext i8 %vecext100 to i32
|
|
%vecext102 = extractelement <16 x i8> %b, i32 13
|
|
%conv103 = zext i8 %vecext102 to i32
|
|
%sub104 = sub nsw i32 %conv101, %conv103
|
|
%ispos195 = icmp sgt i32 %sub104, -1
|
|
%neg196 = sub nsw i32 0, %sub104
|
|
%13 = select i1 %ispos195, i32 %sub104, i32 %neg196
|
|
%conv106 = trunc i32 %13 to i8
|
|
%vecins107 = insertelement <16 x i8> %vecins99, i8 %conv106, i32 13
|
|
%vecext108 = extractelement <16 x i8> %a, i32 14
|
|
%conv109 = zext i8 %vecext108 to i32
|
|
%vecext110 = extractelement <16 x i8> %b, i32 14
|
|
%conv111 = zext i8 %vecext110 to i32
|
|
%sub112 = sub nsw i32 %conv109, %conv111
|
|
%ispos197 = icmp sgt i32 %sub112, -1
|
|
%neg198 = sub nsw i32 0, %sub112
|
|
%14 = select i1 %ispos197, i32 %sub112, i32 %neg198
|
|
%conv114 = trunc i32 %14 to i8
|
|
%vecins115 = insertelement <16 x i8> %vecins107, i8 %conv114, i32 14
|
|
%vecext116 = extractelement <16 x i8> %a, i32 15
|
|
%conv117 = zext i8 %vecext116 to i32
|
|
%vecext118 = extractelement <16 x i8> %b, i32 15
|
|
%conv119 = zext i8 %vecext118 to i32
|
|
%sub120 = sub nsw i32 %conv117, %conv119
|
|
%ispos199 = icmp sgt i32 %sub120, -1
|
|
%neg200 = sub nsw i32 0, %sub120
|
|
%15 = select i1 %ispos199, i32 %sub120, i32 %neg200
|
|
%conv122 = trunc i32 %15 to i8
|
|
%vecins123 = insertelement <16 x i8> %vecins115, i8 %conv122, i32 15
|
|
ret <16 x i8> %vecins123
|
|
}
|
|
|
|
; |a - b| for v4i32 expressed as max(a-b, -(a-b)) through the vmaxsw intrinsic.
; pwr9 normalizes both operands with xvnegsp and selects a single vabsduw;
; pwr7/pwr8 keep the explicit negate-and-signed-max sequence.
define <4 x i32> @sub_absv_vec_32(<4 x i32> %a, <4 x i32> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_vec_32:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: xvnegsp v3, v3
; CHECK-PWR9-NEXT: xvnegsp v2, v2
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: sub_absv_vec_32:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: xxlxor v4, v4, v4
; CHECK-PWR78-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v3, v4, v2
; CHECK-PWR78-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR78-NEXT: blr
entry:
  %sub = sub <4 x i32> %a, %b
  %sub.i = sub <4 x i32> zeroinitializer, %sub
  %0 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %sub, <4 x i32> %sub.i)
  ret <4 x i32> %0
}
|
|
|
|
; Halfword variant of the sub + max(x, -x) pattern via vmaxsh. Note that even
; on pwr9 no vabsduh is formed here (the CHECK lines show vsubuhm + vmaxsh).
define <8 x i16> @sub_absv_vec_16(<8 x i16> %a, <8 x i16> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_vec_16:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vsubuhm v2, v2, v3
; CHECK-PWR9-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-NEXT: vsubuhm v3, v3, v2
; CHECK-PWR9-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: sub_absv_vec_16:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: xxlxor v4, v4, v4
; CHECK-PWR78-NEXT: vsubuhm v2, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v3, v4, v2
; CHECK-PWR78-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR78-NEXT: blr
entry:
  %sub = sub <8 x i16> %a, %b
  %sub.i = sub <8 x i16> zeroinitializer, %sub
  %0 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %sub, <8 x i16> %sub.i)
  ret <8 x i16> %0
}
|
|
|
|
; Byte variant of the sub + max(x, -x) pattern via vmaxsb. As with the i16
; case, pwr9 still emits vsububm + vmaxsb rather than vabsdub for this form.
define <16 x i8> @sub_absv_vec_8(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr {
; CHECK-PWR9-LABEL: sub_absv_vec_8:
; CHECK-PWR9: # %bb.0: # %entry
; CHECK-PWR9-NEXT: vsububm v2, v2, v3
; CHECK-PWR9-NEXT: xxlxor v3, v3, v3
; CHECK-PWR9-NEXT: vsububm v3, v3, v2
; CHECK-PWR9-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: sub_absv_vec_8:
; CHECK-PWR78: # %bb.0: # %entry
; CHECK-PWR78-NEXT: xxlxor v4, v4, v4
; CHECK-PWR78-NEXT: vsububm v2, v2, v3
; CHECK-PWR78-NEXT: vsububm v3, v4, v2
; CHECK-PWR78-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR78-NEXT: blr
entry:
  %sub = sub <16 x i8> %a, %b
  %sub.i = sub <16 x i8> zeroinitializer, %sub
  %0 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %sub, <16 x i8> %sub.i)
  ret <16 x i8> %0
}
|
|
|
|
; Operands are zero-extended i16 lanes, so the i32 subtraction cannot wrap and
; the sub + max(x, -x) pattern folds to vabsduw on pwr9 (after a merge-with-zero
; that realizes the zext: vmrglh on LE, vmrghh on BE). pwr7 materializes the
; zext with a vperm against a constant-pool shuffle mask instead.
define <4 x i32> @zext_sub_absd32(<4 x i16>, <4 x i16>) local_unnamed_addr {
; CHECK-PWR9-LE-LABEL: zext_sub_absd32:
; CHECK-PWR9-LE: # %bb.0:
; CHECK-PWR9-LE-NEXT: xxlxor v4, v4, v4
; CHECK-PWR9-LE-NEXT: vmrglh v2, v4, v2
; CHECK-PWR9-LE-NEXT: vmrglh v3, v4, v3
; CHECK-PWR9-LE-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-LE-NEXT: blr
;
; CHECK-PWR9-BE-LABEL: zext_sub_absd32:
; CHECK-PWR9-BE: # %bb.0:
; CHECK-PWR9-BE-NEXT: xxlxor v4, v4, v4
; CHECK-PWR9-BE-NEXT: vmrghh v2, v4, v2
; CHECK-PWR9-BE-NEXT: vmrghh v3, v4, v3
; CHECK-PWR9-BE-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-BE-NEXT: blr
;
; CHECK-PWR8-LABEL: zext_sub_absd32:
; CHECK-PWR8: # %bb.0:
; CHECK-PWR8-NEXT: xxlxor v4, v4, v4
; CHECK-PWR8-NEXT: vmrglh v2, v4, v2
; CHECK-PWR8-NEXT: vmrglh v3, v4, v3
; CHECK-PWR8-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR8-NEXT: vsubuwm v3, v4, v2
; CHECK-PWR8-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR7-LABEL: zext_sub_absd32:
; CHECK-PWR7: # %bb.0:
; CHECK-PWR7-NEXT: addis r3, r2, .LCPI13_0@toc@ha
; CHECK-PWR7-NEXT: xxlxor v5, v5, v5
; CHECK-PWR7-NEXT: addi r3, r3, .LCPI13_0@toc@l
; CHECK-PWR7-NEXT: lxvw4x v4, 0, r3
; CHECK-PWR7-NEXT: vperm v2, v5, v2, v4
; CHECK-PWR7-NEXT: vperm v3, v5, v3, v4
; CHECK-PWR7-NEXT: vsubuwm v2, v2, v3
; CHECK-PWR7-NEXT: vsubuwm v3, v5, v2
; CHECK-PWR7-NEXT: vmaxsw v2, v2, v3
; CHECK-PWR7-NEXT: blr
  %3 = zext <4 x i16> %0 to <4 x i32>
  %4 = zext <4 x i16> %1 to <4 x i32>
  %5 = sub <4 x i32> %3, %4
  %6 = sub <4 x i32> zeroinitializer, %5
  %7 = tail call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %5, <4 x i32> %6)
  ret <4 x i32> %7
}
|
|
|
|
; Same idea one element size down: i8 lanes zero-extended to i16, so the
; sub + max(x, -x) pattern folds to vabsduh on pwr9 (zext realized by a byte
; merge with zero: vmrglb on LE, vmrghb on BE). pwr7 again goes through a
; constant-pool vperm mask for the extension.
define <8 x i16> @zext_sub_absd16(<8 x i8>, <8 x i8>) local_unnamed_addr {
; CHECK-PWR9-LE-LABEL: zext_sub_absd16:
; CHECK-PWR9-LE: # %bb.0:
; CHECK-PWR9-LE-NEXT: xxlxor v4, v4, v4
; CHECK-PWR9-LE-NEXT: vmrglb v2, v4, v2
; CHECK-PWR9-LE-NEXT: vmrglb v3, v4, v3
; CHECK-PWR9-LE-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-LE-NEXT: blr
;
; CHECK-PWR9-BE-LABEL: zext_sub_absd16:
; CHECK-PWR9-BE: # %bb.0:
; CHECK-PWR9-BE-NEXT: xxlxor v4, v4, v4
; CHECK-PWR9-BE-NEXT: vmrghb v2, v4, v2
; CHECK-PWR9-BE-NEXT: vmrghb v3, v4, v3
; CHECK-PWR9-BE-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-BE-NEXT: blr
;
; CHECK-PWR8-LABEL: zext_sub_absd16:
; CHECK-PWR8: # %bb.0:
; CHECK-PWR8-NEXT: xxlxor v4, v4, v4
; CHECK-PWR8-NEXT: vmrglb v2, v4, v2
; CHECK-PWR8-NEXT: vmrglb v3, v4, v3
; CHECK-PWR8-NEXT: vsubuhm v2, v2, v3
; CHECK-PWR8-NEXT: vsubuhm v3, v4, v2
; CHECK-PWR8-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR8-NEXT: blr
;
; CHECK-PWR7-LABEL: zext_sub_absd16:
; CHECK-PWR7: # %bb.0:
; CHECK-PWR7-NEXT: addis r3, r2, .LCPI14_0@toc@ha
; CHECK-PWR7-NEXT: xxlxor v5, v5, v5
; CHECK-PWR7-NEXT: addi r3, r3, .LCPI14_0@toc@l
; CHECK-PWR7-NEXT: lxvw4x v4, 0, r3
; CHECK-PWR7-NEXT: vperm v2, v5, v2, v4
; CHECK-PWR7-NEXT: vperm v3, v5, v3, v4
; CHECK-PWR7-NEXT: vsubuhm v2, v2, v3
; CHECK-PWR7-NEXT: vsubuhm v3, v5, v2
; CHECK-PWR7-NEXT: vmaxsh v2, v2, v3
; CHECK-PWR7-NEXT: blr
  %3 = zext <8 x i8> %0 to <8 x i16>
  %4 = zext <8 x i8> %1 to <8 x i16>
  %5 = sub <8 x i16> %3, %4
  %6 = sub <8 x i16> zeroinitializer, %5
  %7 = tail call <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16> %5, <8 x i16> %6)
  ret <8 x i16> %7
}
|
|
|
|
; i4 lanes zero-extended to i8: the zext is realized as an AND with a splat of
; 15 (xxspltib on pwr9, vspltisb on pwr7/8), after which pwr9 can fold the
; sub + max(x, -x) pattern into a single vabsdub.
define <16 x i8> @zext_sub_absd8(<16 x i4>, <16 x i4>) local_unnamed_addr {
; CHECK-PWR9-LABEL: zext_sub_absd8:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: xxspltib vs0, 15
; CHECK-PWR9-NEXT: xxland v2, v2, vs0
; CHECK-PWR9-NEXT: xxland v3, v3, vs0
; CHECK-PWR9-NEXT: vabsdub v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: zext_sub_absd8:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vspltisb v4, 15
; CHECK-PWR78-NEXT: xxland v2, v2, v4
; CHECK-PWR78-NEXT: xxland v3, v3, v4
; CHECK-PWR78-NEXT: vsububm v2, v2, v3
; CHECK-PWR78-NEXT: xxlxor v3, v3, v3
; CHECK-PWR78-NEXT: vsububm v3, v3, v2
; CHECK-PWR78-NEXT: vmaxsb v2, v2, v3
; CHECK-PWR78-NEXT: blr
  %3 = zext <16 x i4> %0 to <16 x i8>
  %4 = zext <16 x i4> %1 to <16 x i8>
  %5 = sub <16 x i8> %3, %4
  %6 = sub <16 x i8> zeroinitializer, %5
  %7 = tail call <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8> %5, <16 x i8> %6)
  ret <16 x i8> %7
}
|
|
|
|
; Verify vabsdu* exploitation for the ucmp + sub + select sequence.
|
|
|
|
; select(a >u b, a-b, b-a) folds to a single vabsduw on pwr9; pre-pwr9 emits
; the compare, both subtractions, and an xxsel.
define <4 x i32> @absd_int32_ugt(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_ugt:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_ugt:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuw v4, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v2, v5, v4
; CHECK-PWR78-NEXT: blr
  %3 = icmp ugt <4 x i32> %0, %1
  %4 = sub <4 x i32> %0, %1
  %5 = sub <4 x i32> %1, %0
  %6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
  ret <4 x i32> %6
}
|
|
|
|
; Same abs-diff pattern with uge: still one vabsduw on pwr9. Pre-pwr9 inverts
; the vcmpgtuw result with xxlnor (there is no "gte" vector compare) before
; the xxsel.
define <4 x i32> @absd_int32_uge(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_uge:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_uge:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuw v4, v3, v2
; CHECK-PWR78-NEXT: xxlnor vs0, v4, v4
; CHECK-PWR78-NEXT: vsubuwm v4, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v2, v4, vs0
; CHECK-PWR78-NEXT: blr
  %3 = icmp uge <4 x i32> %0, %1
  %4 = sub <4 x i32> %0, %1
  %5 = sub <4 x i32> %1, %0
  %6 = select <4 x i1> %3, <4 x i32> %4, <4 x i32> %5
  ret <4 x i32> %6
}
|
|
|
|
; ult form: the select arms are swapped relative to the ugt case
; (select(a <u b, b-a, a-b)) — still recognized as vabsduw on pwr9.
define <4 x i32> @absd_int32_ult(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_ult:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_ult:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuw v4, v3, v2
; CHECK-PWR78-NEXT: vsubuwm v5, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v5, v2, v4
; CHECK-PWR78-NEXT: blr
  %3 = icmp ult <4 x i32> %0, %1
  %4 = sub <4 x i32> %0, %1
  %5 = sub <4 x i32> %1, %0
  %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
  ret <4 x i32> %6
}
|
|
|
|
; ule form (swapped select arms plus the inverted compare): one vabsduw on
; pwr9; pre-pwr9 needs vcmpgtuw + xxlnor + both subs + xxsel.
define <4 x i32> @absd_int32_ule(<4 x i32>, <4 x i32>) {
; CHECK-PWR9-LABEL: absd_int32_ule:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduw v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int32_ule:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuw v4, v2, v3
; CHECK-PWR78-NEXT: xxlnor vs0, v4, v4
; CHECK-PWR78-NEXT: vsubuwm v4, v2, v3
; CHECK-PWR78-NEXT: vsubuwm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v4, v2, vs0
; CHECK-PWR78-NEXT: blr
  %3 = icmp ule <4 x i32> %0, %1
  %4 = sub <4 x i32> %0, %1
  %5 = sub <4 x i32> %1, %0
  %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
  ret <4 x i32> %6
}
|
|
|
|
; Halfword ugt abs-diff: vabsduh on pwr9; vcmpgtuh + subs + xxsel otherwise.
define <8 x i16> @absd_int16_ugt(<8 x i16>, <8 x i16>) {
; CHECK-PWR9-LABEL: absd_int16_ugt:
; CHECK-PWR9: # %bb.0:
; CHECK-PWR9-NEXT: vabsduh v2, v2, v3
; CHECK-PWR9-NEXT: blr
;
; CHECK-PWR78-LABEL: absd_int16_ugt:
; CHECK-PWR78: # %bb.0:
; CHECK-PWR78-NEXT: vcmpgtuh v4, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v5, v2, v3
; CHECK-PWR78-NEXT: vsubuhm v2, v3, v2
; CHECK-PWR78-NEXT: xxsel v2, v2, v5, v4
; CHECK-PWR78-NEXT: blr
  %3 = icmp ugt <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
  ret <8 x i16> %6
}
|
|
|
|
; Unsigned absolute difference, i16, uge predicate. P9 folds to vabsduh;
; P7/P8 synthesize uge by computing ult (vcmpgtuh with swapped operands)
; and inverting the mask with xxlnor before the select.
define <8 x i16> @absd_int16_uge(<8 x i16>, <8 x i16>) {
; CHECK-PWR9-LABEL: absd_int16_uge:
; CHECK-PWR9:       # %bb.0:
; CHECK-PWR9-NEXT:    vabsduh v2, v2, v3
; CHECK-PWR9-NEXT:    blr
;
; CHECK-PWR78-LABEL: absd_int16_uge:
; CHECK-PWR78:       # %bb.0:
; CHECK-PWR78-NEXT:    vcmpgtuh v4, v3, v2
; CHECK-PWR78-NEXT:    vsubuhm v5, v2, v3
; CHECK-PWR78-NEXT:    vsubuhm v2, v3, v2
; CHECK-PWR78-NEXT:    xxlnor v4, v4, v4
; CHECK-PWR78-NEXT:    xxsel v2, v2, v5, v4
; CHECK-PWR78-NEXT:    blr
  %3 = icmp uge <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> %5
  ret <8 x i16> %6
}
|
|
|
|
; Unsigned absolute difference, i16, ult predicate. P9 folds to vabsduh;
; P7/P8 expand to compare + two subs + select.
define <8 x i16> @absd_int16_ult(<8 x i16>, <8 x i16>) {
; CHECK-PWR9-LABEL: absd_int16_ult:
; CHECK-PWR9:       # %bb.0:
; CHECK-PWR9-NEXT:    vabsduh v2, v2, v3
; CHECK-PWR9-NEXT:    blr
;
; CHECK-PWR78-LABEL: absd_int16_ult:
; CHECK-PWR78:       # %bb.0:
; CHECK-PWR78-NEXT:    vcmpgtuh v4, v3, v2
; CHECK-PWR78-NEXT:    vsubuhm v5, v2, v3
; CHECK-PWR78-NEXT:    vsubuhm v2, v3, v2
; CHECK-PWR78-NEXT:    xxsel v2, v5, v2, v4
; CHECK-PWR78-NEXT:    blr
  %3 = icmp ult <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
  ret <8 x i16> %6
}
|
|
|
|
; Unsigned absolute difference, i16, ule predicate. P9 folds to vabsduh;
; P7/P8 compute ugt and invert the mask with xxlnor to get ule.
define <8 x i16> @absd_int16_ule(<8 x i16>, <8 x i16>) {
; CHECK-PWR9-LABEL: absd_int16_ule:
; CHECK-PWR9:       # %bb.0:
; CHECK-PWR9-NEXT:    vabsduh v2, v2, v3
; CHECK-PWR9-NEXT:    blr
;
; CHECK-PWR78-LABEL: absd_int16_ule:
; CHECK-PWR78:       # %bb.0:
; CHECK-PWR78-NEXT:    vcmpgtuh v4, v2, v3
; CHECK-PWR78-NEXT:    vsubuhm v5, v2, v3
; CHECK-PWR78-NEXT:    vsubuhm v2, v3, v2
; CHECK-PWR78-NEXT:    xxlnor v4, v4, v4
; CHECK-PWR78-NEXT:    xxsel v2, v5, v2, v4
; CHECK-PWR78-NEXT:    blr
  %3 = icmp ule <8 x i16> %0, %1
  %4 = sub <8 x i16> %0, %1
  %5 = sub <8 x i16> %1, %0
  %6 = select <8 x i1> %3, <8 x i16> %5, <8 x i16> %4
  ret <8 x i16> %6
}
|
|
|
|
; Unsigned absolute difference, i8, ugt predicate. P9 folds to vabsdub;
; P7/P8 expand to compare + two subs + select.
define <16 x i8> @absd_int8_ugt(<16 x i8>, <16 x i8>) {
; CHECK-PWR9-LABEL: absd_int8_ugt:
; CHECK-PWR9:       # %bb.0:
; CHECK-PWR9-NEXT:    vabsdub v2, v2, v3
; CHECK-PWR9-NEXT:    blr
;
; CHECK-PWR78-LABEL: absd_int8_ugt:
; CHECK-PWR78:       # %bb.0:
; CHECK-PWR78-NEXT:    vcmpgtub v4, v2, v3
; CHECK-PWR78-NEXT:    vsububm v5, v2, v3
; CHECK-PWR78-NEXT:    vsububm v2, v3, v2
; CHECK-PWR78-NEXT:    xxsel v2, v2, v5, v4
; CHECK-PWR78-NEXT:    blr
  %3 = icmp ugt <16 x i8> %0, %1
  %4 = sub <16 x i8> %0, %1
  %5 = sub <16 x i8> %1, %0
  %6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
  ret <16 x i8> %6
}
|
|
|
|
; Unsigned absolute difference, i8, uge predicate. P9 folds to vabsdub;
; P7/P8 synthesize uge by computing ult and inverting with xxlnor.
define <16 x i8> @absd_int8_uge(<16 x i8>, <16 x i8>) {
; CHECK-PWR9-LABEL: absd_int8_uge:
; CHECK-PWR9:       # %bb.0:
; CHECK-PWR9-NEXT:    vabsdub v2, v2, v3
; CHECK-PWR9-NEXT:    blr
;
; CHECK-PWR78-LABEL: absd_int8_uge:
; CHECK-PWR78:       # %bb.0:
; CHECK-PWR78-NEXT:    vcmpgtub v4, v3, v2
; CHECK-PWR78-NEXT:    vsububm v5, v2, v3
; CHECK-PWR78-NEXT:    vsububm v2, v3, v2
; CHECK-PWR78-NEXT:    xxlnor v4, v4, v4
; CHECK-PWR78-NEXT:    xxsel v2, v2, v5, v4
; CHECK-PWR78-NEXT:    blr
  %3 = icmp uge <16 x i8> %0, %1
  %4 = sub <16 x i8> %0, %1
  %5 = sub <16 x i8> %1, %0
  %6 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> %5
  ret <16 x i8> %6
}
|
|
|
|
; Unsigned absolute difference, i8, ult predicate. P9 folds to vabsdub;
; P7/P8 expand to compare + two subs + select.
define <16 x i8> @absd_int8_ult(<16 x i8>, <16 x i8>) {
; CHECK-PWR9-LABEL: absd_int8_ult:
; CHECK-PWR9:       # %bb.0:
; CHECK-PWR9-NEXT:    vabsdub v2, v2, v3
; CHECK-PWR9-NEXT:    blr
;
; CHECK-PWR78-LABEL: absd_int8_ult:
; CHECK-PWR78:       # %bb.0:
; CHECK-PWR78-NEXT:    vcmpgtub v4, v3, v2
; CHECK-PWR78-NEXT:    vsububm v5, v2, v3
; CHECK-PWR78-NEXT:    vsububm v2, v3, v2
; CHECK-PWR78-NEXT:    xxsel v2, v5, v2, v4
; CHECK-PWR78-NEXT:    blr
  %3 = icmp ult <16 x i8> %0, %1
  %4 = sub <16 x i8> %0, %1
  %5 = sub <16 x i8> %1, %0
  %6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
  ret <16 x i8> %6
}
|
|
|
|
; Unsigned absolute difference, i8, ule predicate. P9 folds to vabsdub;
; P7/P8 compute ugt and invert the mask with xxlnor to get ule.
define <16 x i8> @absd_int8_ule(<16 x i8>, <16 x i8>) {
; CHECK-PWR9-LABEL: absd_int8_ule:
; CHECK-PWR9:       # %bb.0:
; CHECK-PWR9-NEXT:    vabsdub v2, v2, v3
; CHECK-PWR9-NEXT:    blr
;
; CHECK-PWR78-LABEL: absd_int8_ule:
; CHECK-PWR78:       # %bb.0:
; CHECK-PWR78-NEXT:    vcmpgtub v4, v2, v3
; CHECK-PWR78-NEXT:    vsububm v5, v2, v3
; CHECK-PWR78-NEXT:    vsububm v2, v3, v2
; CHECK-PWR78-NEXT:    xxlnor v4, v4, v4
; CHECK-PWR78-NEXT:    xxsel v2, v5, v2, v4
; CHECK-PWR78-NEXT:    blr
  %3 = icmp ule <16 x i8> %0, %1
  %4 = sub <16 x i8> %0, %1
  %5 = sub <16 x i8> %1, %0
  %6 = select <16 x i1> %3, <16 x i8> %5, <16 x i8> %4
  ret <16 x i8> %6
}
|
|
|
|
; Negative tests: cases we are unable to optimize to vabsdu[bhw].
; Check that the combine does not fire beyond its intended scope.
|
|
; Negative test: the select arms are swapped relative to the predicate
; ("a > b ? b - a : a - b"), which is the NEGATED difference rather than an
; absolute difference, so no target may fold it into vabsduw. A single
; CHECK prefix covers all CPUs.
define <4 x i32> @absd_int32_ugt_opp(<4 x i32>, <4 x i32>) {
; CHECK-LABEL: absd_int32_ugt_opp:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpgtuw v4, v2, v3
; CHECK-NEXT:    vsubuwm v5, v2, v3
; CHECK-NEXT:    vsubuwm v2, v3, v2
; CHECK-NEXT:    xxsel v2, v5, v2, v4
; CHECK-NEXT:    blr
  %3 = icmp ugt <4 x i32> %0, %1
  %4 = sub <4 x i32> %0, %1
  %5 = sub <4 x i32> %1, %0
  %6 = select <4 x i1> %3, <4 x i32> %5, <4 x i32> %4
  ret <4 x i32> %6
}
|
|
|
|
; Negative test: there is no doubleword vabsdu instruction, so even P9
; must expand to compare + two subs + select (same as P8). P7 predates the
; ISA 2.07 doubleword vector compare/subtract, so it spills the vectors to
; the stack and does the compares and subtractions in scalar GPRs before
; reassembling the result with lxvd2x/xxsel.
define <2 x i64> @absd_int64_ugt(<2 x i64>, <2 x i64>) {
; CHECK-PWR9-LABEL: absd_int64_ugt:
; CHECK-PWR9:       # %bb.0:
; CHECK-PWR9-NEXT:    vcmpgtud v4, v2, v3
; CHECK-PWR9-NEXT:    vsubudm v5, v2, v3
; CHECK-PWR9-NEXT:    vsubudm v2, v3, v2
; CHECK-PWR9-NEXT:    xxsel v2, v2, v5, v4
; CHECK-PWR9-NEXT:    blr
;
; CHECK-PWR8-LABEL: absd_int64_ugt:
; CHECK-PWR8:       # %bb.0:
; CHECK-PWR8-NEXT:    vcmpgtud v4, v2, v3
; CHECK-PWR8-NEXT:    vsubudm v5, v2, v3
; CHECK-PWR8-NEXT:    vsubudm v2, v3, v2
; CHECK-PWR8-NEXT:    xxsel v2, v2, v5, v4
; CHECK-PWR8-NEXT:    blr
;
; CHECK-PWR7-LABEL: absd_int64_ugt:
; CHECK-PWR7:       # %bb.0:
; CHECK-PWR7-NEXT:    addi r3, r1, -64
; CHECK-PWR7-NEXT:    addi r4, r1, -80
; CHECK-PWR7-NEXT:    li r5, 0
; CHECK-PWR7-NEXT:    li r6, -1
; CHECK-PWR7-NEXT:    stxvd2x v3, 0, r3
; CHECK-PWR7-NEXT:    stxvd2x v2, 0, r4
; CHECK-PWR7-NEXT:    ld r3, -56(r1)
; CHECK-PWR7-NEXT:    ld r4, -72(r1)
; CHECK-PWR7-NEXT:    ld r8, -80(r1)
; CHECK-PWR7-NEXT:    cmpld r4, r3
; CHECK-PWR7-NEXT:    sub r9, r4, r3
; CHECK-PWR7-NEXT:    iselgt r7, r6, r5
; CHECK-PWR7-NEXT:    sub r3, r3, r4
; CHECK-PWR7-NEXT:    std r7, -8(r1)
; CHECK-PWR7-NEXT:    ld r7, -64(r1)
; CHECK-PWR7-NEXT:    cmpld r8, r7
; CHECK-PWR7-NEXT:    iselgt r4, r6, r5
; CHECK-PWR7-NEXT:    addi r5, r1, -16
; CHECK-PWR7-NEXT:    std r4, -16(r1)
; CHECK-PWR7-NEXT:    sub r4, r8, r7
; CHECK-PWR7-NEXT:    lxvd2x v2, 0, r5
; CHECK-PWR7-NEXT:    std r9, -40(r1)
; CHECK-PWR7-NEXT:    addi r5, r1, -48
; CHECK-PWR7-NEXT:    std r4, -48(r1)
; CHECK-PWR7-NEXT:    sub r4, r7, r8
; CHECK-PWR7-NEXT:    lxvd2x v3, 0, r5
; CHECK-PWR7-NEXT:    std r3, -24(r1)
; CHECK-PWR7-NEXT:    addi r3, r1, -32
; CHECK-PWR7-NEXT:    std r4, -32(r1)
; CHECK-PWR7-NEXT:    lxvd2x v4, 0, r3
; CHECK-PWR7-NEXT:    xxsel v2, v4, v3, v2
; CHECK-PWR7-NEXT:    blr
  %3 = icmp ugt <2 x i64> %0, %1
  %4 = sub <2 x i64> %0, %1
  %5 = sub <2 x i64> %1, %0
  %6 = select <2 x i1> %3, <2 x i64> %4, <2 x i64> %5
  ret <2 x i64> %6
}
|
|
|
|
declare <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32>, <4 x i32>)
|
|
|
|
declare <8 x i16> @llvm.ppc.altivec.vmaxsh(<8 x i16>, <8 x i16>)
|
|
|
|
declare <16 x i8> @llvm.ppc.altivec.vmaxsb(<16 x i8>, <16 x i8>)
|
|
|