VSX introduced some permute instructions that are direct replacements for the Altivec ones, except that they can target all of the VSX registers. We have added code generation for most of these but somehow missed the low/high word merges (XXMRG[LH]W). This caused some additional spills in some large, computationally intensive code. This patch simply adds the missed patterns.
111 lines
3.4 KiB
LLVM
111 lines
3.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- -mcpu=pwr8 < %s | FileCheck %s --check-prefix=LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck %s --check-prefix=BE

; Codegen test for @pr25080 on 64-bit PowerPC with -mcpu=pwr8. The two llc
; invocations above compile this file for the little-endian triple and for the
; big-endian triple, and each one is verified against its own check prefix.
;
; The function truncates an <8 x i32> argument to the non-power-of-two element
; type i23, compares every lane against zero, forces the first four lanes of
; the resulting mask to true, and sign-extends the <8 x i1> mask to <8 x i16>.
;
; The expected output merges words with the VSX forms of the instructions
; (xxmrglw in the little-endian sequence, xxmrghw in the big-endian sequence).
;
; The check lines below are generated output. Do not edit them by hand;
; regenerate them with utils/update_llc_test_checks.py after a codegen change.
;
; NOTE(review): the function name suggests this is the regression test for
; LLVM bug report PR25080 - confirm against the bug tracker.
define <8 x i16> @pr25080(<8 x i32> %a) {
; LE-LABEL: pr25080:
; LE: # %bb.0: # %entry
; LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; LE-NEXT: xxlxor 37, 37, 37
; LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; LE-NEXT: lvx 4, 0, 3
; LE-NEXT: xxland 34, 34, 36
; LE-NEXT: xxland 35, 35, 36
; LE-NEXT: vcmpequw 2, 2, 5
; LE-NEXT: vcmpequw 3, 3, 5
; LE-NEXT: xxswapd 0, 34
; LE-NEXT: mfvsrwz 3, 34
; LE-NEXT: xxsldwi 1, 34, 34, 1
; LE-NEXT: mfvsrwz 4, 35
; LE-NEXT: xxsldwi 2, 34, 34, 3
; LE-NEXT: mtvsrd 36, 3
; LE-NEXT: mffprwz 3, 0
; LE-NEXT: xxswapd 0, 35
; LE-NEXT: mtvsrd 37, 4
; LE-NEXT: mffprwz 4, 1
; LE-NEXT: xxsldwi 1, 35, 35, 1
; LE-NEXT: mtvsrd 34, 3
; LE-NEXT: mffprwz 3, 2
; LE-NEXT: mtvsrd 32, 4
; LE-NEXT: mffprwz 4, 0
; LE-NEXT: xxsldwi 0, 35, 35, 3
; LE-NEXT: mtvsrd 33, 3
; LE-NEXT: mffprwz 3, 1
; LE-NEXT: mtvsrd 38, 4
; LE-NEXT: mtvsrd 35, 3
; LE-NEXT: mffprwz 3, 0
; LE-NEXT: vmrghh 2, 0, 2
; LE-NEXT: mtvsrd 32, 3
; LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; LE-NEXT: vmrghh 4, 1, 4
; LE-NEXT: addi 3, 3, .LCPI0_1@toc@l
; LE-NEXT: vmrghh 3, 3, 6
; LE-NEXT: vmrghh 5, 0, 5
; LE-NEXT: xxmrglw 0, 36, 34
; LE-NEXT: vspltish 4, 15
; LE-NEXT: xxmrglw 1, 37, 35
; LE-NEXT: lvx 3, 0, 3
; LE-NEXT: xxmrgld 34, 1, 0
; LE-NEXT: xxlor 34, 34, 35
; LE-NEXT: vslh 2, 2, 4
; LE-NEXT: vsrah 2, 2, 4
; LE-NEXT: blr
;
; BE-LABEL: pr25080:
; BE: # %bb.0: # %entry
; BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; BE-NEXT: xxlxor 36, 36, 36
; BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
; BE-NEXT: lxvw4x 0, 0, 3
; BE-NEXT: xxland 35, 35, 0
; BE-NEXT: xxland 34, 34, 0
; BE-NEXT: vcmpequw 3, 3, 4
; BE-NEXT: vcmpequw 2, 2, 4
; BE-NEXT: xxswapd 0, 35
; BE-NEXT: mfvsrwz 3, 35
; BE-NEXT: xxsldwi 1, 35, 35, 1
; BE-NEXT: mfvsrwz 4, 34
; BE-NEXT: mtvsrwz 36, 3
; BE-NEXT: xxsldwi 2, 35, 35, 3
; BE-NEXT: mffprwz 3, 0
; BE-NEXT: xxswapd 0, 34
; BE-NEXT: mtvsrwz 35, 4
; BE-NEXT: mffprwz 4, 1
; BE-NEXT: xxsldwi 1, 34, 34, 1
; BE-NEXT: mtvsrwz 37, 3
; BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
; BE-NEXT: mtvsrwz 32, 4
; BE-NEXT: mffprwz 4, 0
; BE-NEXT: lxvw4x 33, 0, 3
; BE-NEXT: xxsldwi 0, 34, 34, 3
; BE-NEXT: mffprwz 3, 1
; BE-NEXT: mffprwz 5, 2
; BE-NEXT: vperm 2, 0, 5, 1
; BE-NEXT: mtvsrwz 37, 3
; BE-NEXT: mffprwz 3, 0
; BE-NEXT: mtvsrwz 38, 5
; BE-NEXT: mtvsrwz 39, 4
; BE-NEXT: mtvsrwz 32, 3
; BE-NEXT: addis 3, 2, .LCPI0_2@toc@ha
; BE-NEXT: vperm 4, 6, 4, 1
; BE-NEXT: addi 3, 3, .LCPI0_2@toc@l
; BE-NEXT: vperm 5, 5, 7, 1
; BE-NEXT: vperm 3, 0, 3, 1
; BE-NEXT: xxmrghw 0, 36, 34
; BE-NEXT: xxmrghw 1, 35, 37
; BE-NEXT: vspltish 3, 15
; BE-NEXT: xxmrghd 34, 1, 0
; BE-NEXT: lxvw4x 0, 0, 3
; BE-NEXT: xxlor 34, 34, 0
; BE-NEXT: vslh 2, 2, 3
; BE-NEXT: vsrah 2, 2, 3
; BE-NEXT: blr
entry:
  ; Narrow every lane to the odd-width type i23.
  %0 = trunc <8 x i32> %a to <8 x i23>
  ; Per-lane mask that is true where the low 23 bits of the input are zero.
  %1 = icmp eq <8 x i23> %0, zeroinitializer
  ; Force lanes 0-3 to true; lanes 4-7 keep the comparison result.
  %2 = or <8 x i1> %1, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
  ; Widen the mask so that true becomes -1 (all ones) and false becomes 0.
  %3 = sext <8 x i1> %2 to <8 x i16>
  ret <8 x i16> %3
}