The combine step for shufflevector will sometimes replace undef in the mask with a defined value. This can cause an infinite loop in some cases, as another combine will then put the undef back in the mask. This patch fixes the issue so that undefs are not replaced when doing a combine.

Reviewed By: ZarkoCA, amyk, quinnp, saghir

Differential Revision: https://reviews.llvm.org/D127439
97 lines
3.4 KiB
LLVM
97 lines
3.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr10 -verify-machineinstrs \
; RUN: < %s | FileCheck %s --check-prefix=AIX
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr10 -verify-machineinstrs \
; RUN: < %s | FileCheck %s --check-prefix=AIX-32
; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s --check-prefix=LE
; RUN: llc -verify-machineinstrs -mtriple powerpcle-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s --check-prefix=LE-32
; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s --check-prefix=BE
; RUN: llc -verify-machineinstrs -mtriple powerpc-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s --check-prefix=BE-32

; Function Attrs: nounwind
; Regression test for the shufflevector combine (https://reviews.llvm.org/D127439).
; The shuffle mask below contains an undef lane (lane 2) that is overwritten by
; the following insertelement. The combine must leave that undef mask element
; alone: replacing it with a defined index allowed another combine to put the
; undef back, and the two rewrites could then alternate without terminating.
define dso_local <4 x i16> @shufflevector_combine(<4 x i32> %0) #0 {
; AIX-LABEL: shufflevector_combine:
; AIX: # %bb.0: # %newFuncRoot
; AIX-NEXT: ld 3, L..C0(2) # %const.0
; AIX-NEXT: xxlxor 36, 36, 36
; AIX-NEXT: lxv 35, 0(3)
; AIX-NEXT: li 3, 0
; AIX-NEXT: vperm 2, 4, 2, 3
; AIX-NEXT: vinsw 2, 3, 8
; AIX-NEXT: vpkuwum 2, 2, 2
; AIX-NEXT: blr
;
; AIX-32-LABEL: shufflevector_combine:
; AIX-32: # %bb.0: # %newFuncRoot
; AIX-32-NEXT: lwz 3, L..C0(2) # %const.0
; AIX-32-NEXT: xxlxor 36, 36, 36
; AIX-32-NEXT: lxv 35, 0(3)
; AIX-32-NEXT: li 3, 0
; AIX-32-NEXT: vperm 2, 4, 2, 3
; AIX-32-NEXT: vinsw 2, 3, 8
; AIX-32-NEXT: vpkuwum 2, 2, 2
; AIX-32-NEXT: blr
;
; LE-LABEL: shufflevector_combine:
; LE: # %bb.0: # %newFuncRoot
; LE-NEXT: plxv v3, .LCPI0_0@PCREL(0), 1
; LE-NEXT: xxlxor v4, v4, v4
; LE-NEXT: li r3, 0
; LE-NEXT: vperm v2, v2, v4, v3
; LE-NEXT: vinsw v2, r3, 4
; LE-NEXT: vpkuwum v2, v2, v2
; LE-NEXT: blr
;
; LE-32-LABEL: shufflevector_combine:
; LE-32: # %bb.0: # %newFuncRoot
; LE-32-NEXT: li r3, .LCPI0_0@l
; LE-32-NEXT: lis r4, .LCPI0_0@ha
; LE-32-NEXT: xxlxor v4, v4, v4
; LE-32-NEXT: lxvx v3, r4, r3
; LE-32-NEXT: li r3, 0
; LE-32-NEXT: vperm v2, v2, v4, v3
; LE-32-NEXT: vinsw v2, r3, 4
; LE-32-NEXT: vpkuwum v2, v2, v2
; LE-32-NEXT: blr
;
; BE-LABEL: shufflevector_combine:
; BE: # %bb.0: # %newFuncRoot
; BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; BE-NEXT: xxlxor v4, v4, v4
; BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
; BE-NEXT: lxv v3, 0(r3)
; BE-NEXT: li r3, 0
; BE-NEXT: vperm v2, v4, v2, v3
; BE-NEXT: vinsw v2, r3, 8
; BE-NEXT: vpkuwum v2, v2, v2
; BE-NEXT: blr
;
; BE-32-LABEL: shufflevector_combine:
; BE-32: # %bb.0: # %newFuncRoot
; BE-32-NEXT: li r3, .LCPI0_0@l
; BE-32-NEXT: lis r4, .LCPI0_0@ha
; BE-32-NEXT: xxlxor v4, v4, v4
; BE-32-NEXT: lxvx v3, r4, r3
; BE-32-NEXT: li r3, 0
; BE-32-NEXT: vperm v2, v4, v2, v3
; BE-32-NEXT: vinsw v2, r3, 8
; BE-32-NEXT: vpkuwum v2, v2, v2
; BE-32-NEXT: blr
newFuncRoot:
  ; Mask indices 0-3 select from the zero vector and 4-7 select from %0, so the
  ; result is <0, %0[3], undef, %0[2]>. The undef lane is what the test needs.
  %1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %0, <4 x i32> <i32 0, i32 7, i32 undef, i32 6>
  ; Give the undef lane (element 2) a defined value of 0.
  %2 = insertelement <4 x i32> %1, i32 0, i64 2
  ; Narrow every 32-bit lane to 16 bits to form the <4 x i16> return value.
  %3 = trunc <4 x i32> %2 to <4 x i16>
  ret <4 x i16> %3
}
; Attribute group #0 is the one attached to @shufflevector_combine's define line.
attributes #0 = { nounwind }