Files
clang-p2996/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll
Stefan Pintilie 263f1b2f5d [PowerPC] Fix combine step for shufflevector.
The combine step for shufflevector will sometimes replace undef in the mask
with a defined value. This can cause an infinite loop in some cases as another
combine will then put the undef back in the mask.

This patch fixes the issue so that undefs are not replaced when doing a combine.

Reviewed By: ZarkoCA, amyk, quinnp, saghir

Differential Revision: https://reviews.llvm.org/D127439
2022-06-14 11:31:24 -05:00

97 lines
3.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;
; The same IR is compiled six times with -mcpu=pwr10, and each run is checked
; against its own FileCheck prefix
;   AIX    - powerpc64-ibm-aix-xcoff       (64-bit AIX)
;   AIX-32 - powerpc-ibm-aix-xcoff         (32-bit AIX)
;   LE     - powerpc64le-unknown-linux-gnu (64-bit little-endian Linux)
;   LE-32  - powerpcle-unknown-linux-gnu   (32-bit little-endian Linux)
;   BE     - powerpc64-unknown-linux-gnu   (64-bit big-endian Linux)
;   BE-32  - powerpc-unknown-linux-gnu     (32-bit big-endian Linux)
; Only the Linux runs pass -ppc-vsr-nums-as-vr and -ppc-asm-full-reg-names,
; so their assertions use names such as v2 and r3 while the AIX assertions
; use bare register numbers.
;
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr10 -verify-machineinstrs \
; RUN: < %s | FileCheck %s --check-prefix=AIX
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr10 -verify-machineinstrs \
; RUN: < %s | FileCheck %s --check-prefix=AIX-32
; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s --check-prefix=LE
; RUN: llc -verify-machineinstrs -mtriple powerpcle-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s --check-prefix=LE-32
; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s --check-prefix=BE
; RUN: llc -verify-machineinstrs -mtriple powerpc-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \
; RUN: | FileCheck %s --check-prefix=BE-32
; Regression test for the PowerPC shufflevector combine
; (https://reviews.llvm.org/D127439).
;
; The shuffle in this function has an undef mask element in lane 2, and that
; lane is then overwritten by an insertelement. Per the change description,
; the combine used to replace an undef mask element with a defined value and
; another combine put the undef back, which could loop forever. The test
; therefore requires llc to finish and to emit the sequences asserted below
; (a vperm, a vinsw and a vpkuwum in every configuration).
;
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
;
; Function Attrs: nounwind
define dso_local <4 x i16> @shufflevector_combine(<4 x i32> %0) #0 {
; AIX-LABEL: shufflevector_combine:
; AIX: # %bb.0: # %newFuncRoot
; AIX-NEXT: ld 3, L..C0(2) # %const.0
; AIX-NEXT: xxlxor 36, 36, 36
; AIX-NEXT: lxv 35, 0(3)
; AIX-NEXT: li 3, 0
; AIX-NEXT: vperm 2, 4, 2, 3
; AIX-NEXT: vinsw 2, 3, 8
; AIX-NEXT: vpkuwum 2, 2, 2
; AIX-NEXT: blr
;
; AIX-32-LABEL: shufflevector_combine:
; AIX-32: # %bb.0: # %newFuncRoot
; AIX-32-NEXT: lwz 3, L..C0(2) # %const.0
; AIX-32-NEXT: xxlxor 36, 36, 36
; AIX-32-NEXT: lxv 35, 0(3)
; AIX-32-NEXT: li 3, 0
; AIX-32-NEXT: vperm 2, 4, 2, 3
; AIX-32-NEXT: vinsw 2, 3, 8
; AIX-32-NEXT: vpkuwum 2, 2, 2
; AIX-32-NEXT: blr
;
; LE-LABEL: shufflevector_combine:
; LE: # %bb.0: # %newFuncRoot
; LE-NEXT: plxv v3, .LCPI0_0@PCREL(0), 1
; LE-NEXT: xxlxor v4, v4, v4
; LE-NEXT: li r3, 0
; LE-NEXT: vperm v2, v2, v4, v3
; LE-NEXT: vinsw v2, r3, 4
; LE-NEXT: vpkuwum v2, v2, v2
; LE-NEXT: blr
;
; LE-32-LABEL: shufflevector_combine:
; LE-32: # %bb.0: # %newFuncRoot
; LE-32-NEXT: li r3, .LCPI0_0@l
; LE-32-NEXT: lis r4, .LCPI0_0@ha
; LE-32-NEXT: xxlxor v4, v4, v4
; LE-32-NEXT: lxvx v3, r4, r3
; LE-32-NEXT: li r3, 0
; LE-32-NEXT: vperm v2, v2, v4, v3
; LE-32-NEXT: vinsw v2, r3, 4
; LE-32-NEXT: vpkuwum v2, v2, v2
; LE-32-NEXT: blr
;
; BE-LABEL: shufflevector_combine:
; BE: # %bb.0: # %newFuncRoot
; BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha
; BE-NEXT: xxlxor v4, v4, v4
; BE-NEXT: addi r3, r3, .LCPI0_0@toc@l
; BE-NEXT: lxv v3, 0(r3)
; BE-NEXT: li r3, 0
; BE-NEXT: vperm v2, v4, v2, v3
; BE-NEXT: vinsw v2, r3, 8
; BE-NEXT: vpkuwum v2, v2, v2
; BE-NEXT: blr
;
; BE-32-LABEL: shufflevector_combine:
; BE-32: # %bb.0: # %newFuncRoot
; BE-32-NEXT: li r3, .LCPI0_0@l
; BE-32-NEXT: lis r4, .LCPI0_0@ha
; BE-32-NEXT: xxlxor v4, v4, v4
; BE-32-NEXT: lxvx v3, r4, r3
; BE-32-NEXT: li r3, 0
; BE-32-NEXT: vperm v2, v4, v2, v3
; BE-32-NEXT: vinsw v2, r3, 8
; BE-32-NEXT: vpkuwum v2, v2, v2
; BE-32-NEXT: blr
; The block name below also appears in the assertions as "# %newFuncRoot",
; so renaming it requires regenerating them.
newFuncRoot:
; Mask <0, 7, undef, 6> takes element 0 of the zero vector for lane 0 and
; elements 3 and 2 of %0 for lanes 1 and 3; lane 2 is left undef.
%1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %0, <4 x i32> <i32 0, i32 7, i32 undef, i32 6>
; Write a defined 0 into lane 2, the lane whose mask element was undef.
%2 = insertelement <4 x i32> %1, i32 0, i64 2
; Narrow every lane from 32 to 16 bits to form the return value.
%3 = trunc <4 x i32> %2 to <4 x i16>
ret <4 x i16> %3
}
; Attribute group #0, referenced by @shufflevector_combine, marks the
; function as nounwind.
attributes #0 = { nounwind }