Files
clang-p2996/llvm/test/CodeGen/X86/bool-vector.ll
Simon Pilgrim 78739fdb4d [DAG] Enable combineShiftOfShiftedLogic folds after type legalization
This was disabled to prevent regressions, which appear to be just occurring on AMDGPU (at least in our current lit tests), which I've addressed by adding AMDGPUTargetLowering::isDesirableToCommuteWithShift overrides.

Fixes #57872

Differential Revision: https://reviews.llvm.org/D136042
2022-10-29 12:30:04 +01:00

124 lines
4.0 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; PR15215_bad: pack the low bit of each <4 x i32> lane into a 4-bit mask via
; trunc -> bitcast <4 x i1> to i4 -> zext to i32. With SSE2/AVX2 this should
; lower to a single shift-into-sign-bit plus movmskps; without SSE2 it falls
; back to scalar byte arithmetic on the stack-passed lanes (X86) or argument
; registers (X64). CHECK lines below are autogenerated — do not hand-edit.
define i32 @PR15215_bad(<4 x i32> %input) {
; X86-LABEL: PR15215_bad:
; X86: # %bb.0: # %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movb {{[0-9]+}}(%esp), %ah
; X86-NEXT: shlb $3, %ah
; X86-NEXT: andb $1, %cl
; X86-NEXT: shlb $2, %cl
; X86-NEXT: orb %ah, %cl
; X86-NEXT: addb %dl, %dl
; X86-NEXT: andb $1, %al
; X86-NEXT: orb %dl, %al
; X86-NEXT: andb $3, %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: andl $15, %eax
; X86-NEXT: retl
;
; X64-LABEL: PR15215_bad:
; X64: # %bb.0: # %entry
; X64-NEXT: shlb $3, %cl
; X64-NEXT: andb $1, %dl
; X64-NEXT: shlb $2, %dl
; X64-NEXT: orb %cl, %dl
; X64-NEXT: addb %sil, %sil
; X64-NEXT: andb $1, %dil
; X64-NEXT: orb %sil, %dil
; X64-NEXT: andb $3, %dil
; X64-NEXT: orb %dl, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: andl $15, %eax
; X64-NEXT: retq
;
; SSE2-LABEL: PR15215_bad:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: PR15215_bad:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vmovmskps %xmm0, %eax
; AVX2-NEXT: ret{{[l|q]}}
entry:
; Bit i of the result is bit 0 of lane i of %input; upper 28 bits are zero.
%0 = trunc <4 x i32> %input to <4 x i1>
%1 = bitcast <4 x i1> %0 to i4
%2 = zext i4 %1 to i32
ret i32 %2
}
; PR15215_good: same 4-bit lane-mask computation as PR15215_bad, but written
; the long way — extract each i1 lane, select 1/2/4/8, and OR the results.
; SSE2/AVX2 should still recognize the pattern and emit movmskps; scalar
; targets materialize it with and/lea (X86/X64 blocks below).
; CHECK lines below are autogenerated — do not hand-edit.
define i32 @PR15215_good(<4 x i32> %input) {
; X86-LABEL: PR15215_good:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: andl $1, %esi
; X86-NEXT: andl $1, %edx
; X86-NEXT: andl $1, %ecx
; X86-NEXT: andl $1, %eax
; X86-NEXT: leal (%esi,%edx,2), %edx
; X86-NEXT: leal (%edx,%ecx,4), %ecx
; X86-NEXT: leal (%ecx,%eax,8), %eax
; X86-NEXT: popl %esi
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-LABEL: PR15215_good:
; X64: # %bb.0: # %entry
; X64-NEXT: # kill: def $ecx killed $ecx def $rcx
; X64-NEXT: # kill: def $edx killed $edx def $rdx
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: # kill: def $edi killed $edi def $rdi
; X64-NEXT: andl $1, %edi
; X64-NEXT: andl $1, %esi
; X64-NEXT: andl $1, %edx
; X64-NEXT: andl $1, %ecx
; X64-NEXT: leal (%rdi,%rsi,2), %eax
; X64-NEXT: leal (%rax,%rdx,4), %eax
; X64-NEXT: leal (%rax,%rcx,8), %eax
; X64-NEXT: retq
;
; SSE2-LABEL: PR15215_good:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: movmskps %xmm0, %eax
; SSE2-NEXT: ret{{[l|q]}}
;
; AVX2-LABEL: PR15215_good:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vmovmskps %xmm0, %eax
; AVX2-NEXT: ret{{[l|q]}}
entry:
; Lane i contributes 2^i when its low bit is set; ORing the four selects
; reconstructs the same mask PR15215_bad builds via bitcast.
%0 = trunc <4 x i32> %input to <4 x i1>
%1 = extractelement <4 x i1> %0, i32 0
%e1 = select i1 %1, i32 1, i32 0
%2 = extractelement <4 x i1> %0, i32 1
%e2 = select i1 %2, i32 2, i32 0
%3 = extractelement <4 x i1> %0, i32 2
%e3 = select i1 %3, i32 4, i32 0
%4 = extractelement <4 x i1> %0, i32 3
%e4 = select i1 %4, i32 8, i32 0
%5 = or i32 %e1, %e2
%6 = or i32 %5, %e3
%7 = or i32 %6, %e4
ret i32 %7
}