Summary:
Currently we convert to shuffles during lowering. This moves it to DAG combine so hopefully we can get it done before type legalization has to extend the condition.

I believe in some cases we're creating SHRUNKBLENDs that end up with constant conditions because we see the extend on the condition and think it's a dynamic select before DAG combine gets a chance to constant fold the extend. We could add combines to turn SHRUNKBLENDs with constant conditions back to vselect, but it seemed better to just send them to shuffles as early as possible so they never get a chance to become SHRUNKBLENDs. This is the reason some tests went from blends controlled by a constant pool load to just moves.

Some of the constant pool entries changed because the sign_extend introduced by type legalization turned undef elements in the select condition into 0s, while the select->shuffle conversion used -1 in the shuffle mask, so now the shuffle lowering can do what it wants with them.

I'll remove the lowering code as a follow-up. We might be able to simplify some of the pre-checks for SHRUNKBLEND as the FIXME there says.

Reviewers: spatel, RKSimon, efriedma, zvi, andreadb

Reviewed By: spatel

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D43367

llvm-svn: 325417
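For illustration only (a minimal sketch, not part of this commit or of the test below; %a and %b are placeholder values): a select whose vector condition is a constant picks every lane statically, so it can be rewritten as a shufflevector, which is what the combine now produces before type legalization gets to extend the condition:

  ; true lanes take from %a (indices 0-3), false lanes take from %b (indices 4-7)
  %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %a, <4 x i32> %b
  ; ...is equivalent to:
  %shuf = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>

An undef condition lane can become a -1 (undef) element in the shuffle mask, whereas the sign_extend added by type legalization would have pinned it to 0, which is why some constant pool entries in the tests changed.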
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 -O0 | FileCheck %s

define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <16 x i64> %arg3, <16 x i64> %arg4) {
; CHECK-LABEL: pluto:
; CHECK:       # %bb.0: # %bb
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rsp, %rbp
; CHECK-NEXT:    .cfi_def_cfa_register %rbp
; CHECK-NEXT:    andq $-32, %rsp
; CHECK-NEXT:    subq $320, %rsp # imm = 0x140
; CHECK-NEXT:    vmovaps 240(%rbp), %ymm8
; CHECK-NEXT:    vmovaps 208(%rbp), %ymm9
; CHECK-NEXT:    vmovaps 176(%rbp), %ymm10
; CHECK-NEXT:    vmovaps 144(%rbp), %ymm11
; CHECK-NEXT:    vmovaps 112(%rbp), %ymm12
; CHECK-NEXT:    vmovaps 80(%rbp), %ymm13
; CHECK-NEXT:    vmovaps 48(%rbp), %ymm14
; CHECK-NEXT:    vmovaps 16(%rbp), %ymm15
; CHECK-NEXT:    vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
; CHECK-NEXT:    vxorps %xmm6, %xmm6, %xmm6
; CHECK-NEXT:    vpblendd {{.*#+}} ymm8 = ymm6[0,1],ymm8[2,3,4,5,6,7]
; CHECK-NEXT:    vpblendd {{.*#+}} ymm6 = ymm6[0,1],ymm11[2,3,4,5,6,7]
; CHECK-NEXT:    # kill: def $xmm9 killed $xmm9 killed $ymm9
; CHECK-NEXT:    vmovdqa %xmm9, %xmm11
; CHECK-NEXT:    # kill: def $ymm11 killed $xmm11
; CHECK-NEXT:    vpalignr {{.*#+}} ymm6 = ymm2[8,9,10,11,12,13,14,15],ymm6[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm6[16,17,18,19,20,21,22,23]
; CHECK-NEXT:    vpermq {{.*#+}} ymm6 = ymm6[2,3,2,0]
; CHECK-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT:    # implicit-def: $ymm0
; CHECK-NEXT:    vinserti128 $1, %xmm9, %ymm0, %ymm0
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5],ymm6[6,7]
; CHECK-NEXT:    vmovaps %xmm2, %xmm9
; CHECK-NEXT:    # implicit-def: $ymm2
; CHECK-NEXT:    vinserti128 $1, %xmm9, %ymm2, %ymm2
; CHECK-NEXT:    vpunpcklqdq {{.*#+}} ymm6 = ymm7[0],ymm8[0],ymm7[2],ymm8[2]
; CHECK-NEXT:    vpermq {{.*#+}} ymm6 = ymm6[2,1,2,3]
; CHECK-NEXT:    vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
; CHECK-NEXT:    vmovaps %xmm7, %xmm9
; CHECK-NEXT:    vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    # implicit-def: $ymm6
; CHECK-NEXT:    vmovaps %xmm9, %xmm6
; CHECK-NEXT:    vpalignr {{.*#+}} ymm11 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
; CHECK-NEXT:    vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3]
; CHECK-NEXT:    vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3],ymm11[4,5,6,7]
; CHECK-NEXT:    vpblendd {{.*#+}} ymm7 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7]
; CHECK-NEXT:    vpermq {{.*#+}} ymm7 = ymm7[2,1,1,3]
; CHECK-NEXT:    vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5]
; CHECK-NEXT:    vpblendd {{.*#+}} ymm5 = ymm7[0,1,2,3,4,5],ymm5[6,7]
; CHECK-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovaps %ymm5, %ymm1
; CHECK-NEXT:    vmovaps %ymm3, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovaps %ymm6, %ymm3
; CHECK-NEXT:    vmovaps %ymm15, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovaps %ymm10, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovaps %ymm13, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovaps %ymm12, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovaps %ymm4, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT:    vmovaps %ymm14, (%rsp) # 32-byte Spill
; CHECK-NEXT:    movq %rbp, %rsp
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    retq
bb:
  %tmp = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg, <16 x i64> %arg1
  %tmp5 = select <16 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg2, <16 x i64> zeroinitializer
  %tmp6 = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true>, <16 x i64> %arg3, <16 x i64> %tmp5
  %tmp7 = shufflevector <16 x i64> %tmp, <16 x i64> %tmp6, <16 x i32> <i32 11, i32 18, i32 24, i32 9, i32 14, i32 29, i32 29, i32 6, i32 14, i32 28, i32 8, i32 9, i32 22, i32 12, i32 25, i32 6>
  ret <16 x i64> %tmp7
}