If a shl node leaves the upper half bits zero / undemanded, then see if we can profitably perform this with a half-width shl and a free trunc/zext.
Followup to D146121
Reapplied - the fold now runs after the ShrinkDemandedOp call, reuses the existing KnownBits result, and is only attempted if all the upper bits are demanded; 547dc46122 should address the remaining regressions that were noticed in the previous commit.
Differential Revision: https://reviews.llvm.org/D155472
122 lines
4.8 KiB
LLVM
122 lines
4.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
|
|
|
|
define <64 x i4> @pr62653(<64 x i4> %a0) nounwind {
|
|
; CHECK-LABEL: pr62653:
|
|
; CHECK: # %bb.0:
|
|
; CHECK-NEXT: movq %rdi, %rax
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
|
|
; CHECK-NEXT: andl $15, %edi
|
|
; CHECK-NEXT: shll $4, %edi
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
|
|
; CHECK-NEXT: andl $15, %r10d
|
|
; CHECK-NEXT: orq %rdi, %r10
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
|
|
; CHECK-NEXT: andl $15, %edi
|
|
; CHECK-NEXT: shll $8, %edi
|
|
; CHECK-NEXT: orq %r10, %rdi
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
|
|
; CHECK-NEXT: andl $15, %r10d
|
|
; CHECK-NEXT: shll $12, %r10d
|
|
; CHECK-NEXT: orq %rdi, %r10
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
|
|
; CHECK-NEXT: andl $15, %edi
|
|
; CHECK-NEXT: shll $16, %edi
|
|
; CHECK-NEXT: orq %r10, %rdi
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
|
|
; CHECK-NEXT: andl $15, %r10d
|
|
; CHECK-NEXT: shll $20, %r10d
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
|
|
; CHECK-NEXT: andl $15, %r11d
|
|
; CHECK-NEXT: shll $24, %r11d
|
|
; CHECK-NEXT: orq %r10, %r11
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
|
|
; CHECK-NEXT: shll $28, %r10d
|
|
; CHECK-NEXT: orq %r11, %r10
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
|
|
; CHECK-NEXT: andl $15, %r11d
|
|
; CHECK-NEXT: shlq $32, %r11
|
|
; CHECK-NEXT: orq %r10, %r11
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
|
|
; CHECK-NEXT: andl $15, %r10d
|
|
; CHECK-NEXT: shlq $36, %r10
|
|
; CHECK-NEXT: orq %r11, %r10
|
|
; CHECK-NEXT: orq %rdi, %r10
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
|
|
; CHECK-NEXT: andl $15, %edi
|
|
; CHECK-NEXT: shlq $40, %rdi
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
|
|
; CHECK-NEXT: andl $15, %r11d
|
|
; CHECK-NEXT: shlq $44, %r11
|
|
; CHECK-NEXT: orq %rdi, %r11
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
|
|
; CHECK-NEXT: andl $15, %edi
|
|
; CHECK-NEXT: shlq $48, %rdi
|
|
; CHECK-NEXT: orq %r11, %rdi
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
|
|
; CHECK-NEXT: andl $15, %r11d
|
|
; CHECK-NEXT: shlq $52, %r11
|
|
; CHECK-NEXT: orq %rdi, %r11
|
|
; CHECK-NEXT: orq %r10, %r11
|
|
; CHECK-NEXT: movq %r11, 8(%rax)
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
|
|
; CHECK-NEXT: andl $15, %edi
|
|
; CHECK-NEXT: shlq $32, %rdi
|
|
; CHECK-NEXT: andl $15, %esi
|
|
; CHECK-NEXT: andl $15, %edx
|
|
; CHECK-NEXT: shll $4, %edx
|
|
; CHECK-NEXT: orl %esi, %edx
|
|
; CHECK-NEXT: andl $15, %ecx
|
|
; CHECK-NEXT: shll $8, %ecx
|
|
; CHECK-NEXT: orl %edx, %ecx
|
|
; CHECK-NEXT: andl $15, %r8d
|
|
; CHECK-NEXT: shll $12, %r8d
|
|
; CHECK-NEXT: orl %ecx, %r8d
|
|
; CHECK-NEXT: andl $15, %r9d
|
|
; CHECK-NEXT: shll $16, %r9d
|
|
; CHECK-NEXT: orl %r8d, %r9d
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
|
|
; CHECK-NEXT: andl $15, %ecx
|
|
; CHECK-NEXT: shll $20, %ecx
|
|
; CHECK-NEXT: orl %r9d, %ecx
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
|
|
; CHECK-NEXT: andl $15, %edx
|
|
; CHECK-NEXT: shll $24, %edx
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
|
|
; CHECK-NEXT: shll $28, %esi
|
|
; CHECK-NEXT: orl %edx, %esi
|
|
; CHECK-NEXT: orl %ecx, %esi
|
|
; CHECK-NEXT: orq %rdi, %rsi
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
|
|
; CHECK-NEXT: andl $15, %ecx
|
|
; CHECK-NEXT: shlq $36, %rcx
|
|
; CHECK-NEXT: orq %rsi, %rcx
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
|
|
; CHECK-NEXT: andl $15, %edx
|
|
; CHECK-NEXT: shlq $40, %rdx
|
|
; CHECK-NEXT: orq %rcx, %rdx
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
|
|
; CHECK-NEXT: andl $15, %ecx
|
|
; CHECK-NEXT: shlq $44, %rcx
|
|
; CHECK-NEXT: orq %rdx, %rcx
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
|
|
; CHECK-NEXT: andl $15, %edx
|
|
; CHECK-NEXT: shlq $48, %rdx
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
|
|
; CHECK-NEXT: andl $15, %esi
|
|
; CHECK-NEXT: shlq $52, %rsi
|
|
; CHECK-NEXT: orq %rdx, %rsi
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx
|
|
; CHECK-NEXT: andl $15, %edx
|
|
; CHECK-NEXT: shlq $56, %rdx
|
|
; CHECK-NEXT: orq %rsi, %rdx
|
|
; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
|
|
; CHECK-NEXT: shlq $60, %rsi
|
|
; CHECK-NEXT: orq %rdx, %rsi
|
|
; CHECK-NEXT: orq %rcx, %rsi
|
|
; CHECK-NEXT: movq %rsi, (%rax)
|
|
; CHECK-NEXT: retq
|
|
%res = shufflevector <64 x i4> %a0, <64 x i4> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 64, i32 65, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
|
ret <64 x i4> %res
|
|
}
|