clang-p2996/llvm/test/CodeGen/X86/shift-i256.ll
Roman Lebedev cc39c3b17f [Codegen][LegalizeIntegerTypes] New legalization strategy for scalar shifts: shift through stack
https://reviews.llvm.org/D140493 is going to teach SROA how to promote allocas
that have variably-indexed loads. That does bring up cost-model questions,
since such promotion requires creating wide shifts.
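
For illustration, a hypothetical IR sketch (not taken from either patch,
and the function name is made up) of the pattern in question: a
variably-indexed load from an alloca, which such promotion would rewrite
into a wide load plus a variable shift:

  define i8 @byte_of(i64 %x, i64 %i) {
    %slot = alloca i64
    store i64 %x, ptr %slot
    %p = getelementptr i8, ptr %slot, i64 %i
    %b = load i8, ptr %p        ; variably-indexed load
    ret i8 %b                   ; promoted form: trunc(%x lshr (8 * %i))
  }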

Indeed, our legalization for them is not optimal.
We either split the shift into parts, or lower it into a libcall.
But if the shift amount is a multiple of CHAR_BIT,
we can also legalize it through the stack.

The basic idea is very simple (see the IR sketch after this list):
1. Get a stack slot 2x the width of the shift type
2. Store the value we are shifting into one half of the slot
3. Pad the other half of the slot: with zeros for logical shifts,
   or with the sign bit for arithmetic shifts
4. Index into the slot (starting from the base half into which we spilled,
   either upwards or downwards)
5. Load
6. Split the loaded integer
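
In IR terms, a minimal little-endian sketch of those steps for an lshr
whose amount is a multiple of CHAR_BIT (hypothetical code; the patch
itself emits the equivalent SelectionDAG nodes during legalization):

  define i64 @lshr64_through_stack(i64 %x, i64 %amt_bytes) {
    %slot = alloca [16 x i8], align 8            ; 1. slot 2x the type width
    store i64 %x, ptr %slot                      ; 2. spill into the low half
    %pad = getelementptr i8, ptr %slot, i64 8
    store i64 0, ptr %pad                        ; 3. zero-pad (sign-pad for ashr)
    %src = getelementptr i8, ptr %slot, i64 %amt_bytes
    %r = load i64, ptr %src, align 1             ; 4+5. variably-indexed load
    ret i64 %r                                   ; 6. an illegal type is then split
  }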

This works for both little-endian and big-endian machines:
https://alive2.llvm.org/ce/z/YNVwd5

And better yet, if the original shift amount was not a multiple of CHAR_BIT,
we can just shift by that remainder afterwards: https://alive2.llvm.org/ce/z/pz5G-K
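
Continuing the hypothetical sketch above, a fully general amount then
splits into a byte part done through the stack and a residual 0..7-bit
part done with a single ordinary shift:

  define i64 @lshr64_any(i64 %x, i64 %amt) {
    %bytes = lshr i64 %amt, 3                    ; byte part of the amount
    %r0 = call i64 @lshr64_through_stack(i64 %x, i64 %bytes)
    %bits = and i64 %amt, 7                      ; leftover bit part
    %r = lshr i64 %r0, %bits
    ret i64 %r
  }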

I think that if we are going to perform the shift -> shift-by-parts
expansion more than once, we should instead go through the stack,
which is what this patch does.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D140638
2023-01-14 19:12:18 +03:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-- -O0 | FileCheck %s -check-prefixes=CHECK-X64,CHECK-X64-O0
; RUN: llc < %s -mtriple=x86_64-- -O2 | FileCheck %s -check-prefixes=CHECK-X64,CHECK-X64-O2
; CHECK-LABEL: shift1
define void @shift1(i256 %x, i256 %a, ptr nocapture %r) nounwind readnone {
entry:
  %0 = ashr i256 %x, %a
  store i256 %0, ptr %r
  ret void
}
define i256 @shift2(i256 %c) nounwind
; CHECK-LABEL: shift2:
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %ebp
; CHECK-NEXT: pushl %ebx
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: subl $92, %esp
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $1, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl $0, {{[0-9]+}}(%esp)
; CHECK-NEXT: movb %al, %ch
; CHECK-NEXT: andb $7, %ch
; CHECK-NEXT: shrb $3, %al
; CHECK-NEXT: negb %al
; CHECK-NEXT: movsbl %al, %eax
; CHECK-NEXT: movl 68(%esp,%eax), %edx
; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movb %ch, %cl
; CHECK-NEXT: shll %cl, %edx
; CHECK-NEXT: notb %cl
; CHECK-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; CHECK-NEXT: movl 64(%esp,%eax), %edi
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: shrl %edi
; CHECK-NEXT: shrl %cl, %edi
; CHECK-NEXT: orl %edx, %edi
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movl 76(%esp,%eax), %edx
; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movb %ch, %cl
; CHECK-NEXT: shll %cl, %edx
; CHECK-NEXT: movl 72(%esp,%eax), %ebx
; CHECK-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: shrl %ebx
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
; CHECK-NEXT: shrl %cl, %ebx
; CHECK-NEXT: orl %edx, %ebx
; CHECK-NEXT: movl 84(%esp,%eax), %esi
; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: movb %ch, %cl
; CHECK-NEXT: shll %cl, %esi
; CHECK-NEXT: movl 80(%esp,%eax), %ebp
; CHECK-NEXT: movl %ebp, %edx
; CHECK-NEXT: shrl %edx
; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
; CHECK-NEXT: shrl %cl, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movb %ch, %cl
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; CHECK-NEXT: shldl %cl, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; CHECK-NEXT: shldl %cl, %esi, %ebp
; CHECK-NEXT: movl 60(%esp,%eax), %edi
; CHECK-NEXT: movl 88(%esp,%eax), %esi
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT: shldl %cl, %eax, %esi
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl %esi, 28(%eax)
; CHECK-NEXT: movl %ebp, 20(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; CHECK-NEXT: movl %esi, 12(%eax)
; CHECK-NEXT: movl %edi, %esi
; CHECK-NEXT: shll %cl, %esi
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
; CHECK-NEXT: shldl %cl, %edi, %ebp
; CHECK-NEXT: movl %ebp, 4(%eax)
; CHECK-NEXT: movl %esi, (%eax)
; CHECK-NEXT: movl %edx, 24(%eax)
; CHECK-NEXT: movl %ebx, 16(%eax)
; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT: movl %ecx, 8(%eax)
; CHECK-NEXT: addl $92, %esp
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: popl %ebx
; CHECK-NEXT: popl %ebp
; CHECK-NEXT: retl $4
;
; CHECK-X64-O0-LABEL: shift2:
; CHECK-X64-O0: # %bb.0:
; CHECK-X64-O0-NEXT: movq %rdi, %rax
; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O0-NEXT: movq $1, -{{[0-9]+}}(%rsp)
; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O0-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O0-NEXT: movb %sil, %dl
; CHECK-X64-O0-NEXT: movb %dl, %cl
; CHECK-X64-O0-NEXT: andb $7, %cl
; CHECK-X64-O0-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; CHECK-X64-O0-NEXT: shrb $3, %dl
; CHECK-X64-O0-NEXT: negb %dl
; CHECK-X64-O0-NEXT: movsbq %dl, %rdx
; CHECK-X64-O0-NEXT: movq -16(%rsp,%rdx), %rsi
; CHECK-X64-O0-NEXT: movq %rsi, %r10
; CHECK-X64-O0-NEXT: shlq %cl, %r10
; CHECK-X64-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
; CHECK-X64-O0-NEXT: notb %cl
; CHECK-X64-O0-NEXT: movq -32(%rsp,%rdx), %r9
; CHECK-X64-O0-NEXT: movq -24(%rsp,%rdx), %r8
; CHECK-X64-O0-NEXT: movq %r8, %r11
; CHECK-X64-O0-NEXT: shrq %r11
; CHECK-X64-O0-NEXT: shrq %cl, %r11
; CHECK-X64-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
; CHECK-X64-O0-NEXT: orq %r11, %r10
; CHECK-X64-O0-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-X64-O0-NEXT: movq -8(%rsp,%rdx), %rdx
; CHECK-X64-O0-NEXT: shldq %cl, %rsi, %rdx
; CHECK-X64-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
; CHECK-X64-O0-NEXT: movq %r9, %rsi
; CHECK-X64-O0-NEXT: shlq %cl, %rsi
; CHECK-X64-O0-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload
; CHECK-X64-O0-NEXT: shldq %cl, %r9, %r8
; CHECK-X64-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-X64-O0-NEXT: movq %r8, 8(%rdi)
; CHECK-X64-O0-NEXT: movq %rsi, (%rdi)
; CHECK-X64-O0-NEXT: movq %rdx, 24(%rdi)
; CHECK-X64-O0-NEXT: movq %rcx, 16(%rdi)
; CHECK-X64-O0-NEXT: retq
;
; CHECK-X64-O2-LABEL: shift2:
; CHECK-X64-O2: # %bb.0:
; CHECK-X64-O2-NEXT: movq %rdi, %rax
; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O2-NEXT: movq $1, -{{[0-9]+}}(%rsp)
; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O2-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; CHECK-X64-O2-NEXT: movl %esi, %edx
; CHECK-X64-O2-NEXT: andb $7, %dl
; CHECK-X64-O2-NEXT: shrb $3, %sil
; CHECK-X64-O2-NEXT: negb %sil
; CHECK-X64-O2-NEXT: movsbq %sil, %rsi
; CHECK-X64-O2-NEXT: movq -16(%rsp,%rsi), %rdi
; CHECK-X64-O2-NEXT: movq %rdi, %r8
; CHECK-X64-O2-NEXT: movl %edx, %ecx
; CHECK-X64-O2-NEXT: shlq %cl, %r8
; CHECK-X64-O2-NEXT: notb %cl
; CHECK-X64-O2-NEXT: movq -32(%rsp,%rsi), %r9
; CHECK-X64-O2-NEXT: movq -24(%rsp,%rsi), %r10
; CHECK-X64-O2-NEXT: movq %r10, %r11
; CHECK-X64-O2-NEXT: shrq %r11
; CHECK-X64-O2-NEXT: shrq %cl, %r11
; CHECK-X64-O2-NEXT: orq %r8, %r11
; CHECK-X64-O2-NEXT: movq -8(%rsp,%rsi), %rsi
; CHECK-X64-O2-NEXT: movl %edx, %ecx
; CHECK-X64-O2-NEXT: shldq %cl, %rdi, %rsi
; CHECK-X64-O2-NEXT: movq %r9, %rdi
; CHECK-X64-O2-NEXT: shlq %cl, %rdi
; CHECK-X64-O2-NEXT: shldq %cl, %r9, %r10
; CHECK-X64-O2-NEXT: movq %rsi, 24(%rax)
; CHECK-X64-O2-NEXT: movq %r10, 8(%rax)
; CHECK-X64-O2-NEXT: movq %rdi, (%rax)
; CHECK-X64-O2-NEXT: movq %r11, 16(%rax)
; CHECK-X64-O2-NEXT: retq
{
  %b = shl i256 1, %c ; %c must not be a constant
  ret i256 %b
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-X64: {{.*}}