This patch contains the following enhancements to SrcRegMap and DstRegMap:
1. In findOnlyInterestingUse, check not only whether Reg is already a
   two-address use, but also whether it can become one after commutation.
2. If a physical register is clobbered, remove the SrcRegMap entries that
   are mapped to it.
3. In processTiedPairs, when creating a new COPY instruction, add a
   SrcRegMap entry only when the COPY instruction is coalescable (i.e. the
   COPY source is killed).
With these enhancements, isProfitableToCommute can make better commute
decisions, and as a result more register copies are removed.
Differential Revision: https://reviews.llvm.org/D108731
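
To illustrate point 1, here is a minimal, self-contained sketch of a
commutation-aware two-address check. This is not the actual
TwoAddressInstructionPass code; the toy types and names below are
hypothetical and only model the decision being made: a register counts as
an interesting two-address use if it either sits in the tied operand slot
already, or would land there after commuting the instruction.

  // Toy model (not LLVM's real API) of a "two-address" instruction:
  // one source operand is tied to the destination and will be overwritten.
  #include <cassert>
  #include <cstdio>

  struct ToyInst {
    unsigned Src[2];   // virtual registers used as the two sources
    int TiedSrcIdx;    // which source is tied to the def, or -1 if none
    bool IsCommutable; // whether the two sources may be swapped
  };

  // Returns true if Reg is a two-address use of MI, either as-is or after
  // commuting the two sources; NeedsCommute reports which case applies.
  static bool isTwoAddrUseAllowingCommute(const ToyInst &MI, unsigned Reg,
                                          bool &NeedsCommute) {
    NeedsCommute = false;
    if (MI.TiedSrcIdx < 0)
      return false;
    if (MI.Src[MI.TiedSrcIdx] == Reg)
      return true;                        // already the tied use
    if (MI.IsCommutable && MI.Src[1 - MI.TiedSrcIdx] == Reg) {
      NeedsCommute = true;                // tied use only after commuting
      return true;
    }
    return false;
  }

  int main() {
    // Think of "%2 = ADD %0, %1" where %2 must reuse %0's register.
    ToyInst Add = {{0, 1}, /*TiedSrcIdx=*/0, /*IsCommutable=*/true};
    bool NeedsCommute = false;
    assert(isTwoAddrUseAllowingCommute(Add, 0, NeedsCommute) && !NeedsCommute);
    assert(isTwoAddrUseAllowingCommute(Add, 1, NeedsCommute) && NeedsCommute);
    std::puts("both operands can end up tied, so commuting may be profitable");
  }

With such a check, a use that is only two-address "after commutation" is no
longer invisible to the profitability heuristic, which is what lets
isProfitableToCommute remove the extra copies exercised by the test below.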
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X86,NOBMI-X86
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefixes=X86,BMI-X86
; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=X64,NOBMI-X64
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+bmi < %s | FileCheck %s --check-prefixes=X64,BMI-X64

; Fold
;   ptr - (ptr & (alignment-1))
; To
;   ptr & (0 - alignment)
;
; This needs to be a backend-level fold because only by now pointers
; are just registers; in middle-end IR this can only be done via @llvm.ptrmask()
; intrinsic which is not sufficiently widely-spread yet.
;
; https://bugs.llvm.org/show_bug.cgi?id=44448
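;
; As a concrete sanity check of the equivalence (illustrative values only,
; not used by the tests below): with ptr = 0x1237 and alignment = 16,
; mask = 0xF and bias = 0x7, so ptr - bias = 0x1230, which is exactly
; ptr & -16 (i.e. ptr & 0xFFFFFFF0).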

; The basic positive tests

define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
; X86-LABEL: t0_32:
; X86: # %bb.0:
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: t0_32:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: retq
  %mask = add i32 %alignment, -1
  %bias = and i32 %ptr, %mask
  %r = sub i32 %ptr, %bias
  ret i32 %r
}
define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
; X86-LABEL: t1_64:
; X86: # %bb.0:
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: t1_64:
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: negq %rax
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
  %mask = add i64 %alignment, -1
  %bias = and i64 %ptr, %mask
  %r = sub i64 %ptr, %bias
  ret i64 %r
}

define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
; X86-LABEL: t2_commutative:
; X86: # %bb.0:
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: t2_commutative:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: negl %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: retq
  %mask = add i32 %alignment, -1
  %bias = and i32 %mask, %ptr ; swapped
  %r = sub i32 %ptr, %bias
  ret i32 %r
}

; Extra use tests

define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
; X86-LABEL: t3_extrause0:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal -1(%eax), %edx
; X86-NEXT: movl %edx, (%ecx)
; X86-NEXT: negl %eax
; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: t3_extrause0:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: leal -1(%rax), %ecx
; X64-NEXT: movl %ecx, (%rdx)
; X64-NEXT: negl %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
  %mask = add i32 %alignment, -1
  store i32 %mask, i32* %mask_storage
  %bias = and i32 %ptr, %mask
  %r = sub i32 %ptr, %bias
  ret i32 %r
}
define i32 @n4_extrause1(i32 %ptr, i32 %alignment, i32* %bias_storage) nounwind {
; X86-LABEL: n4_extrause1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: decl %edx
; X86-NEXT: andl %eax, %edx
; X86-NEXT: movl %edx, (%ecx)
; X86-NEXT: subl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: n4_extrause1:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: decl %esi
; X64-NEXT: andl %edi, %esi
; X64-NEXT: movl %esi, (%rdx)
; X64-NEXT: subl %esi, %eax
; X64-NEXT: retq
  %mask = add i32 %alignment, -1
  %bias = and i32 %ptr, %mask ; has extra uses, can't fold
  store i32 %bias, i32* %bias_storage
  %r = sub i32 %ptr, %bias
  ret i32 %r
}
define i32 @n5_extrause2(i32 %ptr, i32 %alignment, i32* %mask_storage, i32* %bias_storage) nounwind {
; X86-LABEL: n5_extrause2:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: decl %esi
; X86-NEXT: movl %esi, (%edx)
; X86-NEXT: andl %eax, %esi
; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: n5_extrause2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: decl %esi
; X64-NEXT: movl %esi, (%rdx)
; X64-NEXT: andl %edi, %esi
; X64-NEXT: movl %esi, (%rcx)
; X64-NEXT: subl %esi, %eax
; X64-NEXT: retq
  %mask = add i32 %alignment, -1
  store i32 %mask, i32* %mask_storage
  %bias = and i32 %ptr, %mask ; has extra uses, can't fold
  store i32 %bias, i32* %bias_storage
  %r = sub i32 %ptr, %bias
  ret i32 %r
}

; Negative tests

define i32 @n6_different_ptrs(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
; X86-LABEL: n6_different_ptrs:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: decl %ecx
; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: n6_different_ptrs:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: decl %edx
; X64-NEXT: andl %esi, %edx
; X64-NEXT: subl %edx, %eax
; X64-NEXT: retq
  %mask = add i32 %alignment, -1
  %bias = and i32 %ptr1, %mask ; not %ptr0
  %r = sub i32 %ptr0, %bias ; not %ptr1
  ret i32 %r
}
define i32 @n7_different_ptrs_commutative(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
; X86-LABEL: n7_different_ptrs_commutative:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: decl %ecx
; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: n7_different_ptrs_commutative:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: decl %edx
; X64-NEXT: andl %esi, %edx
; X64-NEXT: subl %edx, %eax
; X64-NEXT: retq
  %mask = add i32 %alignment, -1
  %bias = and i32 %mask, %ptr1 ; swapped, not %ptr0
  %r = sub i32 %ptr0, %bias ; not %ptr1
  ret i32 %r
}

define i32 @n8_not_lowbit_mask(i32 %ptr, i32 %alignment) nounwind {
; NOBMI-X86-LABEL: n8_not_lowbit_mask:
; NOBMI-X86: # %bb.0:
; NOBMI-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; NOBMI-X86-NEXT: incl %eax
; NOBMI-X86-NEXT: notl %eax
; NOBMI-X86-NEXT: andl {{[0-9]+}}(%esp), %eax
; NOBMI-X86-NEXT: retl
;
; BMI-X86-LABEL: n8_not_lowbit_mask:
; BMI-X86: # %bb.0:
; BMI-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; BMI-X86-NEXT: incl %eax
; BMI-X86-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax
; BMI-X86-NEXT: retl
;
; NOBMI-X64-LABEL: n8_not_lowbit_mask:
; NOBMI-X64: # %bb.0:
; NOBMI-X64-NEXT: # kill: def $esi killed $esi def $rsi
; NOBMI-X64-NEXT: leal 1(%rsi), %eax
; NOBMI-X64-NEXT: notl %eax
; NOBMI-X64-NEXT: andl %edi, %eax
; NOBMI-X64-NEXT: retq
;
; BMI-X64-LABEL: n8_not_lowbit_mask:
; BMI-X64: # %bb.0:
; BMI-X64-NEXT: incl %esi
; BMI-X64-NEXT: andnl %edi, %esi, %eax
; BMI-X64-NEXT: retq
  %mask = add i32 %alignment, 1 ; not -1
  %bias = and i32 %ptr, %mask
  %r = sub i32 %ptr, %bias
  ret i32 %r
}

define i32 @n9_sub_is_not_commutative(i32 %ptr, i32 %alignment) nounwind {
; X86-LABEL: n9_sub_is_not_commutative:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: decl %eax
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: n9_sub_is_not_commutative:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
; X64-NEXT: leal -1(%rsi), %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: subl %edi, %eax
; X64-NEXT: retq
  %mask = add i32 %alignment, -1
  %bias = and i32 %ptr, %mask
  %r = sub i32 %bias, %ptr ; wrong order
  ret i32 %r
}