Currently, targets without LZCNT/TZCNT won't speculate BSR/BSF instructions because the input may be zero, meaning we always insert a test+branch for the zero-input case. This patch proposes that we allow speculation if the target has CMOV, and perform a branchless select instead to handle the zero-input case. This will predominantly help x86-64 targets where we haven't set any particular CPU target. We already always emit BSR/BSF instructions when lowering a CTLZ/CTTZ_ZERO_UNDEF node.
109 lines
3.3 KiB
LLVM
109 lines
3.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64
; cttz.i32 (zero input is defined: second argument is false) of a
; sign-extended `shl i16 256, %xx`.
; The shift carries no nuw/nsw flags, so the single set bit of 256 can be
; shifted out and the cttz operand may be zero. The expected code therefore
; still handles a zero input: the i686 run selects 32 via cmovne after bsfl,
; and the x86-64 run ORs in bit 32 (0x100000000) before a 64-bit bsf.
define i32 @sext_known_nonzero(i16 %xx) {
; X86-LABEL: sext_known_nonzero:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: cwtl
; X86-NEXT: bsfl %eax, %ecx
; X86-NEXT: movl $32, %eax
; X86-NEXT: cmovnel %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: movswq %ax, %rax
; X64-NEXT: movabsq $4294967296, %rcx # imm = 0x100000000
; X64-NEXT: orq %rax, %rcx
; X64-NEXT: rep bsfq %rcx, %rax
; X64-NEXT: # kill: def $eax killed $eax killed $rax
; X64-NEXT: retq
  %x = shl i16 256, %xx
  %z = sext i16 %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}
; cttz.i32 (zero input is defined) of a sign-extended `shl nuw i16 256, %xx`.
; With nuw, shifting out a set bit yields poison, so the set bit of 256 must
; survive and the operand can be treated as non-zero. The expected code for
; both runs is a bare `rep bsfl` with no zero-input fixup; the sign extension
; is still emitted as cwtl.
define i32 @sext_known_nonzero_nuw(i16 %xx) {
; X86-LABEL: sext_known_nonzero_nuw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: cwtl
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero_nuw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: cwtl
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %x = shl nuw i16 256, %xx
  %z = sext i16 %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}
; cttz.i32 (zero input is defined) of a sign-extended `shl nsw i16 256, %xx`.
; With nsw, the shifted-out bits must agree with the result's sign bit, so the
; positive constant 256 stays positive and non-zero. The expected code for both
; runs reflects this: the extension is emitted as a zero-extend (movzwl)
; rather than cwtl, followed by a bare `rep bsfl` with no zero-input fixup.
define i32 @sext_known_nonzero_nsw(i16 %xx) {
; X86-LABEL: sext_known_nonzero_nsw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero_nsw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %x = shl nsw i16 256, %xx
  %z = sext i16 %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}
; cttz.i32 (zero input is defined) of a sign-extended
; `shl nuw nsw i16 256, %xx`.
; Both wrap flags are present, so the shifted value is non-zero and keeps the
; sign of the positive constant 256. The expected code for both runs is a
; zero-extend (movzwl) followed by a bare `rep bsfl` with no zero-input fixup.
define i32 @sext_known_nonzero_nuw_nsw(i16 %xx) {
; X86-LABEL: sext_known_nonzero_nuw_nsw:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $256, %eax # imm = 0x100
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: rep bsfl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sext_known_nonzero_nuw_nsw:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %ecx
; X64-NEXT: movl $256, %eax # imm = 0x100
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: rep bsfl %eax, %eax
; X64-NEXT: retq
  %x = shl nuw nsw i16 256, %xx
  %z = sext i16 %x to i32
  %r = call i32 @llvm.cttz.i32(i32 %z, i1 false)
  ret i32 %r
}