Currently, targets without LZCNT/TZCNT do not speculate BSR/BSF instructions because the input may be zero, so we always insert a test+branch to handle the zero-input case. This patch proposes allowing speculation when the target has CMOV, using a branchless select to handle the zero-input case instead. This will predominantly help x86-64 targets where no particular CPU target has been set. We already always emit BSR/BSF instructions when lowering a CTLZ/CTTZ_ZERO_UNDEF instruction.
26 lines
907 B
LLVM
26 lines
907 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s

; PR92569 (name taken from the function; the original report is not shown in
; this file). The IR computes cttz of %arg with `i1 false` as the second
; argument, truncates the count to i8, shifts it right by 3, and uses the
; result as a variable index into the <8 x i8> argument. The extracted byte is
; frozen, zero-extended and stored to a null pointer in addrspace(1).
;
; The CHECK lines below expect the zero-input case to be handled without a
; branch: BSF, then CMOVNE choosing between the BSF result and the constant 64.
; They are autogenerated; regenerate them with update_llc_test_checks.py
; instead of editing them by hand.
define void @PR92569(i64 %arg, <8 x i8> %arg1) {
; CHECK-LABEL: PR92569:
; CHECK:       # %bb.0:
; CHECK-NEXT:    bsfq %rdi, %rax
; CHECK-NEXT:    movl $64, %ecx
; CHECK-NEXT:    cmovneq %rax, %rcx
; CHECK-NEXT:    shrb $3, %cl
; CHECK-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movzbl %cl, %eax
; CHECK-NEXT:    andl $15, %eax
; CHECK-NEXT:    movzbl -24(%rsp,%rax), %eax
; CHECK-NEXT:    movl %eax, 0
; CHECK-NEXT:    retq
  %cttz = call i64 @llvm.cttz.i64(i64 %arg, i1 false)
  %trunc = trunc i64 %cttz to i8
  %lshr = lshr i8 %trunc, 3
  %extractelement = extractelement <8 x i8> %arg1, i8 %lshr
  %freeze = freeze i8 %extractelement
  %zext = zext i8 %freeze to i32
  store i32 %zext, ptr addrspace(1) null, align 4
  ret void
}