This patch contains the changes necessary to carry physical condition register (SCC) dependencies through the SDNode scheduler. It adds an edge to the SDNode scheduler's dependency graph instead of inserting an SCC copy between each definition and use. This approach lets the scheduler place instructions optimally, inserting the copy only when the dependency cannot be resolved otherwise. Reviewed By: rampitec. Differential Revision: https://reviews.llvm.org/D133593
46 lines
1.8 KiB
LLVM
46 lines
1.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s

; Codegen test for a kernel that chains two selects, each controlled by an
; i1 value, around a cttz call and an sffbh call. The second select's
; condition is the OR of the first select's condition and a fresh compare, so
; the first compare result has to stay available across the second compare.
;
; The expected output below is autogenerated; regenerate it with
; utils/update_llc_test_checks.py rather than editing it by hand.

declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
declare i32 @llvm.amdgcn.sffbh.i32(i32) nounwind readnone speculatable
; Arguments:
;   %out      - global (addrspace 1) pointer that receives the single i32 result.
;   %arrayidx - global (addrspace 1) pointer from which the i32 input is loaded.
define amdgpu_kernel void @select_constant_cttz(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
; GCN-LABEL: select_constant_cttz:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dword s2, s[2:3], 0x0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_lshr_b32 s4, 1, s2
; GCN-NEXT:    s_cmp_lg_u32 s2, 0
; GCN-NEXT:    s_ff1_i32_b32 s2, s4
; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
; GCN-NEXT:    s_and_b64 s[6:7], s[4:5], exec
; GCN-NEXT:    s_cselect_b32 s2, -1, s2
; GCN-NEXT:    s_flbit_i32 s6, s2
; GCN-NEXT:    s_sub_i32 s8, 31, s6
; GCN-NEXT:    s_cmp_eq_u32 s2, 0
; GCN-NEXT:    s_cselect_b64 s[6:7], -1, 0
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[6:7]
; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cselect_b32 s4, -1, s8
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  ; Input value and the two values derived directly from it.
  %v = load i32, i32 addrspace(1)* %arrayidx, align 4
  %sr = lshr i32 1, %v
  %cmp = icmp ne i32 %v, 0
  ; Trailing-zero count of the shifted value; the i1 true flag makes a zero
  ; input poison, and !range !0 bounds the result to [0, 33).
  %cttz = call i32 @llvm.cttz.i32(i32 %sr, i1 true), !range !0
  ; First select: -1 when the input was non-zero, otherwise the cttz result.
  %sel = select i1 %cmp, i32 -1, i32 %cttz
  %ffbh = call i32 @llvm.amdgcn.sffbh.i32(i32 %sel)
  %sub = sub i32 31, %ffbh
  ; Second select reuses %cmp, OR-ed with a new compare on %sel.
  %cmp2 = icmp eq i32 %sel, 0
  %or = or i1 %cmp, %cmp2
  %sel2 = select i1 %or, i32 -1, i32 %sub
  store i32 %sel2, i32 addrspace(1)* %out
  ret void
}

; Range metadata attached to the cttz call above: lower bound 0 (inclusive),
; upper bound 33 (exclusive).
!0 = !{i32 0, i32 33}