Files
clang-p2996/llvm/test/CodeGen/AMDGPU/select64.ll
Alexander Timofeev fbdea5a2e9 [AMDGPU] Always select s_cselect_b32 for uniform 'select' SDNode
This patch contains the changes necessary to carry physical condition register (SCC) dependencies through the SDNode scheduler. It adds an edge to the SDNodeScheduler dependency graph instead of inserting an SCC copy between each definition and use. This approach lets the scheduler place instructions in an optimal way, inserting the copy only when the dependency cannot be resolved.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D133593
2022-09-15 22:03:56 +02:00

65 lines
2.3 KiB
LLVM

; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefix=GCN %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}select0:
; i64 select should be split into two i32 selects, and we shouldn't need
; to use a shift to extract the hi dword of the input.
; GCN-NOT: s_lshr_b64
; GCN: s_cselect_b32
; GCN: s_cselect_b32
; Kernel under test: compares %cond against 5 (unsigned greater-than), then
; picks either the constant 0 or the 64-bit argument %in and stores the full
; i64 result through %out.
define amdgpu_kernel void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) {
entry:
  %is_above_5 = icmp ugt i32 %cond, 5
  %picked = select i1 %is_above_5, i64 0, i64 %in
  store i64 %picked, i64 addrspace(1)* %out
  ret void
}
; GCN-LABEL: {{^}}select_trunc_i64:
; GCN: s_cselect_b32
; GCN-NOT: s_cselect_b32
; Kernel under test: selects between the constant 0 and the i64 argument %in
; on (%cond > 5, unsigned), then truncates the result to i32 before storing.
; Only the low 32 bits of the select are stored, and the FileCheck lines
; before this kernel expect a single s_cselect_b32.
define amdgpu_kernel void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind {
  %is_above_5 = icmp ugt i32 %cond, 5
  %picked = select i1 %is_above_5, i64 0, i64 %in
  %low32 = trunc i64 %picked to i32
  store i32 %low32, i32 addrspace(1)* %out, align 4
  ret void
}
; GCN-LABEL: {{^}}select_trunc_i64_2:
; GCN: s_cselect_b32
; GCN-NOT: s_cselect_b32
; Kernel under test: same shape as a truncated i64 select, but both select
; operands are i64 kernel arguments (%a and %b) rather than a constant.
; The i64 result is truncated to i32 and stored through %out.
define amdgpu_kernel void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind {
  %is_above_5 = icmp ugt i32 %cond, 5
  %picked = select i1 %is_above_5, i64 %a, i64 %b
  %low32 = trunc i64 %picked to i32
  store i32 %low32, i32 addrspace(1)* %out, align 4
  ret void
}
; GCN-LABEL: {{^}}v_select_trunc_i64_2:
; GCN: s_cselect_b32
; GCN-NOT: s_cselect_b32
; Kernel under test: the two i64 select operands are loaded from memory via
; the pointer arguments %aptr and %bptr; the condition is still
; (%cond > 5, unsigned). The selected i64 is truncated to i32 and stored
; through %out.
define amdgpu_kernel void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %is_above_5 = icmp ugt i32 %cond, 5
  %lhs = load i64, i64 addrspace(1)* %aptr, align 8
  %rhs = load i64, i64 addrspace(1)* %bptr, align 8
  %picked = select i1 %is_above_5, i64 %lhs, i64 %rhs
  %low32 = trunc i64 %picked to i32
  store i32 %low32, i32 addrspace(1)* %out, align 4
  ret void
}
; GCN-LABEL: {{^}}v_select_i64_split_imm:
; GCN-DAG: s_cselect_b32
; GCN-DAG: s_cselect_b32
; GCN: s_endpgm
; Kernel under test: selects between an i64 loaded from %aptr and a 64-bit
; immediate whose low 32 bits are zero (63 << 32), on (%cond > 5, unsigned),
; and stores the full i64 through %out. The value loaded from %bptr is not
; used by the select.
define amdgpu_kernel void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
  %is_above_5 = icmp ugt i32 %cond, 5
  %lhs = load i64, i64 addrspace(1)* %aptr, align 8
  %rhs = load i64, i64 addrspace(1)* %bptr, align 8
  %picked = select i1 %is_above_5, i64 %lhs, i64 270582939648 ; 63 << 32
  store i64 %picked, i64 addrspace(1)* %out, align 8
  ret void
}