Files
clang-p2996/llvm/test/CodeGen/AMDGPU/selectcc-opt.ll
John Brawn 88ac25b357 [MachineCSE] Allow PRE of instructions that read physical registers
Currently MachineCSE forbids PRE when the instruction reads a physical
register. Relax this so that it's allowed when the value being read is
the same as what would be read in the place the instruction would be
hoisted to.

This is being done in preparation for adding FPCR handling to the
AArch64 backend, in order to prevent it to from worsening the
generated code, but for targets that already have a similar register
it should improve things.

This patch affects code generation in several tests. The new code
looks better except for in Thumb2/LowOverheadLoops/memcall.ll where
we perform PRE but the LowOverheadLoops transformation then undoes
it. Also in AMDGPU/selectcc-opt.ll the CHECK makes things look worse,
but actually the function as a whole is better (as a MOV is PRE'd).

Differential Revision: https://reviews.llvm.org/D136675
2022-11-02 13:53:12 +00:00

82 lines
2.2 KiB
LLVM

; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}test_a:
; EG-NOT: CND
; EG: SET{{[NEQGTL]+}}_DX10
define amdgpu_kernel void @test_a(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp olt float %in, 0.000000e+00
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
%4 = bitcast i32 %3 to float
%5 = bitcast float %4 to i32
%6 = icmp ne i32 %5, 0
br i1 %6, label %IF, label %ENDIF
IF:
%7 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 0, i32 addrspace(1)* %7
br label %ENDIF
ENDIF:
store i32 0, i32 addrspace(1)* %out
ret void
}
; Same as test_a, but the branch labels are swapped to produce the inverse cc
; for the icmp instruction
; EG-LABEL: {{^}}test_b:
; EG: SET{{[GTEQN]+}}_DX10
; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
; EG-NEXT: PRED_
; EG-NEXT: ALU clause starting
define amdgpu_kernel void @test_b(i32 addrspace(1)* %out, float %in) {
entry:
%0 = fcmp olt float %in, 0.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
%4 = bitcast i32 %3 to float
%5 = bitcast float %4 to i32
%6 = icmp ne i32 %5, 0
br i1 %6, label %ENDIF, label %IF
IF:
%7 = getelementptr i32, i32 addrspace(1)* %out, i32 1
store i32 0, i32 addrspace(1)* %7
br label %ENDIF
ENDIF:
store i32 0, i32 addrspace(1)* %out
ret void
}
; Test a CND*_INT instruction with float true/false values
; EG-LABEL: {{^}}test_c:
; EG: CND{{[GTE]+}}_INT
define amdgpu_kernel void @test_c(float addrspace(1)* %out, i32 %in) {
entry:
%0 = icmp sgt i32 %in, 0
%1 = select i1 %0, float 2.0, float 3.0
store float %1, float addrspace(1)* %out
ret void
}
; FUNC-LABEL: {{^}}selectcc_bool:
; SI: s_cmp_lg_u32
; SI: v_cndmask_b32_e64
; SI-NOT: cmp
; SI-NOT: cndmask
define amdgpu_kernel void @selectcc_bool(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%icmp0 = icmp ne i32 %a, %b
%ext = select i1 %icmp0, i32 -1, i32 0
store i32 %ext, i32 addrspace(1)* %out
ret void
}