Mirror of the previous log changes. OpenCL conformance doesn't like afn being interpreted as permission to ignore denormal handling, but this was previously hidden by flag dropping.
54 lines
2.1 KiB
LLVM
54 lines
2.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG %s
; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck --check-prefix=CM %s

; The same IR is compiled twice with llc for the r600 target: once for the
; redwood CPU (output verified with the EG check prefix) and once for the
; cayman CPU (output verified with the CM check prefix).

; Pixel-shader entry point that computes exp2(-|x|) with the afn fast-math
; flag, where x is element 0 of the inreg argument %reg0, and hands the result
; to llvm.r600.store.swizzle.
;
; The expected output for both CPUs still contains the input-range handling
; around the hardware EXP_IEEE: the negated absolute value is compared against
; -126.0, 64.0 is conditionally added to it before the exponential, and the
; result is conditionally multiplied by 5.421011e-20 afterwards. In other
; words, afn on the exp2 call is not expected to remove that sequence.
; The negation and fabs show up as the -|T0.X| source modifier rather than as
; separate instructions.
define amdgpu_ps void @test(<4 x float> inreg %reg0) {
; EG-LABEL: test:
; EG: ; %bb.0:
; EG-NEXT: ALU 8, @4, KC0[], KC1[]
; EG-NEXT: EXPORT T0.X___
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: SETGT * T0.W, literal.x, -|T0.X|,
; EG-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
; EG-NEXT: CNDE * T1.W, PV.W, 0.0, literal.x,
; EG-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
; EG-NEXT: ADD T1.W, -|T0.X|, PV.W,
; EG-NEXT: CNDE * T0.W, T0.W, 1.0, literal.x,
; EG-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
; EG-NEXT: EXP_IEEE * T0.X, PV.W,
; EG-NEXT: MUL_IEEE * T0.X, PS, T0.W,
;
; CM-LABEL: test:
; CM: ; %bb.0:
; CM-NEXT: ALU 11, @4, KC0[], KC1[]
; CM-NEXT: EXPORT T0.X___
; CM-NEXT: CF_END
; CM-NEXT: PAD
; CM-NEXT: ALU clause starting at 4:
; CM-NEXT: SETGT * T0.W, literal.x, -|T0.X|,
; CM-NEXT: -1023672320(-1.260000e+02), 0(0.000000e+00)
; CM-NEXT: CNDE * T1.W, PV.W, 0.0, literal.x,
; CM-NEXT: 1115684864(6.400000e+01), 0(0.000000e+00)
; CM-NEXT: CNDE T0.Z, T0.W, 1.0, literal.x,
; CM-NEXT: ADD * T0.W, -|T0.X|, PV.W,
; CM-NEXT: 528482304(5.421011e-20), 0(0.000000e+00)
; CM-NEXT: EXP_IEEE T0.X, T0.W,
; CM-NEXT: EXP_IEEE T0.Y (MASKED), T0.W,
; CM-NEXT: EXP_IEEE T0.Z (MASKED), T0.W,
; CM-NEXT: EXP_IEEE * T0.W (MASKED), T0.W,
; CM-NEXT: MUL_IEEE * T0.X, PV.X, T0.Z,
  ; Take element 0 of the input vector.
  %r0 = extractelement <4 x float> %reg0, i32 0
  ; Build -|x|: absolute value, then subtract it from -0.0.
  %r1 = call float @llvm.fabs.f32(float %r0)
  %r2 = fsub float -0.000000e+00, %r1
  ; The call under test: exp2 carrying only the afn flag.
  %r3 = call afn float @llvm.exp2.f32(float %r2)
  ; Place the result in element 0 of an otherwise undef vector and store it.
  %vec = insertelement <4 x float> undef, float %r3, i32 0
  call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
  ret void
}

; Declarations of the intrinsics called by @test.
declare float @llvm.exp2.f32(float) readnone
declare float @llvm.fabs.f32(float) readnone
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)