Files
clang-p2996/llvm/test/CodeGen/DirectX/any.ll
Farzon Lotfi de1a97db39 [DXIL] exp, any, lerp, & rcp Intrinsic Lowering (#84526)
This change implements lowering for #70076, #70100, #70072, & #70102 
`CGBuiltin.cpp` - simplify `lerp` intrinsic
`IntrinsicsDirectX.td` - simplify `lerp` intrinsic
`SemaChecking.cpp` - remove unnecessary check
`DXILIntrinsicExpansion.*` - add intrinsic to instruction expansion
cases
`DXILOpLowering.cpp` - make sure `DXILIntrinsicExpansion` happens first
`DirectX.h` - changes to support new pass
`DirectXTargetMachine.cpp` - changes to support new pass

Why `any` and `lerp` as instruction expansion just for DXIL?
- In SPIR-V there is an
[OpAny](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpAny)
- SPIR-V has a GLSL lerp extension via
[Fmix](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#FMix)

Why `exp` instruction expansion?
- We have an `exp2` opcode and `exp` reuses that opcode. So instruction
expansion is a convenient way to do preprocessing.
- Furthermore, SPIR-V has a GLSL exp extension via
[Exp](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#Exp)
and
[Exp2](https://registry.khronos.org/SPIR-V/specs/1.0/GLSL.std.450.html#Exp2)

Why `rcp` as instruction expansion?
This one is a bit of the odd man out and might have to move to
`cgbuiltins` when we better understand the SPIR-V requirements. However, I
included it because it seems like [fast math mode has an AllowRecip
flag](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_fp_fast_math_mode)
which lets you compute the reciprocal without performing the division.
We don't have that in DXIL, so I thought to include it.
2024-03-14 20:25:57 -04:00

114 lines
3.4 KiB
LLVM

; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
; Make sure the any intrinsic is lowered to compare instructions (icmp/fcmp) for bool, integer, and floating-point operands, including a bool vector.
; Scalar bool operand. The i1 argument is round-tripped through an i8 stack
; slot (zext, store, load, trunc) and then passed to llvm.dx.any.i1. The
; expected output is an integer compare of that value against false.
; The label below is anchored with '@' and '(' so that it can only match the
; definition of @any_bool and never the longer name @any_bool4.
; CHECK-LABEL: @any_bool(
; CHECK: icmp ne i1 %{{.*}}, false
define noundef i1 @any_bool(i1 noundef %p0) {
entry:
%p0.addr = alloca i8, align 1
%frombool = zext i1 %p0 to i8
store i8 %frombool, ptr %p0.addr, align 1
%0 = load i8, ptr %p0.addr, align 1
%tobool = trunc i8 %0 to i1
%dx.any = call i1 @llvm.dx.any.i1(i1 %tobool)
ret i1 %dx.any
}
; 64-bit integer operand. The argument is spilled to a stack slot, reloaded,
; and passed to llvm.dx.any.i64. The expected output is an integer
; "not equal to 0" compare on an i64 value.
; CHECK-LABEL: any_int64_t
; CHECK: icmp ne i64 %{{.*}}, 0
define noundef i1 @any_int64_t(i64 noundef %p0) {
entry:
%p0.addr = alloca i64, align 8
store i64 %p0, ptr %p0.addr, align 8
%0 = load i64, ptr %p0.addr, align 8
%dx.any = call i1 @llvm.dx.any.i64(i64 %0)
ret i1 %dx.any
}
; 32-bit integer operand. The argument is spilled to a stack slot, reloaded,
; and passed to llvm.dx.any.i32. The expected output is an integer
; "not equal to 0" compare on an i32 value.
; The label below is anchored with '@' and '(' because the bare name is a
; prefix of @any_int64_t and @any_int16_t; the anchored form matches only the
; definition of @any_int regardless of the order of functions in this file.
; CHECK-LABEL: @any_int(
; CHECK: icmp ne i32 %{{.*}}, 0
define noundef i1 @any_int(i32 noundef %p0) {
entry:
%p0.addr = alloca i32, align 4
store i32 %p0, ptr %p0.addr, align 4
%0 = load i32, ptr %p0.addr, align 4
%dx.any = call i1 @llvm.dx.any.i32(i32 %0)
ret i1 %dx.any
}
; 16-bit integer operand. The argument is spilled to a stack slot, reloaded,
; and passed to llvm.dx.any.i16. The expected output is an integer
; "not equal to 0" compare on an i16 value.
; CHECK-LABEL: any_int16_t
; CHECK: icmp ne i16 %{{.*}}, 0
define noundef i1 @any_int16_t(i16 noundef %p0) {
entry:
%p0.addr = alloca i16, align 2
store i16 %p0, ptr %p0.addr, align 2
%0 = load i16, ptr %p0.addr, align 2
%dx.any = call i1 @llvm.dx.any.i16(i16 %0)
ret i1 %dx.any
}
; Double-precision operand. The argument is spilled to a stack slot, reloaded,
; and passed to llvm.dx.any.f64. The expected output is a floating-point
; compare against 0.0 using the 'une' (unordered or not equal) predicate.
; CHECK-LABEL: any_double
; CHECK: fcmp une double %{{.*}}, 0.000000e+00
define noundef i1 @any_double(double noundef %p0) {
entry:
%p0.addr = alloca double, align 8
store double %p0, ptr %p0.addr, align 8
%0 = load double, ptr %p0.addr, align 8
%dx.any = call i1 @llvm.dx.any.f64(double %0)
ret i1 %dx.any
}
; Single-precision operand. The argument is spilled to a stack slot, reloaded,
; and passed to llvm.dx.any.f32. The expected output is a floating-point
; compare against 0.0 using the 'une' (unordered or not equal) predicate.
; CHECK-LABEL: any_float
; CHECK: fcmp une float %{{.*}}, 0.000000e+00
define noundef i1 @any_float(float noundef %p0) {
entry:
%p0.addr = alloca float, align 4
store float %p0, ptr %p0.addr, align 4
%0 = load float, ptr %p0.addr, align 4
%dx.any = call i1 @llvm.dx.any.f32(float %0)
ret i1 %dx.any
}
; Half-precision operand. The argument is spilled to a stack slot, reloaded,
; and passed to llvm.dx.any.f16. The expected output is a floating-point
; compare against the half constant 0xH0000 using the 'une' (unordered or not
; equal) predicate.
; CHECK-LABEL: any_half
; CHECK: fcmp une half %{{.*}}, 0xH0000
define noundef i1 @any_half(half noundef %p0) {
entry:
%p0.addr = alloca half, align 2
store half %p0, ptr %p0.addr, align 2
%0 = load half, ptr %p0.addr, align 2
%dx.any = call i1 @llvm.dx.any.f16(half %0)
ret i1 %dx.any
}
; Four-element bool vector operand. The <4 x i1> argument is widened to
; <8 x i1> with a shufflevector, bitcast to i8, stored to and reloaded from a
; stack slot, bitcast back to <8 x i1>, and narrowed to <4 x i1> again before
; being passed to llvm.dx.any.v4i1.
; The expected output is a vector compare of %extractvec against the all-zero
; vector, followed by an extract of each of the four lanes with the lanes
; combined by three 'or' instructions.
; The first pattern refers to %extractvec by name, so that value must keep its
; name in the function body below. The zero vector is spelled out in full as
; 'zeroinitializer' so the pattern matches the complete printed constant
; instead of relying on a substring match.
; CHECK-LABEL: any_bool4
; CHECK: icmp ne <4 x i1> %extractvec, zeroinitializer
; CHECK: extractelement <4 x i1> %{{.*}}, i64 0
; CHECK: extractelement <4 x i1> %{{.*}}, i64 1
; CHECK: or i1 %{{.*}}, %{{.*}}
; CHECK: extractelement <4 x i1> %{{.*}}, i64 2
; CHECK: or i1 %{{.*}}, %{{.*}}
; CHECK: extractelement <4 x i1> %{{.*}}, i64 3
; CHECK: or i1 %{{.*}}, %{{.*}}
define noundef i1 @any_bool4(<4 x i1> noundef %p0) {
entry:
%p0.addr = alloca i8, align 1
%insertvec = shufflevector <4 x i1> %p0, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
%0 = bitcast <8 x i1> %insertvec to i8
store i8 %0, ptr %p0.addr, align 1
%load_bits = load i8, ptr %p0.addr, align 1
%1 = bitcast i8 %load_bits to <8 x i1>
%extractvec = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%dx.any = call i1 @llvm.dx.any.v4i1(<4 x i1> %extractvec)
ret i1 %dx.any
}
; Declarations of the llvm.dx.any intrinsic overloads called by the test
; functions in this file: one bool vector, one scalar bool, three integer
; widths, and three floating-point widths. Each overload returns i1.
declare i1 @llvm.dx.any.v4i1(<4 x i1>)
declare i1 @llvm.dx.any.i1(i1)
declare i1 @llvm.dx.any.i16(i16)
declare i1 @llvm.dx.any.i32(i32)
declare i1 @llvm.dx.any.i64(i64)
declare i1 @llvm.dx.any.f16(half)
declare i1 @llvm.dx.any.f32(float)
declare i1 @llvm.dx.any.f64(double)