Files
clang-p2996/llvm/test/CodeGen/DirectX/fmad.ll
Farzon Lotfi 643b31dbe8 [HLSL] implement mad intrinsic (#83826)
This change implements #83736
The dot product lowering needs a tertiary multipy add operation. DXIL
has three mad opcodes for `fmad`(46), `imad`(48), and `umad`(49). Dot
product in DXIL only uses `imad`\ `umad`, but for completeness and
because the hlsl `mad` intrinsic requires it `fmad` was also included.
Two new intrinsics were needed to be created to complete this change.
the `fmad` case already supported by llvm via `fmuladd` intrinsic.

- `hlsl_intrinsics.h` - exposed mad api call.
- `Builtins.td` - exposed a `mad` builtin.
- `Sema.h` - make `tertiary` calls check for float types optional. 
- `CGBuiltin.cpp` - pick the intrinsic for singed\unsigned & float also
reuse `int_fmuladd`.
- `SemaChecking.cpp` - type checks for `__builtin_hlsl_mad`. 
- `IntrinsicsDirectX.td` create the two new intrinsics for
`imad`\`umad`/
- `DXIL.td` - create the llvm intrinsic to  `DXIL` opcode mapping.

---------

Co-authored-by: Farzon Lotfi <farzon@farzon.com>
2024-03-05 12:23:26 -05:00

68 lines
2.7 KiB
LLVM

; RUN: opt -S -dxil-op-lower < %s | FileCheck %s
; Make sure dxil operation function calls for round are generated for float and half.
; CHECK:call half @dx.op.tertiary.f16(i32 46, half %{{.*}}, half %{{.*}}, half %{{.*}})
; CHECK:call float @dx.op.tertiary.f32(i32 46, float %{{.*}}, float %{{.*}}, float %{{.*}})
; CHECK:call double @dx.op.tertiary.f64(i32 46, double %{{.*}}, double %{{.*}}, double %{{.*}})
target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
target triple = "dxil-pc-shadermodel6.7-library"
; Function Attrs: noinline nounwind optnone
define noundef half @fmad_half(half noundef %p0, half noundef %p1, half noundef %p2) #0 {
entry:
%p2.addr = alloca half, align 2
%p1.addr = alloca half, align 2
%p0.addr = alloca half, align 2
store half %p2, ptr %p2.addr, align 2
store half %p1, ptr %p1.addr, align 2
store half %p0, ptr %p0.addr, align 2
%0 = load half, ptr %p0.addr, align 2
%1 = load half, ptr %p1.addr, align 2
%2 = load half, ptr %p2.addr, align 2
%dx.fmad = call half @llvm.fmuladd.f16(half %0, half %1, half %2)
ret half %dx.fmad
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare half @llvm.fmuladd.f16(half, half, half) #2
; Function Attrs: noinline nounwind optnone
define noundef float @fmad_float(float noundef %p0, float noundef %p1, float noundef %p2) #0 {
entry:
%p2.addr = alloca float, align 4
%p1.addr = alloca float, align 4
%p0.addr = alloca float, align 4
store float %p2, ptr %p2.addr, align 4
store float %p1, ptr %p1.addr, align 4
store float %p0, ptr %p0.addr, align 4
%0 = load float, ptr %p0.addr, align 4
%1 = load float, ptr %p1.addr, align 4
%2 = load float, ptr %p2.addr, align 4
%dx.fmad = call float @llvm.fmuladd.f32(float %0, float %1, float %2)
ret float %dx.fmad
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.fmuladd.f32(float, float, float) #2
; Function Attrs: noinline nounwind optnone
define noundef double @fmad_double(double noundef %p0, double noundef %p1, double noundef %p2) #0 {
entry:
%p2.addr = alloca double, align 8
%p1.addr = alloca double, align 8
%p0.addr = alloca double, align 8
store double %p2, ptr %p2.addr, align 8
store double %p1, ptr %p1.addr, align 8
store double %p0, ptr %p0.addr, align 8
%0 = load double, ptr %p0.addr, align 8
%1 = load double, ptr %p1.addr, align 8
%2 = load double, ptr %p2.addr, align 8
%dx.fmad = call double @llvm.fmuladd.f64(double %0, double %1, double %2)
ret double %dx.fmad
}
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare double @llvm.fmuladd.f64(double, double, double) #2