Files
clang-p2996/llvm/test/CodeGen/NVPTX/fp-contract.ll
Pavel Kopyl 01afb3fb99 [NVPTX] Use by default 'sm_60' architecture when expanding %ptxas-verify macro.
Also get rid of explicitly specified '-march' values for old architectures.
This simplifies %ptxas-verify statements.
After the change, we can potentially miss cases where a new functionality
is added to the architecture without appropriate checks in the
backend. On the other hand, this is mostly true for old architectures
that have been thoroughly tested.

Differential Revision: https://reviews.llvm.org/D141736
2023-02-17 20:49:04 +01:00

36 lines
1.3 KiB
LLVM

; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefix=DEFAULT
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_30 | %ptxas-verify %}
target triple = "nvptx64-unknown-cuda"
;; Make sure we are generating proper instruction sequences for fused ops
;; If fusion is allowed, we try to form fma.rn at the PTX level, and emit
;; add.f32 otherwise. Without an explicit rounding mode on add.f32, ptxas
;; is free to fuse with a multiply if it is able. If fusion is not allowed,
;; we do not form fma.rn at the PTX level and explicitly generate add.rn
;; for all adds to prevent ptxas from fusion the ops.
;; FAST-LABEL: @t0
;; DEFAULT-LABEL: @t0
define float @t0(float %a, float %b, float %c) {
;; FAST: fma.rn.f32
;; DEFAULT: mul.rn.f32
;; DEFAULT: add.rn.f32
%v0 = fmul float %a, %b
%v1 = fadd float %v0, %c
ret float %v1
}
;; FAST-LABEL: @t1
;; DEFAULT-LABEL: @t1
define float @t1(float %a, float %b) {
;; We cannot form an fma here, but make sure we explicitly emit add.rn.f32
;; to prevent ptxas from fusing this with anything else.
;; FAST: add.f32
;; DEFAULT: add.rn.f32
%v1 = fadd float %a, %b
ret float %v1
}