Files
clang-p2996/llvm/test/CodeGen/AMDGPU/inline-attr.ll
Florian Hahn e5822ded56 [FunctionAttrs] Infer argmemonly .
This patch adds initial argmemonly inference, by checking the underlying
objects of locations returned by MemoryLocation.

I think this should cover most cases, except function calls to other
argmemonly functions.

I'm not sure if there's a reason why we don't infer those yet.

Additional argmemonly can improve codegen in some cases. It also makes
it easier to come up with a C reproducer for 7662d1687b (already fixed,
but I'm trying to see if C/C++ fuzzing could help to uncover similar
issues.)

Compile-time impact:
NewPM-O3: +0.01%
NewPM-ReleaseThinLTO: +0.03%
NewPM-ReleaseLTO+g: +0.05%

https://llvm-compile-time-tracker.com/compare.php?from=067c035012fc061ad6378458774ac2df117283c6&to=fe209d4aab5b593bd62d18c0876732ddcca1614d&stat=instructions

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D121415
2022-03-16 10:24:33 +00:00

34 lines
2.0 KiB
LLVM

; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-unsafe-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=UNSAFE %s
; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-nans-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NONANS %s
; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NOINFS %s
; GCN: define float @foo(float %x) local_unnamed_addr #0 {
; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 {
; GCN: %mul.i = fmul float %load, 1.500000e+01
; UNSAFE: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "unsafe-fp-math"="true" }
; UNSAFE: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" }
; NOINFS: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "no-infs-fp-math"="true" }
; NOINFS: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
; NONANS: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "no-nans-fp-math"="true" }
; NONANS: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
define float @foo(float %x) #0 {
entry:
%mul = fmul float %x, 1.500000e+01
ret float %mul
}
define amdgpu_kernel void @caller(float addrspace(1)* %p) #1 {
entry:
%load = load float, float addrspace(1)* %p, align 4
%call = call fast float @foo(float %load) #0
store float %call, float addrspace(1)* %p, align 4
ret void
}
attributes #0 = { nounwind }
attributes #1 = { nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }