This patch adds initial argmemonly inference, by checking the underlying
objects of locations returned by MemoryLocation.
I think this should cover most cases, except function calls to other
argmemonly functions.
I'm not sure if there's a reason why we don't infer those yet.
Additional argmemonly can improve codegen in some cases. It also makes
it easier to come up with a C reproducer for 7662d1687b (already fixed,
but I'm trying to see if C/C++ fuzzing could help to uncover similar
issues.)
Compile-time impact:
NewPM-O3: +0.01%
NewPM-ReleaseThinLTO: +0.03%
NewPM-ReleaseLTO+g: +0.05%
https://llvm-compile-time-tracker.com/compare.php?from=067c035012fc061ad6378458774ac2df117283c6&to=fe209d4aab5b593bd62d18c0876732ddcca1614d&stat=instructions
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D121415
34 lines
2.0 KiB
LLVM
34 lines
2.0 KiB
LLVM
; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-unsafe-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=UNSAFE %s
|
|
; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-nans-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NONANS %s
|
|
; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NOINFS %s
|
|
|
|
; GCN: define float @foo(float %x) local_unnamed_addr #0 {
|
|
; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 {
|
|
; GCN: %mul.i = fmul float %load, 1.500000e+01
|
|
|
|
; UNSAFE: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "unsafe-fp-math"="true" }
|
|
; UNSAFE: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" }
|
|
|
|
; NOINFS: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "no-infs-fp-math"="true" }
|
|
; NOINFS: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
|
|
|
|
; NONANS: attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone willreturn "no-nans-fp-math"="true" }
|
|
; NONANS: attributes #1 = { argmemonly mustprogress nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
|
|
|
|
define float @foo(float %x) #0 {
|
|
entry:
|
|
%mul = fmul float %x, 1.500000e+01
|
|
ret float %mul
|
|
}
|
|
|
|
define amdgpu_kernel void @caller(float addrspace(1)* %p) #1 {
|
|
entry:
|
|
%load = load float, float addrspace(1)* %p, align 4
|
|
%call = call fast float @foo(float %load) #0
|
|
store float %call, float addrspace(1)* %p, align 4
|
|
ret void
|
|
}
|
|
|
|
attributes #0 = { nounwind }
|
|
attributes #1 = { nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
|