We were running the sin+cos -> sincos simplification immediately on the incoming IR, which is still littered with temporary allocas that obscure trivial values. It needs to run after the initial SROA so that sincos insertion can identify the common argument value.
78 lines
3.8 KiB
LLVM
78 lines
3.8 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 -amdgpu-prelink %s | FileCheck %s
|
|
|
|
; Make sure that sin+cos -> sincos simplification happens after
|
|
; initial IR simplifications, otherwise we can't identify the common
|
|
; argument value.
|
|
|
|
; NOTE(review): this format string is not referenced by any function in this
; file; presumably left over from the original reproducer -- confirm before removing.
@.str = private unnamed_addr addrspace(4) constant [21 x i8] c"x: %f, y: %f, z: %f\0A\00", align 1
|
|
|
|
; Should have a call to the sincos declaration, not calls to the asm pseudo-libcalls
|
|
; Kernel under test. It copies %x into an addrspace(5) alloca and reloads it
; before every use, computes y = sin(x) + cos(x) and z = cos(x) + sin(x)
; through the internal @_Z3sinf / @_Z3cosf definitions, and stores x, y and z
; (each extended to double) to %out0, %out1 and %out2.
; The autogenerated expectations below require a single call to
; @_Z6sincosfPU3AS5f on the kernel argument, with the second result loaded
; back from an addrspace(5) alloca, and the same sum stored to both the
; second and third output pointers.
define protected amdgpu_kernel void @swdev456865(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, float noundef %x) #0 {
|
|
; CHECK-LABEL: define protected amdgpu_kernel void @swdev456865(
|
|
; CHECK-SAME: ptr addrspace(1) nocapture writeonly [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5)
|
|
; CHECK-NEXT: [[I_I:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) #[[ATTR1:[0-9]+]]
|
|
; CHECK-NEXT: [[I_I2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4
|
|
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[I_I]], [[I_I2]]
|
|
; CHECK-NEXT: [[CONV:%.*]] = fpext float [[X]] to double
|
|
; CHECK-NEXT: [[CONV5:%.*]] = fpext float [[ADD]] to double
|
|
; CHECK-NEXT: store double [[CONV]], ptr addrspace(1) [[OUT0]], align 8
|
|
; CHECK-NEXT: store double [[CONV5]], ptr addrspace(1) [[OUT1]], align 8
|
|
; CHECK-NEXT: store double [[CONV5]], ptr addrspace(1) [[OUT2]], align 8
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
; Stack slots for the argument copy and for the two sums y and z.
%x.addr = alloca float, align 4, addrspace(5)
|
|
%y = alloca float, align 4, addrspace(5)
|
|
%z = alloca float, align 4, addrspace(5)
|
|
; Spill the argument; every later use reloads it from %x.addr, so the four
; libcalls below do not share an argument value in the input IR.
store float %x, ptr addrspace(5) %x.addr, align 4
|
|
call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %y)
|
|
; y = sin(x) + cos(x), with each operand reloaded separately.
%i = load float, ptr addrspace(5) %x.addr, align 4
|
|
%call = call float @_Z3sinf(float noundef %i) #3
|
|
%i1 = load float, ptr addrspace(5) %x.addr, align 4
|
|
%call1 = call float @_Z3cosf(float noundef %i1) #3
|
|
%add = fadd float %call, %call1
|
|
store float %add, ptr addrspace(5) %y, align 4
|
|
call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %z)
|
|
; z = cos(x) + sin(x): the same two libcalls as for y, issued in the opposite order.
%i2 = load float, ptr addrspace(5) %x.addr, align 4
|
|
%call2 = call float @_Z3cosf(float noundef %i2) #3
|
|
%i3 = load float, ptr addrspace(5) %x.addr, align 4
|
|
%call3 = call float @_Z3sinf(float noundef %i3) #3
|
|
%add4 = fadd float %call2, %call3
|
|
store float %add4, ptr addrspace(5) %z, align 4
|
|
; Reload x, y and z, widen each to double, and write them to the three outputs.
%i4 = load float, ptr addrspace(5) %x.addr, align 4
|
|
%conv = fpext float %i4 to double
|
|
%i5 = load float, ptr addrspace(5) %y, align 4
|
|
%conv5 = fpext float %i5 to double
|
|
%i6 = load float, ptr addrspace(5) %z, align 4
|
|
%conv6 = fpext float %i6 to double
|
|
store double %conv, ptr addrspace(1) %out0, align 8
|
|
store double %conv5, ptr addrspace(1) %out1, align 8
|
|
store double %conv6, ptr addrspace(1) %out2, align 8
|
|
; End the lifetimes in the reverse of the order they were started.
call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %z)
|
|
call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %y)
|
|
ret void
|
|
}
|
|
|
|
; Lifetime marker intrinsics for addrspace(5) pointers; @swdev456865 applies
; them to its %y and %z allocas.
declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture) #1
|
|
declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) #1
|
|
|
|
; Internal definition of the cos libcall used by this test. Its body is a
; single inline-asm pseudo-libcall that takes %arg and produces the result.
define internal float @_Z3cosf(float noundef %arg) #2 {
entry:
  %cos.result = tail call float asm "pseudo-libcall-cos %0, %1", "=v,v"(float noundef %arg) #2
  ret float %cos.result
}
|
|
|
|
; Internal definition of the sin libcall used by this test. Its body is a
; single inline-asm pseudo-libcall that takes %arg and produces the result.
define internal float @_Z3sinf(float noundef %arg) #2 {
entry:
  %sin.result = tail call float asm "pseudo-libcall-sin %0, %1", "=v,v"(float noundef %arg) #2
  ret float %sin.result
}
|
|
|
|
; #0: attached to the kernel @swdev456865.
attributes #0 = { norecurse nounwind }
|
|
; #1: attached to the llvm.lifetime.start/end declarations.
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
|
|
; #2: attached to the @_Z3cosf / @_Z3sinf definitions and to the inline-asm calls inside them.
attributes #2 = { mustprogress nofree norecurse nounwind willreturn memory(none) }
|
|
; #3: attached to the sin/cos call sites inside the kernel.
attributes #3 = { nounwind willreturn memory(none) }
|