There are many tests that specify a target triple/CPU flags but no DataLayout, which can lead to IR being generated that has unusual behaviour. This commit attempts to use the default DataLayout based on the relevant flags if there is no explicit override on the command line or in the IR file. One thing that is not currently possible is to differentiate an explicit `target datalayout = ""` in the IR file from a missing datalayout, since the current APIs don't allow detecting this case. If it is considered useful to support this case (instead of passing "-data-layout=" on the command line), I can change the IR parsers to track whether they have seen such a directive and change the callback type. Differential Revision: https://reviews.llvm.org/D141060
363 lines
21 KiB
LLVM
363 lines
21 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=sroa,amdgpu-promote-alloca < %s | FileCheck %s

; Make sure that array allocas loaded and stored as multi-element aggregates are handled correctly.
; Strictly speaking, the promote-alloca pass shouldn't have to deal with this case as it is
; non-canonical, but the pass should handle it gracefully if it is encountered.
; The checks look for lines that previously caused issues in PromoteAlloca (non-canonical). Opt
; should now leave these unchanged.

; Named aggregate types used by the tests in this file.
%Block = type { [1 x float], i32 }
%gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] }
%struct = type { i32, i32 }

; External addrspace(1) globals the tests load their inputs from (@block) and
; store their results to (@pv).
@block = external addrspace(1) global %Block
@pv = external addrspace(1) global %gl_PerVertex

; A [1 x float] alloca is written with a single whole-aggregate store and then
; read back through a GEP whose index is a value loaded from @block.
; The expected output keeps the array alloca (F1) and splits the aggregate
; store into an extractvalue followed by a scalar float store.
define amdgpu_vs void @promote_1d_aggr() #0 {
; CHECK-LABEL: @promote_1d_aggr(
; CHECK-NEXT: [[F1:%.*]] = alloca [1 x float], align 4, addrspace(5)
; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK:%.*]], ptr addrspace(1) @block, i32 0, i32 1
; CHECK-NEXT: [[FOO1:%.*]] = load i32, ptr addrspace(1) [[FOO]], align 4
; CHECK-NEXT: [[FOO3:%.*]] = load [1 x float], ptr addrspace(1) @block, align 4
; CHECK-NEXT: [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [1 x float] [[FOO3]], 0
; CHECK-NEXT: [[FOO3_FCA_0_GEP:%.*]] = getelementptr inbounds [1 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
; CHECK-NEXT: store float [[FOO3_FCA_0_EXTRACT]], ptr addrspace(5) [[FOO3_FCA_0_GEP]], align 4
; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [1 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO1]]
; CHECK-NEXT: [[FOO6:%.*]] = load float, ptr addrspace(5) [[FOO5]], align 4
; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> undef, float [[FOO6]], i32 0
; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[FOO6]], i32 1
; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[FOO6]], i32 2
; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[FOO6]], i32 3
; CHECK-NEXT: store <4 x float> [[FOO12]], ptr addrspace(1) @pv, align 16
; CHECK-NEXT: ret void
;
  %i = alloca i32, addrspace(5)
  %f1 = alloca [1 x float], addrspace(5)
  %foo = getelementptr %Block, ptr addrspace(1) @block, i32 0, i32 1
  %foo1 = load i32, ptr addrspace(1) %foo
  store i32 %foo1, ptr addrspace(5) %i
  ; Whole-aggregate load/store of the array: the non-canonical form under test.
  %foo3 = load [1 x float], ptr addrspace(1) @block
  store [1 x float] %foo3, ptr addrspace(5) %f1
  %foo4 = load i32, ptr addrspace(5) %i
  ; Dynamically indexed read of the array alloca.
  %foo5 = getelementptr [1 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  %foo6 = load float, ptr addrspace(5) %foo5
  ; %foo7 is never stored to; the expected output uses undef for this load.
  %foo7 = alloca <4 x float>, addrspace(5)
  %foo8 = load <4 x float>, ptr addrspace(5) %foo7
  %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
  %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
  %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
  %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3
  store <4 x float> %foo12, ptr addrspace(1) @pv
  ret void
}

; Type and external addrspace(1) global used by @promote_store_aggr.
%Block2 = type { i32, [2 x float] }
@block2 = external addrspace(1) global %Block2

; A [2 x float] alloca is filled with two scalar stores and then read back with
; a single whole-aggregate load whose value is stored to @block2.
; The expected output contains no alloca: the aggregate is rebuilt with
; insertvalue and stored directly.
define amdgpu_vs void @promote_store_aggr() #0 {
; CHECK-LABEL: @promote_store_aggr(
; CHECK-NEXT: [[FOO1:%.*]] = load i32, ptr addrspace(1) @block2, align 4
; CHECK-NEXT: [[FOO3:%.*]] = sitofp i32 [[FOO1]] to float
; CHECK-NEXT: [[FOO6_FCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[FOO3]], 0
; CHECK-NEXT: [[FOO6_FCA_1_INSERT:%.*]] = insertvalue [2 x float] [[FOO6_FCA_0_INSERT]], float 2.000000e+00, 1
; CHECK-NEXT: [[FOO7:%.*]] = getelementptr [[BLOCK2:%.*]], ptr addrspace(1) @block2, i32 0, i32 1
; CHECK-NEXT: store [2 x float] [[FOO6_FCA_1_INSERT]], ptr addrspace(1) [[FOO7]], align 4
; CHECK-NEXT: store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, ptr addrspace(1) @pv, align 16
; CHECK-NEXT: ret void
;
  %i = alloca i32, addrspace(5)
  %f1 = alloca [2 x float], addrspace(5)
  %foo1 = load i32, ptr addrspace(1) @block2
  store i32 %foo1, ptr addrspace(5) %i
  %foo2 = load i32, ptr addrspace(5) %i
  %foo3 = sitofp i32 %foo2 to float
  ; Element 0 and element 1 are written individually...
  store float %foo3, ptr addrspace(5) %f1
  %foo5 = getelementptr [2 x float], ptr addrspace(5) %f1, i32 0, i32 1
  store float 2.000000e+00, ptr addrspace(5) %foo5
  ; ...and then the whole array is loaded as one aggregate value.
  %foo6 = load [2 x float], ptr addrspace(5) %f1
  %foo7 = getelementptr %Block2, ptr addrspace(1) @block2, i32 0, i32 1
  store [2 x float] %foo6, ptr addrspace(1) %foo7
  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, ptr addrspace(1) @pv
  ret void
}

; Type and external addrspace(1) global used by @promote_load_from_store_aggr
; and by the memcpy tests that follow it.
%Block3 = type { [2 x float], i32 }
@block3 = external addrspace(1) global %Block3

; A [2 x float] alloca is written with a whole-aggregate store and then read
; through a GEP with an index loaded from @block3.
; The expected output contains no alloca: the two elements are placed in a
; <2 x float> with insertelement and the dynamic read becomes an extractelement.
define amdgpu_vs void @promote_load_from_store_aggr() #0 {
; CHECK-LABEL: @promote_load_from_store_aggr(
; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 1
; CHECK-NEXT: [[FOO1:%.*]] = load i32, ptr addrspace(1) [[FOO]], align 4
; CHECK-NEXT: [[FOO3:%.*]] = load [2 x float], ptr addrspace(1) @block3, align 4
; CHECK-NEXT: [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> undef, float [[FOO3_FCA_0_EXTRACT]], i32 0
; CHECK-NEXT: [[FOO3_FCA_1_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 1
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[FOO3_FCA_1_EXTRACT]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 [[FOO1]]
; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[TMP3]], i32 1
; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[TMP3]], i32 2
; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[TMP3]], i32 3
; CHECK-NEXT: store <4 x float> [[FOO12]], ptr addrspace(1) @pv, align 16
; CHECK-NEXT: ret void
;
  %i = alloca i32, addrspace(5)
  %f1 = alloca [2 x float], addrspace(5)
  %foo = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 1
  %foo1 = load i32, ptr addrspace(1) %foo
  store i32 %foo1, ptr addrspace(5) %i
  ; Whole-aggregate load/store of the array.
  %foo3 = load [2 x float], ptr addrspace(1) @block3
  store [2 x float] %foo3, ptr addrspace(5) %f1
  %foo4 = load i32, ptr addrspace(5) %i
  ; Dynamically indexed read of the array alloca.
  %foo5 = getelementptr [2 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  %foo6 = load float, ptr addrspace(5) %foo5
  ; %foo7 is never stored to; the expected output uses undef for this load.
  %foo7 = alloca <4 x float>, addrspace(5)
  %foo8 = load <4 x float>, ptr addrspace(5) %foo7
  %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
  %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
  %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
  %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3
  store <4 x float> %foo12, ptr addrspace(1) @pv
  ret void
}

; A zero-initialized [5 x float] alloca gets 1.0 in element 1 and 2.0 in
; element 3, then 16 bytes are memmove'd from element 1 to the start of the
; same alloca, and element 0 is loaded and stored to @pv.
; The expected output is a single store of 1.0 to @pv.
define amdgpu_vs void @promote_memmove_aggr() #0 {
; CHECK-LABEL: @promote_memmove_aggr(
; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(1) @pv, align 4
; CHECK-NEXT: ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
  %foo1 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 1
  store float 1.0, ptr addrspace(5) %foo1
  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
  store float 2.0, ptr addrspace(5) %foo2
  ; Overlapping move within the same alloca: elements 1..4 -> elements 0..3.
  call void @llvm.memmove.p5i8.p5i8.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo1, i32 16, i1 false)
  %foo3 = load float, ptr addrspace(5) %f1
  store float %foo3, ptr addrspace(1) @pv
  ret void
}

; A zero-initialized [5 x float] alloca gets 2.0 in element 3 and 3.0 at an
; index loaded from @block3, then 8 bytes are memcpy'd from element 3 to the
; start of the same alloca, and element 0 is loaded and stored to @pv.
; The expected output contains no alloca: the dynamic store becomes an
; insertelement into a constant <5 x float> and the memcpy becomes a
; shufflevector with mask <3, 4, 2, 3, 4>.
define amdgpu_vs void @promote_memcpy_aggr() #0 {
; CHECK-LABEL: @promote_memcpy_aggr(
; CHECK-NEXT: [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
; CHECK-NEXT: [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <5 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 2.000000e+00, float 0.000000e+00>, float 3.000000e+00, i32 [[FOO4]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> poison, <5 x i32> <i32 3, i32 4, i32 2, i32 3, i32 4>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <5 x float> [[TMP2]], i32 0
; CHECK-NEXT: store float [[TMP3]], ptr addrspace(1) @pv, align 4
; CHECK-NEXT: ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1

  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
  store float 2.0, ptr addrspace(5) %foo2

  ; Store at a dynamic index loaded from @block3.
  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
  %foo4 = load i32, ptr addrspace(1) %foo3
  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  store float 3.0, ptr addrspace(5) %foo5

  ; Copy elements 3..4 over elements 0..1 of the same alloca.
  call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo2, i32 8, i1 false)
  %foo6 = load float, ptr addrspace(5) %f1
  store float %foo6, ptr addrspace(1) @pv
  ret void
}

; Same setup as @promote_memmove_aggr, but the copy is a memcpy of all 20 bytes
; whose source and destination are the same pointer (%f1).
; The expected output is a single store of 0.0 (the untouched element 0) to @pv.
define amdgpu_vs void @promote_memcpy_identity_aggr() #0 {
; CHECK-LABEL: @promote_memcpy_identity_aggr(
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) @pv, align 4
; CHECK-NEXT: ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
  %foo1 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 1
  store float 1.0, ptr addrspace(5) %foo1
  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
  store float 2.0, ptr addrspace(5) %foo2
  ; Source and destination are identical, so the copy changes nothing.
  call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %f1, i32 20, i1 false)
  %foo3 = load float, ptr addrspace(5) %f1
  store float %foo3, ptr addrspace(1) @pv
  ret void
}

; TODO: promote the allocas even if there is a memcpy between different allocas.
; Two zero-initialized [5 x float] allocas; 3.0 is stored into %f1 at an index
; loaded from @block3, 8 bytes are memcpy'd from %f1 to %f2, and %f2 is read at
; the same index.
; The expected output keeps both allocas and the memcpy call; only the
; aggregate zeroinitializer stores are split into per-element scalar stores.
define amdgpu_vs void @promote_memcpy_two_aggrs() #0 {
; CHECK-LABEL: @promote_memcpy_two_aggrs(
; CHECK-NEXT: [[F1:%.*]] = alloca [5 x float], align 4, addrspace(5)
; CHECK-NEXT: [[F2:%.*]] = alloca [5 x float], align 4, addrspace(5)
; CHECK-NEXT: [[DOTFCA_0_GEP1:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP1]], align 4
; CHECK-NEXT: [[DOTFCA_1_GEP2:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 1
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP2]], align 4
; CHECK-NEXT: [[DOTFCA_2_GEP3:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 2
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP3]], align 4
; CHECK-NEXT: [[DOTFCA_3_GEP4:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 3
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP4]], align 4
; CHECK-NEXT: [[DOTFCA_4_GEP5:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 4
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP5]], align 4
; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 0
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP]], align 4
; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 1
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP]], align 4
; CHECK-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 2
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP]], align 4
; CHECK-NEXT: [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 3
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP]], align 4
; CHECK-NEXT: [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 4
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP]], align 4
; CHECK-NEXT: [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
; CHECK-NEXT: [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO4]]
; CHECK-NEXT: store float 3.000000e+00, ptr addrspace(5) [[FOO5]], align 4
; CHECK-NEXT: call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 [[F2]], ptr addrspace(5) align 4 [[F1]], i32 8, i1 false)
; CHECK-NEXT: [[FOO6:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 [[FOO4]]
; CHECK-NEXT: [[FOO7:%.*]] = load float, ptr addrspace(5) [[FOO6]], align 4
; CHECK-NEXT: store float [[FOO7]], ptr addrspace(1) @pv, align 4
; CHECK-NEXT: ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  %f2 = alloca [5 x float], addrspace(5)

  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
  store [5 x float] zeroinitializer, ptr addrspace(5) %f2

  ; Store into %f1 at a dynamic index loaded from @block3.
  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
  %foo4 = load i32, ptr addrspace(1) %foo3
  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  store float 3.0, ptr addrspace(5) %foo5

  ; Copy between two distinct allocas.
  call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 4 %f2, ptr addrspace(5) align 4 %f1, i32 8, i1 false)

  %foo6 = getelementptr [5 x float], ptr addrspace(5) %f2, i32 0, i32 %foo4
  %foo7 = load float, ptr addrspace(5) %foo6
  store float %foo7, ptr addrspace(1) @pv
  ret void
}

; TODO: promote the alloca even if there is a memcpy between the alloca and
; another memory space.
; A zero-initialized [5 x float] alloca gets 3.0 at an index loaded from
; @block3, then 8 bytes are memcpy'd from the alloca (addrspace(5)) to @pv
; (addrspace(1)).
; The expected output keeps the alloca and the memcpy call; only the aggregate
; zeroinitializer store is split into per-element scalar stores.
; NOTE(review): the %src argument is not referenced in the body.
define amdgpu_vs void @promote_memcpy_p1p5_aggr(ptr addrspace(1) inreg %src) #0 {
; CHECK-LABEL: @promote_memcpy_p1p5_aggr(
; CHECK-NEXT: [[F1:%.*]] = alloca [5 x float], align 4, addrspace(5)
; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP]], align 4
; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 1
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP]], align 4
; CHECK-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 2
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP]], align 4
; CHECK-NEXT: [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 3
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP]], align 4
; CHECK-NEXT: [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 4
; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP]], align 4
; CHECK-NEXT: [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
; CHECK-NEXT: [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO4]]
; CHECK-NEXT: store float 3.000000e+00, ptr addrspace(5) [[FOO5]], align 4
; CHECK-NEXT: call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 @pv, ptr addrspace(5) align 4 [[F1]], i32 8, i1 false)
; CHECK-NEXT: ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1

  ; Store at a dynamic index loaded from @block3.
  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
  %foo4 = load i32, ptr addrspace(1) %foo3
  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  store float 3.0, ptr addrspace(5) %foo5

  ; Copy out of the alloca into a different address space.
  call void @llvm.memcpy.p1i8.p5i8.i32(ptr addrspace(1) align 4 @pv, ptr addrspace(5) align 4 %f1, i32 8, i1 false)
  ret void
}

; Like @promote_memcpy_aggr but using llvm.memcpy.inline and without the 2.0
; store to element 3: 8 bytes are copied from element 3 to the start of the
; same zero-initialized [5 x float] alloca after a dynamically indexed store.
; The expected output contains no alloca: an insertelement into a
; zeroinitializer <5 x float> followed by a shufflevector with mask
; <3, 4, 2, 3, 4>.
define amdgpu_vs void @promote_memcpy_inline_aggr() #0 {
; CHECK-LABEL: @promote_memcpy_inline_aggr(
; CHECK-NEXT: [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
; CHECK-NEXT: [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <5 x float> zeroinitializer, float 3.000000e+00, i32 [[FOO4]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> poison, <5 x i32> <i32 3, i32 4, i32 2, i32 3, i32 4>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <5 x float> [[TMP2]], i32 0
; CHECK-NEXT: store float [[TMP3]], ptr addrspace(1) @pv, align 4
; CHECK-NEXT: ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1

  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
  ; Store at a dynamic index loaded from @block3.
  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
  %foo4 = load i32, ptr addrspace(1) %foo3
  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  store float 3.0, ptr addrspace(5) %foo5

  ; Copy elements 3..4 over elements 0..1 of the same alloca.
  call void @llvm.memcpy.inline.p5i8.p5i8.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo2, i32 8, i1 false)
  %foo6 = load float, ptr addrspace(5) %f1
  store float %foo6, ptr addrspace(1) @pv
  ret void
}

; Declarations of the memory-transfer intrinsics called by the tests above.
; NOTE(review): these names carry a "p5i8"/"p1i8" suffix, while the expected
; output in this file calls @llvm.memcpy.p5.p5.i32 and @llvm.memcpy.p1.p5.i32,
; so the names are presumably rewritten when the module is read - confirm
; before renaming them here.
declare void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
declare void @llvm.memcpy.p1i8.p5i8.i32(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
declare void @llvm.memcpy.inline.p5i8.p5i8.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
declare void @llvm.memmove.p5i8.p5i8.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)

; External addrspace(1) globals used by @promote_double_aggr: @tmp_g supplies
; the input doubles and @frag_color receives the result.
@tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> }
@frag_color = external addrspace(1) global <4 x float>

; A [2 x double] alloca is written with a whole-aggregate store built from two
; doubles loaded from @tmp_g, then accessed element-wise with scalar loads and
; one scalar store before the result is truncated to float and splatted into
; @frag_color.
; The expected output contains no alloca: the element accesses become
; extractvalue results feeding the fadd chain directly.
define amdgpu_ps void @promote_double_aggr() #0 {
; CHECK-LABEL: @promote_double_aggr(
; CHECK-NEXT: [[FOO:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 0
; CHECK-NEXT: [[FOO1:%.*]] = load double, ptr addrspace(1) [[FOO]], align 8
; CHECK-NEXT: [[FOO2:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 1
; CHECK-NEXT: [[FOO3:%.*]] = load double, ptr addrspace(1) [[FOO2]], align 8
; CHECK-NEXT: [[FOO4:%.*]] = insertvalue [2 x double] undef, double [[FOO1]], 0
; CHECK-NEXT: [[FOO5:%.*]] = insertvalue [2 x double] [[FOO4]], double [[FOO3]], 1
; CHECK-NEXT: [[FOO5_FCA_0_EXTRACT:%.*]] = extractvalue [2 x double] [[FOO5]], 0
; CHECK-NEXT: [[FOO5_FCA_1_EXTRACT:%.*]] = extractvalue [2 x double] [[FOO5]], 1
; CHECK-NEXT: [[FOO10:%.*]] = fadd double [[FOO5_FCA_1_EXTRACT]], [[FOO5_FCA_1_EXTRACT]]
; CHECK-NEXT: [[FOO16:%.*]] = fadd double [[FOO10]], [[FOO5_FCA_1_EXTRACT]]
; CHECK-NEXT: [[FOO17:%.*]] = fptrunc double [[FOO16]] to float
; CHECK-NEXT: [[FOO18:%.*]] = insertelement <4 x float> undef, float [[FOO17]], i32 0
; CHECK-NEXT: [[FOO19:%.*]] = insertelement <4 x float> [[FOO18]], float [[FOO17]], i32 1
; CHECK-NEXT: [[FOO20:%.*]] = insertelement <4 x float> [[FOO19]], float [[FOO17]], i32 2
; CHECK-NEXT: [[FOO21:%.*]] = insertelement <4 x float> [[FOO20]], float [[FOO17]], i32 3
; CHECK-NEXT: store <4 x float> [[FOO21]], ptr addrspace(1) @frag_color, align 16
; CHECK-NEXT: ret void
;
  %s = alloca [2 x double], addrspace(5)
  %foo = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 0
  %foo1 = load double, ptr addrspace(1) %foo
  %foo2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 1
  %foo3 = load double, ptr addrspace(1) %foo2
  ; Build the aggregate and store it to the alloca in one go.
  %foo4 = insertvalue [2 x double] undef, double %foo1, 0
  %foo5 = insertvalue [2 x double] %foo4, double %foo3, 1
  store [2 x double] %foo5, ptr addrspace(5) %s
  ; Element 1 is read twice and the sum is written back to element 0.
  %foo6 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
  %foo7 = load double, ptr addrspace(5) %foo6
  %foo8 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
  %foo9 = load double, ptr addrspace(5) %foo8
  %foo10 = fadd double %foo7, %foo9
  store double %foo10, ptr addrspace(5) %s
  ; Element 0 (the sum just stored) plus element 1.
  %foo13 = load double, ptr addrspace(5) %s
  %foo14 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
  %foo15 = load double, ptr addrspace(5) %foo14
  %foo16 = fadd double %foo13, %foo15
  %foo17 = fptrunc double %foo16 to float
  %foo18 = insertelement <4 x float> undef, float %foo17, i32 0
  %foo19 = insertelement <4 x float> %foo18, float %foo17, i32 1
  %foo20 = insertelement <4 x float> %foo19, float %foo17, i32 2
  %foo21 = insertelement <4 x float> %foo20, float %foo17, i32 3
  store <4 x float> %foo21, ptr addrspace(1) @frag_color
  ret void
}

; Don't crash on a type that isn't a valid vector element.
; The alloca is an array of %struct and has no users; the expected output is an
; entry block containing only `ret void`.
define amdgpu_kernel void @alloca_struct() #0 {
; CHECK-LABEL: @alloca_struct(
; CHECK-NEXT: entry:
; CHECK-NEXT: ret void
;
entry:
  %alloca = alloca [2 x %struct], align 4, addrspace(5)
  ret void
}