Scalarization can expose optimization opportunities for the individual elements of a vector, and can therefore be beneficial on targets like GPUs that tend to operate on scalars anyway. However, notably with 16-bit operations it is often beneficial to keep <2 x i16 / half> vectors around since there are packed instructions for those. Refactor the code to operate on "fragments" of split vectors. The fragments are usually scalars, but may themselves be smaller vectors when the scalarizer-min-bits option is used. If the split is uneven, the last fragment is a shorter remainder. This is almost NFC when the new option is unused, but it happens to clean up some code in the fully scalarized case as well. Differential Revision: https://reviews.llvm.org/D149842
875 lines
49 KiB
LLVM
875 lines
49 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Runs the scalarizer followed by dce, with -scalarize-load-store so that
; vector loads and stores are split per element too, and matches the printed
; IR against the expected-output comments in each function.
; RUN: opt %s -passes='function(scalarizer,dce)' -scalarize-load-store -S | FileCheck %s

; Little-endian layout in which 128-bit vectors have 16-byte ABI alignment
; (v128:128:128). Vector accesses written without an explicit alignment
; therefore yield "align 16" on their first scalar element in the expected
; output.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; External callee that takes and returns a whole vector (called from @f1).
declare <4 x float> @ext(<4 x float>)

@g = global <4 x float> zeroinitializer

; Loop carrying a <4 x float> accumulator that is combined with a vector load
; through <2 x double> bitcasts and shuffles. The expected output has every
; vector operation split into four float operations. A vector is rebuilt
; (insertelement chain) only to pass the sum to @ext, and the call result is
; split again with extractelement.
define void @f1(<4 x float> %init, ptr %base, i32 %count) {
; CHECK-LABEL: @f1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INIT_I0:%.*]] = extractelement <4 x float> [[INIT:%.*]], i64 0
; CHECK-NEXT:    [[INIT_I1:%.*]] = extractelement <4 x float> [[INIT]], i64 1
; CHECK-NEXT:    [[INIT_I2:%.*]] = extractelement <4 x float> [[INIT]], i64 2
; CHECK-NEXT:    [[INIT_I3:%.*]] = extractelement <4 x float> [[INIT]], i64 3
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I0:%.*]] = phi float [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I1:%.*]] = phi float [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I2:%.*]] = phi float [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I3:%.*]] = phi float [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]]
; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16
; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4
; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, ptr [[PTR_I2]], align 8
; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr float, ptr [[PTR]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, ptr [[PTR_I3]], align 4
; CHECK-NEXT:    [[ADD_I0:%.*]] = fadd float [[VAL_I0]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = fadd float [[VAL_I1]], [[VAL_I3]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = fadd float [[ACC_I0]], [[ACC_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = fadd float [[ACC_I1]], [[ACC_I3]]
; CHECK-NEXT:    [[ADD_UPTO0:%.*]] = insertelement <4 x float> poison, float [[ADD_I0]], i64 0
; CHECK-NEXT:    [[ADD_UPTO1:%.*]] = insertelement <4 x float> [[ADD_UPTO0]], float [[ADD_I1]], i64 1
; CHECK-NEXT:    [[ADD_UPTO2:%.*]] = insertelement <4 x float> [[ADD_UPTO1]], float [[ADD_I2]], i64 2
; CHECK-NEXT:    [[ADD:%.*]] = insertelement <4 x float> [[ADD_UPTO2]], float [[ADD_I3]], i64 3
; CHECK-NEXT:    [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[ADD]])
; CHECK-NEXT:    [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i64 0
; CHECK-NEXT:    [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
; CHECK-NEXT:    [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i64 1
; CHECK-NEXT:    [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
; CHECK-NEXT:    [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i64 2
; CHECK-NEXT:    [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
; CHECK-NEXT:    [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i64 3
; CHECK-NEXT:    [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
; CHECK-NEXT:    [[SEL_I0]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
; CHECK-NEXT:    [[SEL_I1]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
; CHECK-NEXT:    [[SEL_I2]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
; CHECK-NEXT:    [[SEL_I3]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
; CHECK-NEXT:    store float [[SEL_I0]], ptr [[PTR]], align 16
; CHECK-NEXT:    store float [[SEL_I1]], ptr [[PTR_I1]], align 4
; CHECK-NEXT:    store float [[SEL_I2]], ptr [[PTR_I2]], align 8
; CHECK-NEXT:    store float [[SEL_I3]], ptr [[PTR_I3]], align 4
; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float>, ptr %base, i32 %i
  %val = load <4 x float>, ptr %ptr
  ; Viewed as floats, %shuffle1 is { val0, val1, acc0, acc1 } and %shuffle2 is
  ; { val2, val3, acc2, acc3 }, so the fadd below pairs val0+val2, val1+val3,
  ; acc0+acc2 and acc1+acc3 -- exactly the operands of the four scalar fadds
  ; in the expected output.
  %dval = bitcast <4 x float> %val to <2 x double>
  %dacc = bitcast <4 x float> %acc to <2 x double>
  %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 0, i32 2>
  %shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
                            <2 x i32> <i32 1, i32 3>
  %f1 = bitcast <2 x double> %shuffle1 to <4 x float>
  %f2 = bitcast <2 x double> %shuffle2 to <4 x float>
  %add = fadd <4 x float> %f1, %f2
  ; The call keeps its vector signature, so its operand is reassembled and its
  ; result is taken apart again around it.
  %call = call <4 x float> @ext(<4 x float> %add)
  %cmp = fcmp ogt <4 x float> %call,
                  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
                <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, ptr %ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Integer counterpart of @f1: a <4 x i8> load is sign-extended, added to a
; loop-carried <4 x i32> accumulator, compared against per-lane constants,
; selected against a splat of %i and truncated back to <4 x i8> for the store.
; In the expected output the splat (insertelement + shufflevector with an
; all-zero mask) is gone and %i is used directly by each scalar select.
define void @f2(<4 x i32> %init, ptr %base, i32 %count) {
; CHECK-LABEL: @f2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[INIT_I0:%.*]] = extractelement <4 x i32> [[INIT:%.*]], i64 0
; CHECK-NEXT:    [[INIT_I1:%.*]] = extractelement <4 x i32> [[INIT]], i64 1
; CHECK-NEXT:    [[INIT_I2:%.*]] = extractelement <4 x i32> [[INIT]], i64 2
; CHECK-NEXT:    [[INIT_I3:%.*]] = extractelement <4 x i32> [[INIT]], i64 3
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I0:%.*]] = phi i32 [ [[INIT_I0]], [[ENTRY]] ], [ [[SEL_I0:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I1:%.*]] = phi i32 [ [[INIT_I1]], [[ENTRY]] ], [ [[SEL_I1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I2:%.*]] = phi i32 [ [[INIT_I2]], [[ENTRY]] ], [ [[SEL_I2:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ACC_I3:%.*]] = phi i32 [ [[INIT_I3]], [[ENTRY]] ], [ [[SEL_I3:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x i8>, ptr [[BASE:%.*]], i32 [[I]]
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i8, ptr [[PTR]], align 4
; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr i8, ptr [[PTR]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i8, ptr [[PTR_I1]], align 1
; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr i8, ptr [[PTR]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i8, ptr [[PTR_I2]], align 2
; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr i8, ptr [[PTR]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i8, ptr [[PTR_I3]], align 1
; CHECK-NEXT:    [[EXT_I0:%.*]] = sext i8 [[VAL_I0]] to i32
; CHECK-NEXT:    [[EXT_I1:%.*]] = sext i8 [[VAL_I1]] to i32
; CHECK-NEXT:    [[EXT_I2:%.*]] = sext i8 [[VAL_I2]] to i32
; CHECK-NEXT:    [[EXT_I3:%.*]] = sext i8 [[VAL_I3]] to i32
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[EXT_I0]], [[ACC_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[EXT_I1]], [[ACC_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[EXT_I2]], [[ACC_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[EXT_I3]], [[ACC_I3]]
; CHECK-NEXT:    [[CMP_I0:%.*]] = icmp slt i32 [[ADD_I0]], -10
; CHECK-NEXT:    [[CMP_I1:%.*]] = icmp slt i32 [[ADD_I1]], -11
; CHECK-NEXT:    [[CMP_I2:%.*]] = icmp slt i32 [[ADD_I2]], -12
; CHECK-NEXT:    [[CMP_I3:%.*]] = icmp slt i32 [[ADD_I3]], -13
; CHECK-NEXT:    [[SEL_I0]] = select i1 [[CMP_I0]], i32 [[ADD_I0]], i32 [[I]]
; CHECK-NEXT:    [[SEL_I1]] = select i1 [[CMP_I1]], i32 [[ADD_I1]], i32 [[I]]
; CHECK-NEXT:    [[SEL_I2]] = select i1 [[CMP_I2]], i32 [[ADD_I2]], i32 [[I]]
; CHECK-NEXT:    [[SEL_I3]] = select i1 [[CMP_I3]], i32 [[ADD_I3]], i32 [[I]]
; CHECK-NEXT:    [[TRUNC_I0:%.*]] = trunc i32 [[SEL_I0]] to i8
; CHECK-NEXT:    [[TRUNC_I1:%.*]] = trunc i32 [[SEL_I1]] to i8
; CHECK-NEXT:    [[TRUNC_I2:%.*]] = trunc i32 [[SEL_I2]] to i8
; CHECK-NEXT:    [[TRUNC_I3:%.*]] = trunc i32 [[SEL_I3]] to i8
; CHECK-NEXT:    store i8 [[TRUNC_I0]], ptr [[PTR]], align 4
; CHECK-NEXT:    store i8 [[TRUNC_I1]], ptr [[PTR_I1]], align 1
; CHECK-NEXT:    store i8 [[TRUNC_I2]], ptr [[PTR_I2]], align 2
; CHECK-NEXT:    store i8 [[TRUNC_I3]], ptr [[PTR_I3]], align 1
; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x i8>, ptr %base, i32 %i
  %val = load <4 x i8>, ptr %ptr
  %ext = sext <4 x i8> %val to <4 x i32>
  %add = add <4 x i32> %ext, %acc
  %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
  ; Splat of %i: lane 0 is inserted, then broadcast by the all-zero mask.
  %single = insertelement <4 x i32> undef, i32 %i, i32 0
  %limit = shufflevector <4 x i32> %single, <4 x i32> undef,
                         <4 x i32> zeroinitializer
  %sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
  %trunc = trunc <4 x i32> %sel to <4 x i8>
  store <4 x i8> %trunc, ptr %ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}

; Check that !tbaa information is preserved.
; The tag on the vector load (!1) is expected on each of the four scalar
; loads, and the tag on the vector store (!2) on each of the four scalar
; stores.
define void @f3(ptr %src, ptr %dst) {
; CHECK-LABEL: @f3(
; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT:    store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa [[TBAA3:![0-9]+]]
; CHECK-NEXT:    store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa [[TBAA3]]
; CHECK-NEXT:    store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa [[TBAA3]]
; CHECK-NEXT:    ret void
;
  %val = load <4 x i32>, ptr %src, !tbaa !1
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, ptr %dst, !tbaa !2
  ret void
}

; Check that !tbaa.struct information is preserved.
; The same !tbaa.struct node (!5) is attached to the vector load and the
; vector store, and is expected on all eight scalar accesses.
define void @f4(ptr %src, ptr %dst) {
; CHECK-LABEL: @f4(
; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]]
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT:    store i32 [[ADD_I0]], ptr [[DST]], align 16, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT:    store i32 [[ADD_I1]], ptr [[DST_I1]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT:    store i32 [[ADD_I2]], ptr [[DST_I2]], align 8, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT:    store i32 [[ADD_I3]], ptr [[DST_I3]], align 4, !tbaa.struct [[TBAA_STRUCT5]]
; CHECK-NEXT:    ret void
;
  %val = load <4 x i32>, ptr %src, !tbaa.struct !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, ptr %dst, !tbaa.struct !5
  ret void
}

; Check that llvm.access.group information is preserved.
; The access group on the vector load and store is expected on every scalar
; load and store, and the !llvm.loop attachment on the back-edge branch is
; expected to stay in place as well.
define void @f5(i32 %count, ptr %src, ptr %dst) {
; CHECK-LABEL: @f5(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[THIS_SRC:%.*]] = getelementptr <4 x i32>, ptr [[SRC:%.*]], i32 [[INDEX]]
; CHECK-NEXT:    [[THIS_SRC_I1:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 1
; CHECK-NEXT:    [[THIS_SRC_I2:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 2
; CHECK-NEXT:    [[THIS_SRC_I3:%.*]] = getelementptr i32, ptr [[THIS_SRC]], i32 3
; CHECK-NEXT:    [[THIS_DST:%.*]] = getelementptr <4 x i32>, ptr [[DST:%.*]], i32 [[INDEX]]
; CHECK-NEXT:    [[THIS_DST_I1:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 1
; CHECK-NEXT:    [[THIS_DST_I2:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 2
; CHECK-NEXT:    [[THIS_DST_I3:%.*]] = getelementptr i32, ptr [[THIS_DST]], i32 3
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, ptr [[THIS_SRC]], align 16, !llvm.access.group [[ACC_GRP6:![0-9]+]]
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, ptr [[THIS_SRC_I1]], align 4, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, ptr [[THIS_SRC_I2]], align 8, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, ptr [[THIS_SRC_I3]], align 4, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT:    store i32 [[ADD_I0]], ptr [[THIS_DST]], align 16, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT:    store i32 [[ADD_I1]], ptr [[THIS_DST_I1]], align 4, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT:    store i32 [[ADD_I2]], ptr [[THIS_DST_I2]], align 8, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT:    store i32 [[ADD_I3]], ptr [[THIS_DST_I3]], align 4, !llvm.access.group [[ACC_GRP6]]
; CHECK-NEXT:    [[NEXT_INDEX]] = add i32 [[INDEX]], -1
; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[NEXT_INDEX]], [[COUNT:%.*]]
; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[END:%.*]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       end:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
  %this_src = getelementptr <4 x i32>, ptr %src, i32 %index
  %this_dst = getelementptr <4 x i32>, ptr %dst, i32 %index
  %val = load <4 x i32>, ptr %this_src, !llvm.access.group !13
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, ptr %this_dst, !llvm.access.group !13
  %next_index = add i32 %index, -1
  %continue = icmp ne i32 %next_index, %count
  br i1 %continue, label %loop, label %end, !llvm.loop !3

end:
  ret void
}

; Check that fpmath information is preserved.
; Each of the four scalar fadds is expected to carry an !fpmath attachment,
; and the result vector is rebuilt with insertelement for the return value.
; NOTE(review): the expected lines name the metadata node literally (!9)
; rather than capturing it with a pattern, so they presumably depend on the
; metadata numbering of the whole module -- keep that in mind when adding or
; removing metadata nodes.
define <4 x float> @f6(<4 x float> %x) {
; CHECK-LABEL: @f6(
; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <4 x float> [[X:%.*]], i64 0
; CHECK-NEXT:    [[RES_I0:%.*]] = fadd float [[X_I0]], 1.000000e+00, !fpmath !9
; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <4 x float> [[X]], i64 1
; CHECK-NEXT:    [[RES_I1:%.*]] = fadd float [[X_I1]], 2.000000e+00, !fpmath !9
; CHECK-NEXT:    [[X_I2:%.*]] = extractelement <4 x float> [[X]], i64 2
; CHECK-NEXT:    [[RES_I2:%.*]] = fadd float [[X_I2]], 3.000000e+00, !fpmath !9
; CHECK-NEXT:    [[X_I3:%.*]] = extractelement <4 x float> [[X]], i64 3
; CHECK-NEXT:    [[RES_I3:%.*]] = fadd float [[X_I3]], 4.000000e+00, !fpmath !9
; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <4 x float> poison, float [[RES_I0]], i64 0
; CHECK-NEXT:    [[RES_UPTO1:%.*]] = insertelement <4 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
; CHECK-NEXT:    [[RES_UPTO2:%.*]] = insertelement <4 x float> [[RES_UPTO1]], float [[RES_I2]], i64 2
; CHECK-NEXT:    [[RES:%.*]] = insertelement <4 x float> [[RES_UPTO2]], float [[RES_I3]], i64 3
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
  %res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
    !fpmath !4
  ret <4 x float> %res
}

; Check that random metadata isn't kept.
; The !foo attachment on the vector load and store is expected to be absent
; from the scalar loads and stores.
define void @f7(ptr %src, ptr %dst) {
; CHECK-LABEL: @f7(
; CHECK-NEXT:    [[DST_I1:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 1
; CHECK-NEXT:    [[DST_I2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; CHECK-NEXT:    [[DST_I3:%.*]] = getelementptr i32, ptr [[DST]], i32 3
; CHECK-NEXT:    [[VAL_I0:%.*]] = load i32, ptr [[SRC:%.*]], align 16
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr i32, ptr [[SRC]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load i32, ptr [[SRC_I1]], align 4
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr i32, ptr [[SRC]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load i32, ptr [[SRC_I2]], align 8
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr i32, ptr [[SRC]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load i32, ptr [[SRC_I3]], align 4
; CHECK-NEXT:    [[ADD_I0:%.*]] = add i32 [[VAL_I0]], [[VAL_I0]]
; CHECK-NEXT:    [[ADD_I1:%.*]] = add i32 [[VAL_I1]], [[VAL_I1]]
; CHECK-NEXT:    [[ADD_I2:%.*]] = add i32 [[VAL_I2]], [[VAL_I2]]
; CHECK-NEXT:    [[ADD_I3:%.*]] = add i32 [[VAL_I3]], [[VAL_I3]]
; CHECK-NEXT:    store i32 [[ADD_I0]], ptr [[DST]], align 16
; CHECK-NEXT:    store i32 [[ADD_I1]], ptr [[DST_I1]], align 4
; CHECK-NEXT:    store i32 [[ADD_I2]], ptr [[DST_I2]], align 8
; CHECK-NEXT:    store i32 [[ADD_I3]], ptr [[DST_I3]], align 4
; CHECK-NEXT:    ret void
;
  %val = load <4 x i32>, ptr %src, !foo !5
  %add = add <4 x i32> %val, %val
  store <4 x i32> %add, ptr %dst, !foo !5
  ret void
}

; Test GEP with vectors.
; Both the pointer operand and the index operand are vectors built with
; insertelement. In the expected output the inserted scalars are used
; directly: lanes 0 and 2 index with the constant 100, lane 1 uses %other as
; its base pointer, and only the lanes that are still needed are extracted
; from %ptr0 and %i0.
define void @f8(ptr %dest, <4 x ptr> %ptr0, <4 x i32> %i0, ptr %other) {
; CHECK-LABEL: @f8(
; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1
; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2
; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3
; CHECK-NEXT:    [[PTR0_I0:%.*]] = extractelement <4 x ptr> [[PTR0:%.*]], i64 0
; CHECK-NEXT:    [[PTR0_I2:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 2
; CHECK-NEXT:    [[PTR0_I3:%.*]] = extractelement <4 x ptr> [[PTR0]], i64 3
; CHECK-NEXT:    [[I0_I1:%.*]] = extractelement <4 x i32> [[I0:%.*]], i64 1
; CHECK-NEXT:    [[I0_I3:%.*]] = extractelement <4 x i32> [[I0]], i64 3
; CHECK-NEXT:    [[VAL_I0:%.*]] = getelementptr float, ptr [[PTR0_I0]], i32 100
; CHECK-NEXT:    [[VAL_I1:%.*]] = getelementptr float, ptr [[OTHER:%.*]], i32 [[I0_I1]]
; CHECK-NEXT:    [[VAL_I2:%.*]] = getelementptr float, ptr [[PTR0_I2]], i32 100
; CHECK-NEXT:    [[VAL_I3:%.*]] = getelementptr float, ptr [[PTR0_I3]], i32 [[I0_I3]]
; CHECK-NEXT:    store ptr [[VAL_I0]], ptr [[DEST]], align 32
; CHECK-NEXT:    store ptr [[VAL_I1]], ptr [[DEST_I1]], align 8
; CHECK-NEXT:    store ptr [[VAL_I2]], ptr [[DEST_I2]], align 16
; CHECK-NEXT:    store ptr [[VAL_I3]], ptr [[DEST_I3]], align 8
; CHECK-NEXT:    ret void
;
  ; Index vector: lanes 0 and 2 are overwritten with the constant 100.
  %i1 = insertelement <4 x i32> %i0, i32 100, i32 0
  %i2 = insertelement <4 x i32> %i1, i32 100, i32 2
  ; Pointer vector: lane 1 is overwritten with %other.
  %ptr1 = insertelement <4 x ptr> %ptr0, ptr %other, i32 1
  %val = getelementptr float, <4 x ptr> %ptr1, <4 x i32> %i2
  store <4 x ptr> %val, ptr %dest
  ret void
}

; Test the handling of unaligned loads.
; The vector load is only 4-byte aligned, so every scalar load is expected
; with "align 4". The vector store is 8-byte aligned, so the scalar stores at
; byte offsets 0 and 8 are expected with "align 8" and those at offsets 4 and
; 12 with "align 4".
define void @f9(ptr %dest, ptr %src) {
; CHECK-LABEL: @f9(
; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1
; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2
; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3
; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 4
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 4
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, ptr [[SRC_I2]], align 4
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr float, ptr [[SRC]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, ptr [[SRC_I3]], align 4
; CHECK-NEXT:    store float [[VAL_I0]], ptr [[DEST]], align 8
; CHECK-NEXT:    store float [[VAL_I1]], ptr [[DEST_I1]], align 4
; CHECK-NEXT:    store float [[VAL_I2]], ptr [[DEST_I2]], align 8
; CHECK-NEXT:    store float [[VAL_I3]], ptr [[DEST_I3]], align 4
; CHECK-NEXT:    ret void
;
  %val = load <4 x float>, ptr %src, align 4
  store <4 x float> %val, ptr %dest, align 8
  ret void
}

; ...and again with subelement alignment.
; The vector accesses are aligned to 1 and 2 bytes, less than the 4-byte
; float element, so every scalar load is expected with "align 1" and every
; scalar store with "align 2".
define void @f10(ptr %dest, ptr %src) {
; CHECK-LABEL: @f10(
; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr float, ptr [[DEST:%.*]], i32 1
; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr float, ptr [[DEST]], i32 2
; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr float, ptr [[DEST]], i32 3
; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, ptr [[SRC:%.*]], align 1
; CHECK-NEXT:    [[SRC_I1:%.*]] = getelementptr float, ptr [[SRC]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, ptr [[SRC_I1]], align 1
; CHECK-NEXT:    [[SRC_I2:%.*]] = getelementptr float, ptr [[SRC]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, ptr [[SRC_I2]], align 1
; CHECK-NEXT:    [[SRC_I3:%.*]] = getelementptr float, ptr [[SRC]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, ptr [[SRC_I3]], align 1
; CHECK-NEXT:    store float [[VAL_I0]], ptr [[DEST]], align 2
; CHECK-NEXT:    store float [[VAL_I1]], ptr [[DEST_I1]], align 2
; CHECK-NEXT:    store float [[VAL_I2]], ptr [[DEST_I2]], align 2
; CHECK-NEXT:    store float [[VAL_I3]], ptr [[DEST_I3]], align 2
; CHECK-NEXT:    ret void
;
  %val = load <4 x float>, ptr %src, align 1
  store <4 x float> %val, ptr %dest, align 2
  ret void
}

; Test that sub-byte loads aren't scalarized.
|
|
define void @f11(ptr %dest, ptr %src0) {
|
|
; CHECK-LABEL: @f11(
|
|
; CHECK-NEXT: [[SRC1:%.*]] = getelementptr <32 x i1>, ptr [[SRC0:%.*]], i32 1
|
|
; CHECK-NEXT: [[VAL0:%.*]] = load <32 x i1>, ptr [[SRC0]], align 4
|
|
; CHECK-NEXT: [[VAL0_I0:%.*]] = extractelement <32 x i1> [[VAL0]], i64 0
|
|
; CHECK-NEXT: [[VAL0_I1:%.*]] = extractelement <32 x i1> [[VAL0]], i64 1
|
|
; CHECK-NEXT: [[VAL0_I2:%.*]] = extractelement <32 x i1> [[VAL0]], i64 2
|
|
; CHECK-NEXT: [[VAL0_I3:%.*]] = extractelement <32 x i1> [[VAL0]], i64 3
|
|
; CHECK-NEXT: [[VAL0_I4:%.*]] = extractelement <32 x i1> [[VAL0]], i64 4
|
|
; CHECK-NEXT: [[VAL0_I5:%.*]] = extractelement <32 x i1> [[VAL0]], i64 5
|
|
; CHECK-NEXT: [[VAL0_I6:%.*]] = extractelement <32 x i1> [[VAL0]], i64 6
|
|
; CHECK-NEXT: [[VAL0_I7:%.*]] = extractelement <32 x i1> [[VAL0]], i64 7
|
|
; CHECK-NEXT: [[VAL0_I8:%.*]] = extractelement <32 x i1> [[VAL0]], i64 8
|
|
; CHECK-NEXT: [[VAL0_I9:%.*]] = extractelement <32 x i1> [[VAL0]], i64 9
|
|
; CHECK-NEXT: [[VAL0_I10:%.*]] = extractelement <32 x i1> [[VAL0]], i64 10
|
|
; CHECK-NEXT: [[VAL0_I11:%.*]] = extractelement <32 x i1> [[VAL0]], i64 11
|
|
; CHECK-NEXT: [[VAL0_I12:%.*]] = extractelement <32 x i1> [[VAL0]], i64 12
|
|
; CHECK-NEXT: [[VAL0_I13:%.*]] = extractelement <32 x i1> [[VAL0]], i64 13
|
|
; CHECK-NEXT: [[VAL0_I14:%.*]] = extractelement <32 x i1> [[VAL0]], i64 14
|
|
; CHECK-NEXT: [[VAL0_I15:%.*]] = extractelement <32 x i1> [[VAL0]], i64 15
|
|
; CHECK-NEXT: [[VAL0_I16:%.*]] = extractelement <32 x i1> [[VAL0]], i64 16
|
|
; CHECK-NEXT: [[VAL0_I17:%.*]] = extractelement <32 x i1> [[VAL0]], i64 17
|
|
; CHECK-NEXT: [[VAL0_I18:%.*]] = extractelement <32 x i1> [[VAL0]], i64 18
|
|
; CHECK-NEXT: [[VAL0_I19:%.*]] = extractelement <32 x i1> [[VAL0]], i64 19
|
|
; CHECK-NEXT: [[VAL0_I20:%.*]] = extractelement <32 x i1> [[VAL0]], i64 20
|
|
; CHECK-NEXT: [[VAL0_I21:%.*]] = extractelement <32 x i1> [[VAL0]], i64 21
|
|
; CHECK-NEXT: [[VAL0_I22:%.*]] = extractelement <32 x i1> [[VAL0]], i64 22
|
|
; CHECK-NEXT: [[VAL0_I23:%.*]] = extractelement <32 x i1> [[VAL0]], i64 23
|
|
; CHECK-NEXT: [[VAL0_I24:%.*]] = extractelement <32 x i1> [[VAL0]], i64 24
|
|
; CHECK-NEXT: [[VAL0_I25:%.*]] = extractelement <32 x i1> [[VAL0]], i64 25
|
|
; CHECK-NEXT: [[VAL0_I26:%.*]] = extractelement <32 x i1> [[VAL0]], i64 26
|
|
; CHECK-NEXT: [[VAL0_I27:%.*]] = extractelement <32 x i1> [[VAL0]], i64 27
|
|
; CHECK-NEXT: [[VAL0_I28:%.*]] = extractelement <32 x i1> [[VAL0]], i64 28
|
|
; CHECK-NEXT: [[VAL0_I29:%.*]] = extractelement <32 x i1> [[VAL0]], i64 29
|
|
; CHECK-NEXT: [[VAL0_I30:%.*]] = extractelement <32 x i1> [[VAL0]], i64 30
|
|
; CHECK-NEXT: [[VAL0_I31:%.*]] = extractelement <32 x i1> [[VAL0]], i64 31
|
|
; CHECK-NEXT: [[VAL1:%.*]] = load <32 x i1>, ptr [[SRC1]], align 4
|
|
; CHECK-NEXT: [[VAL1_I0:%.*]] = extractelement <32 x i1> [[VAL1]], i64 0
|
|
; CHECK-NEXT: [[AND_I0:%.*]] = and i1 [[VAL0_I0]], [[VAL1_I0]]
|
|
; CHECK-NEXT: [[VAL1_I1:%.*]] = extractelement <32 x i1> [[VAL1]], i64 1
|
|
; CHECK-NEXT: [[AND_I1:%.*]] = and i1 [[VAL0_I1]], [[VAL1_I1]]
|
|
; CHECK-NEXT: [[VAL1_I2:%.*]] = extractelement <32 x i1> [[VAL1]], i64 2
|
|
; CHECK-NEXT: [[AND_I2:%.*]] = and i1 [[VAL0_I2]], [[VAL1_I2]]
|
|
; CHECK-NEXT: [[VAL1_I3:%.*]] = extractelement <32 x i1> [[VAL1]], i64 3
|
|
; CHECK-NEXT: [[AND_I3:%.*]] = and i1 [[VAL0_I3]], [[VAL1_I3]]
|
|
; CHECK-NEXT: [[VAL1_I4:%.*]] = extractelement <32 x i1> [[VAL1]], i64 4
|
|
; CHECK-NEXT: [[AND_I4:%.*]] = and i1 [[VAL0_I4]], [[VAL1_I4]]
|
|
; CHECK-NEXT: [[VAL1_I5:%.*]] = extractelement <32 x i1> [[VAL1]], i64 5
|
|
; CHECK-NEXT: [[AND_I5:%.*]] = and i1 [[VAL0_I5]], [[VAL1_I5]]
|
|
; CHECK-NEXT: [[VAL1_I6:%.*]] = extractelement <32 x i1> [[VAL1]], i64 6
|
|
; CHECK-NEXT: [[AND_I6:%.*]] = and i1 [[VAL0_I6]], [[VAL1_I6]]
|
|
; CHECK-NEXT: [[VAL1_I7:%.*]] = extractelement <32 x i1> [[VAL1]], i64 7
|
|
; CHECK-NEXT: [[AND_I7:%.*]] = and i1 [[VAL0_I7]], [[VAL1_I7]]
|
|
; CHECK-NEXT: [[VAL1_I8:%.*]] = extractelement <32 x i1> [[VAL1]], i64 8
|
|
; CHECK-NEXT: [[AND_I8:%.*]] = and i1 [[VAL0_I8]], [[VAL1_I8]]
|
|
; CHECK-NEXT: [[VAL1_I9:%.*]] = extractelement <32 x i1> [[VAL1]], i64 9
|
|
; CHECK-NEXT: [[AND_I9:%.*]] = and i1 [[VAL0_I9]], [[VAL1_I9]]
|
|
; CHECK-NEXT: [[VAL1_I10:%.*]] = extractelement <32 x i1> [[VAL1]], i64 10
|
|
; CHECK-NEXT: [[AND_I10:%.*]] = and i1 [[VAL0_I10]], [[VAL1_I10]]
|
|
; CHECK-NEXT: [[VAL1_I11:%.*]] = extractelement <32 x i1> [[VAL1]], i64 11
|
|
; CHECK-NEXT: [[AND_I11:%.*]] = and i1 [[VAL0_I11]], [[VAL1_I11]]
|
|
; CHECK-NEXT: [[VAL1_I12:%.*]] = extractelement <32 x i1> [[VAL1]], i64 12
|
|
; CHECK-NEXT: [[AND_I12:%.*]] = and i1 [[VAL0_I12]], [[VAL1_I12]]
|
|
; CHECK-NEXT: [[VAL1_I13:%.*]] = extractelement <32 x i1> [[VAL1]], i64 13
|
|
; CHECK-NEXT: [[AND_I13:%.*]] = and i1 [[VAL0_I13]], [[VAL1_I13]]
|
|
; CHECK-NEXT: [[VAL1_I14:%.*]] = extractelement <32 x i1> [[VAL1]], i64 14
|
|
; CHECK-NEXT: [[AND_I14:%.*]] = and i1 [[VAL0_I14]], [[VAL1_I14]]
|
|
; CHECK-NEXT: [[VAL1_I15:%.*]] = extractelement <32 x i1> [[VAL1]], i64 15
|
|
; CHECK-NEXT: [[AND_I15:%.*]] = and i1 [[VAL0_I15]], [[VAL1_I15]]
|
|
; CHECK-NEXT: [[VAL1_I16:%.*]] = extractelement <32 x i1> [[VAL1]], i64 16
|
|
; CHECK-NEXT: [[AND_I16:%.*]] = and i1 [[VAL0_I16]], [[VAL1_I16]]
|
|
; CHECK-NEXT: [[VAL1_I17:%.*]] = extractelement <32 x i1> [[VAL1]], i64 17
|
|
; CHECK-NEXT: [[AND_I17:%.*]] = and i1 [[VAL0_I17]], [[VAL1_I17]]
|
|
; CHECK-NEXT: [[VAL1_I18:%.*]] = extractelement <32 x i1> [[VAL1]], i64 18
|
|
; CHECK-NEXT: [[AND_I18:%.*]] = and i1 [[VAL0_I18]], [[VAL1_I18]]
|
|
; CHECK-NEXT: [[VAL1_I19:%.*]] = extractelement <32 x i1> [[VAL1]], i64 19
|
|
; CHECK-NEXT: [[AND_I19:%.*]] = and i1 [[VAL0_I19]], [[VAL1_I19]]
|
|
; CHECK-NEXT: [[VAL1_I20:%.*]] = extractelement <32 x i1> [[VAL1]], i64 20
|
|
; CHECK-NEXT: [[AND_I20:%.*]] = and i1 [[VAL0_I20]], [[VAL1_I20]]
|
|
; CHECK-NEXT: [[VAL1_I21:%.*]] = extractelement <32 x i1> [[VAL1]], i64 21
|
|
; CHECK-NEXT: [[AND_I21:%.*]] = and i1 [[VAL0_I21]], [[VAL1_I21]]
|
|
; CHECK-NEXT: [[VAL1_I22:%.*]] = extractelement <32 x i1> [[VAL1]], i64 22
|
|
; CHECK-NEXT: [[AND_I22:%.*]] = and i1 [[VAL0_I22]], [[VAL1_I22]]
|
|
; CHECK-NEXT: [[VAL1_I23:%.*]] = extractelement <32 x i1> [[VAL1]], i64 23
|
|
; CHECK-NEXT: [[AND_I23:%.*]] = and i1 [[VAL0_I23]], [[VAL1_I23]]
|
|
; CHECK-NEXT: [[VAL1_I24:%.*]] = extractelement <32 x i1> [[VAL1]], i64 24
|
|
; CHECK-NEXT: [[AND_I24:%.*]] = and i1 [[VAL0_I24]], [[VAL1_I24]]
|
|
; CHECK-NEXT: [[VAL1_I25:%.*]] = extractelement <32 x i1> [[VAL1]], i64 25
|
|
; CHECK-NEXT: [[AND_I25:%.*]] = and i1 [[VAL0_I25]], [[VAL1_I25]]
|
|
; CHECK-NEXT: [[VAL1_I26:%.*]] = extractelement <32 x i1> [[VAL1]], i64 26
|
|
; CHECK-NEXT: [[AND_I26:%.*]] = and i1 [[VAL0_I26]], [[VAL1_I26]]
|
|
; CHECK-NEXT: [[VAL1_I27:%.*]] = extractelement <32 x i1> [[VAL1]], i64 27
|
|
; CHECK-NEXT: [[AND_I27:%.*]] = and i1 [[VAL0_I27]], [[VAL1_I27]]
|
|
; CHECK-NEXT: [[VAL1_I28:%.*]] = extractelement <32 x i1> [[VAL1]], i64 28
|
|
; CHECK-NEXT: [[AND_I28:%.*]] = and i1 [[VAL0_I28]], [[VAL1_I28]]
|
|
; CHECK-NEXT: [[VAL1_I29:%.*]] = extractelement <32 x i1> [[VAL1]], i64 29
|
|
; CHECK-NEXT: [[AND_I29:%.*]] = and i1 [[VAL0_I29]], [[VAL1_I29]]
|
|
; CHECK-NEXT: [[VAL1_I30:%.*]] = extractelement <32 x i1> [[VAL1]], i64 30
|
|
; CHECK-NEXT: [[AND_I30:%.*]] = and i1 [[VAL0_I30]], [[VAL1_I30]]
|
|
; CHECK-NEXT: [[VAL1_I31:%.*]] = extractelement <32 x i1> [[VAL1]], i64 31
|
|
; CHECK-NEXT: [[AND_I31:%.*]] = and i1 [[VAL0_I31]], [[VAL1_I31]]
|
|
; CHECK-NEXT: [[AND_UPTO0:%.*]] = insertelement <32 x i1> poison, i1 [[AND_I0]], i64 0
|
|
; CHECK-NEXT: [[AND_UPTO1:%.*]] = insertelement <32 x i1> [[AND_UPTO0]], i1 [[AND_I1]], i64 1
|
|
; CHECK-NEXT: [[AND_UPTO2:%.*]] = insertelement <32 x i1> [[AND_UPTO1]], i1 [[AND_I2]], i64 2
|
|
; CHECK-NEXT: [[AND_UPTO3:%.*]] = insertelement <32 x i1> [[AND_UPTO2]], i1 [[AND_I3]], i64 3
|
|
; CHECK-NEXT: [[AND_UPTO4:%.*]] = insertelement <32 x i1> [[AND_UPTO3]], i1 [[AND_I4]], i64 4
|
|
; CHECK-NEXT: [[AND_UPTO5:%.*]] = insertelement <32 x i1> [[AND_UPTO4]], i1 [[AND_I5]], i64 5
|
|
; CHECK-NEXT: [[AND_UPTO6:%.*]] = insertelement <32 x i1> [[AND_UPTO5]], i1 [[AND_I6]], i64 6
|
|
; CHECK-NEXT: [[AND_UPTO7:%.*]] = insertelement <32 x i1> [[AND_UPTO6]], i1 [[AND_I7]], i64 7
|
|
; CHECK-NEXT: [[AND_UPTO8:%.*]] = insertelement <32 x i1> [[AND_UPTO7]], i1 [[AND_I8]], i64 8
|
|
; CHECK-NEXT: [[AND_UPTO9:%.*]] = insertelement <32 x i1> [[AND_UPTO8]], i1 [[AND_I9]], i64 9
|
|
; CHECK-NEXT: [[AND_UPTO10:%.*]] = insertelement <32 x i1> [[AND_UPTO9]], i1 [[AND_I10]], i64 10
|
|
; CHECK-NEXT: [[AND_UPTO11:%.*]] = insertelement <32 x i1> [[AND_UPTO10]], i1 [[AND_I11]], i64 11
|
|
; CHECK-NEXT: [[AND_UPTO12:%.*]] = insertelement <32 x i1> [[AND_UPTO11]], i1 [[AND_I12]], i64 12
|
|
; CHECK-NEXT: [[AND_UPTO13:%.*]] = insertelement <32 x i1> [[AND_UPTO12]], i1 [[AND_I13]], i64 13
|
|
; CHECK-NEXT: [[AND_UPTO14:%.*]] = insertelement <32 x i1> [[AND_UPTO13]], i1 [[AND_I14]], i64 14
|
|
; CHECK-NEXT: [[AND_UPTO15:%.*]] = insertelement <32 x i1> [[AND_UPTO14]], i1 [[AND_I15]], i64 15
|
|
; CHECK-NEXT: [[AND_UPTO16:%.*]] = insertelement <32 x i1> [[AND_UPTO15]], i1 [[AND_I16]], i64 16
|
|
; CHECK-NEXT: [[AND_UPTO17:%.*]] = insertelement <32 x i1> [[AND_UPTO16]], i1 [[AND_I17]], i64 17
|
|
; CHECK-NEXT: [[AND_UPTO18:%.*]] = insertelement <32 x i1> [[AND_UPTO17]], i1 [[AND_I18]], i64 18
|
|
; CHECK-NEXT: [[AND_UPTO19:%.*]] = insertelement <32 x i1> [[AND_UPTO18]], i1 [[AND_I19]], i64 19
|
|
; CHECK-NEXT: [[AND_UPTO20:%.*]] = insertelement <32 x i1> [[AND_UPTO19]], i1 [[AND_I20]], i64 20
|
|
; CHECK-NEXT: [[AND_UPTO21:%.*]] = insertelement <32 x i1> [[AND_UPTO20]], i1 [[AND_I21]], i64 21
|
|
; CHECK-NEXT: [[AND_UPTO22:%.*]] = insertelement <32 x i1> [[AND_UPTO21]], i1 [[AND_I22]], i64 22
|
|
; CHECK-NEXT: [[AND_UPTO23:%.*]] = insertelement <32 x i1> [[AND_UPTO22]], i1 [[AND_I23]], i64 23
|
|
; CHECK-NEXT: [[AND_UPTO24:%.*]] = insertelement <32 x i1> [[AND_UPTO23]], i1 [[AND_I24]], i64 24
|
|
; CHECK-NEXT: [[AND_UPTO25:%.*]] = insertelement <32 x i1> [[AND_UPTO24]], i1 [[AND_I25]], i64 25
|
|
; CHECK-NEXT: [[AND_UPTO26:%.*]] = insertelement <32 x i1> [[AND_UPTO25]], i1 [[AND_I26]], i64 26
|
|
; CHECK-NEXT: [[AND_UPTO27:%.*]] = insertelement <32 x i1> [[AND_UPTO26]], i1 [[AND_I27]], i64 27
|
|
; CHECK-NEXT: [[AND_UPTO28:%.*]] = insertelement <32 x i1> [[AND_UPTO27]], i1 [[AND_I28]], i64 28
|
|
; CHECK-NEXT: [[AND_UPTO29:%.*]] = insertelement <32 x i1> [[AND_UPTO28]], i1 [[AND_I29]], i64 29
|
|
; CHECK-NEXT: [[AND_UPTO30:%.*]] = insertelement <32 x i1> [[AND_UPTO29]], i1 [[AND_I30]], i64 30
|
|
; CHECK-NEXT: [[AND:%.*]] = insertelement <32 x i1> [[AND_UPTO30]], i1 [[AND_I31]], i64 31
|
|
; CHECK-NEXT: store <32 x i1> [[AND]], ptr [[DEST:%.*]], align 4
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%src1 = getelementptr <32 x i1>, ptr %src0, i32 1
|
|
%val0 = load <32 x i1> , ptr %src0
|
|
%val1 = load <32 x i1> , ptr %src1
|
|
%and = and <32 x i1> %val0, %val1
|
|
store <32 x i1> %and, ptr %dest
|
|
ret void
|
|
}
|
|
|
|
; Test vector GEPs with more than one index.
; The GEP takes a vector of base pointers plus two vector indices: a constant
; <0, 1, 2, 3> and the variable %i. The expected output has one scalar GEP per
; lane, each using that lane's base pointer, constant and %i element. The
; <4 x ptr> store is split into four pointer stores (the RUN line passes
; -scalarize-load-store). %other is never used by the body.
define void @f13(ptr %dest, <4 x ptr> %ptr, <4 x i32> %i,
; CHECK-LABEL: @f13(
; CHECK-NEXT:    [[DEST_I1:%.*]] = getelementptr ptr, ptr [[DEST:%.*]], i32 1
; CHECK-NEXT:    [[DEST_I2:%.*]] = getelementptr ptr, ptr [[DEST]], i32 2
; CHECK-NEXT:    [[DEST_I3:%.*]] = getelementptr ptr, ptr [[DEST]], i32 3
; CHECK-NEXT:    [[PTR_I0:%.*]] = extractelement <4 x ptr> [[PTR:%.*]], i64 0
; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <4 x i32> [[I:%.*]], i64 0
; CHECK-NEXT:    [[VAL_I0:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I0]], i32 0, i32 [[I_I0]]
; CHECK-NEXT:    [[PTR_I1:%.*]] = extractelement <4 x ptr> [[PTR]], i64 1
; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <4 x i32> [[I]], i64 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I1]], i32 1, i32 [[I_I1]]
; CHECK-NEXT:    [[PTR_I2:%.*]] = extractelement <4 x ptr> [[PTR]], i64 2
; CHECK-NEXT:    [[I_I2:%.*]] = extractelement <4 x i32> [[I]], i64 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I2]], i32 2, i32 [[I_I2]]
; CHECK-NEXT:    [[PTR_I3:%.*]] = extractelement <4 x ptr> [[PTR]], i64 3
; CHECK-NEXT:    [[I_I3:%.*]] = extractelement <4 x i32> [[I]], i64 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = getelementptr inbounds [4 x float], ptr [[PTR_I3]], i32 3, i32 [[I_I3]]
; CHECK-NEXT:    store ptr [[VAL_I0]], ptr [[DEST]], align 32
; CHECK-NEXT:    store ptr [[VAL_I1]], ptr [[DEST_I1]], align 8
; CHECK-NEXT:    store ptr [[VAL_I2]], ptr [[DEST_I2]], align 16
; CHECK-NEXT:    store ptr [[VAL_I3]], ptr [[DEST_I3]], align 8
; CHECK-NEXT:    ret void
;
                 ptr %other) {
  %val = getelementptr inbounds [4 x float], <4 x ptr> %ptr,
                                             <4 x i32> <i32 0, i32 1, i32 2, i32 3>,
                                             <4 x i32> %i
  store <4 x ptr> %val, ptr %dest
  ret void
}
|
|
|
|
; Test combinations of vector and non-vector PHIs.
; The <4 x float> phi is expected to become four float phis, while the i32
; counter phi stays as it is. The call to @ext is left as a vector call, so its
; operand is rebuilt with insertelement and its result is taken apart with
; extractelement. %next_acc is rebuilt as a vector because it is returned.
define <4 x float> @f14(<4 x float> %acc, i32 %count) {
; CHECK-LABEL: @f14(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ACC_I0:%.*]] = extractelement <4 x float> [[ACC:%.*]], i64 0
; CHECK-NEXT:    [[ACC_I1:%.*]] = extractelement <4 x float> [[ACC]], i64 1
; CHECK-NEXT:    [[ACC_I2:%.*]] = extractelement <4 x float> [[ACC]], i64 2
; CHECK-NEXT:    [[ACC_I3:%.*]] = extractelement <4 x float> [[ACC]], i64 3
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[THIS_ACC_I0:%.*]] = phi float [ [[ACC_I0]], [[ENTRY:%.*]] ], [ [[NEXT_ACC_I0:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[THIS_ACC_I1:%.*]] = phi float [ [[ACC_I1]], [[ENTRY]] ], [ [[NEXT_ACC_I1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[THIS_ACC_I2:%.*]] = phi float [ [[ACC_I2]], [[ENTRY]] ], [ [[NEXT_ACC_I2:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[THIS_ACC_I3:%.*]] = phi float [ [[ACC_I3]], [[ENTRY]] ], [ [[NEXT_ACC_I3:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[THIS_COUNT:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY]] ], [ [[NEXT_COUNT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[THIS_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[THIS_ACC_I0]], i64 0
; CHECK-NEXT:    [[THIS_ACC_UPTO1:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO0]], float [[THIS_ACC_I1]], i64 1
; CHECK-NEXT:    [[THIS_ACC_UPTO2:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO1]], float [[THIS_ACC_I2]], i64 2
; CHECK-NEXT:    [[THIS_ACC:%.*]] = insertelement <4 x float> [[THIS_ACC_UPTO2]], float [[THIS_ACC_I3]], i64 3
; CHECK-NEXT:    [[FOO:%.*]] = call <4 x float> @ext(<4 x float> [[THIS_ACC]])
; CHECK-NEXT:    [[FOO_I0:%.*]] = extractelement <4 x float> [[FOO]], i64 0
; CHECK-NEXT:    [[NEXT_ACC_I0]] = fadd float [[THIS_ACC_I0]], [[FOO_I0]]
; CHECK-NEXT:    [[FOO_I1:%.*]] = extractelement <4 x float> [[FOO]], i64 1
; CHECK-NEXT:    [[NEXT_ACC_I1]] = fadd float [[THIS_ACC_I1]], [[FOO_I1]]
; CHECK-NEXT:    [[FOO_I2:%.*]] = extractelement <4 x float> [[FOO]], i64 2
; CHECK-NEXT:    [[NEXT_ACC_I2]] = fadd float [[THIS_ACC_I2]], [[FOO_I2]]
; CHECK-NEXT:    [[FOO_I3:%.*]] = extractelement <4 x float> [[FOO]], i64 3
; CHECK-NEXT:    [[NEXT_ACC_I3]] = fadd float [[THIS_ACC_I3]], [[FOO_I3]]
; CHECK-NEXT:    [[NEXT_ACC_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEXT_ACC_I0]], i64 0
; CHECK-NEXT:    [[NEXT_ACC_UPTO1:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO0]], float [[NEXT_ACC_I1]], i64 1
; CHECK-NEXT:    [[NEXT_ACC_UPTO2:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO1]], float [[NEXT_ACC_I2]], i64 2
; CHECK-NEXT:    [[NEXT_ACC:%.*]] = insertelement <4 x float> [[NEXT_ACC_UPTO2]], float [[NEXT_ACC_I3]], i64 3
; CHECK-NEXT:    [[NEXT_COUNT]] = sub i32 [[THIS_COUNT]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[NEXT_COUNT]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret <4 x float> [[NEXT_ACC]]
;
entry:
  br label %loop

loop:
  %this_acc = phi <4 x float> [ %acc, %entry ], [ %next_acc, %loop ]
  %this_count = phi i32 [ %count, %entry ], [ %next_count, %loop ]
  %foo = call <4 x float> @ext(<4 x float> %this_acc)
  %next_acc = fadd <4 x float> %this_acc, %foo
  %next_count = sub i32 %this_count, 1
  %cmp = icmp eq i32 %next_count, 0
  br i1 %cmp, label %loop, label %exit

exit:
  ret <4 x float> %next_acc
}
|
|
|
|
; Test unary operator scalarization.
; The vector fneg is expected to become four scalar fneg instructions. The
; load and store are split per element, and the fcmp/select pair is split
; with the matching lane of each constant vector. The %acc phi has no users
; and does not appear in the expected output (the RUN line also runs dce).
define void @f15(<4 x float> %init, ptr %base, i32 %count) {
; CHECK-LABEL: @f15(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[NEXTI]] = sub i32 [[I]], 1
; CHECK-NEXT:    [[PTR:%.*]] = getelementptr <4 x float>, ptr [[BASE:%.*]], i32 [[I]]
; CHECK-NEXT:    [[VAL_I0:%.*]] = load float, ptr [[PTR]], align 16
; CHECK-NEXT:    [[PTR_I1:%.*]] = getelementptr float, ptr [[PTR]], i32 1
; CHECK-NEXT:    [[VAL_I1:%.*]] = load float, ptr [[PTR_I1]], align 4
; CHECK-NEXT:    [[PTR_I2:%.*]] = getelementptr float, ptr [[PTR]], i32 2
; CHECK-NEXT:    [[VAL_I2:%.*]] = load float, ptr [[PTR_I2]], align 8
; CHECK-NEXT:    [[PTR_I3:%.*]] = getelementptr float, ptr [[PTR]], i32 3
; CHECK-NEXT:    [[VAL_I3:%.*]] = load float, ptr [[PTR_I3]], align 4
; CHECK-NEXT:    [[NEG_I0:%.*]] = fneg float [[VAL_I0]]
; CHECK-NEXT:    [[NEG_I1:%.*]] = fneg float [[VAL_I1]]
; CHECK-NEXT:    [[NEG_I2:%.*]] = fneg float [[VAL_I2]]
; CHECK-NEXT:    [[NEG_I3:%.*]] = fneg float [[VAL_I3]]
; CHECK-NEXT:    [[NEG_UPTO0:%.*]] = insertelement <4 x float> poison, float [[NEG_I0]], i64 0
; CHECK-NEXT:    [[NEG_UPTO1:%.*]] = insertelement <4 x float> [[NEG_UPTO0]], float [[NEG_I1]], i64 1
; CHECK-NEXT:    [[NEG_UPTO2:%.*]] = insertelement <4 x float> [[NEG_UPTO1]], float [[NEG_I2]], i64 2
; CHECK-NEXT:    [[NEG:%.*]] = insertelement <4 x float> [[NEG_UPTO2]], float [[NEG_I3]], i64 3
; CHECK-NEXT:    [[CALL:%.*]] = call <4 x float> @ext(<4 x float> [[NEG]])
; CHECK-NEXT:    [[CALL_I0:%.*]] = extractelement <4 x float> [[CALL]], i64 0
; CHECK-NEXT:    [[CMP_I0:%.*]] = fcmp ogt float [[CALL_I0]], 1.000000e+00
; CHECK-NEXT:    [[CALL_I1:%.*]] = extractelement <4 x float> [[CALL]], i64 1
; CHECK-NEXT:    [[CMP_I1:%.*]] = fcmp ogt float [[CALL_I1]], 2.000000e+00
; CHECK-NEXT:    [[CALL_I2:%.*]] = extractelement <4 x float> [[CALL]], i64 2
; CHECK-NEXT:    [[CMP_I2:%.*]] = fcmp ogt float [[CALL_I2]], 3.000000e+00
; CHECK-NEXT:    [[CALL_I3:%.*]] = extractelement <4 x float> [[CALL]], i64 3
; CHECK-NEXT:    [[CMP_I3:%.*]] = fcmp ogt float [[CALL_I3]], 4.000000e+00
; CHECK-NEXT:    [[SEL_I0:%.*]] = select i1 [[CMP_I0]], float [[CALL_I0]], float 5.000000e+00
; CHECK-NEXT:    [[SEL_I1:%.*]] = select i1 [[CMP_I1]], float [[CALL_I1]], float 6.000000e+00
; CHECK-NEXT:    [[SEL_I2:%.*]] = select i1 [[CMP_I2]], float [[CALL_I2]], float 7.000000e+00
; CHECK-NEXT:    [[SEL_I3:%.*]] = select i1 [[CMP_I3]], float [[CALL_I3]], float 8.000000e+00
; CHECK-NEXT:    store float [[SEL_I0]], ptr [[PTR]], align 16
; CHECK-NEXT:    store float [[SEL_I1]], ptr [[PTR_I1]], align 4
; CHECK-NEXT:    store float [[SEL_I2]], ptr [[PTR_I2]], align 8
; CHECK-NEXT:    store float [[SEL_I3]], ptr [[PTR_I3]], align 4
; CHECK-NEXT:    [[TEST:%.*]] = icmp eq i32 [[NEXTI]], 0
; CHECK-NEXT:    br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
  %acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
  %nexti = sub i32 %i, 1

  %ptr = getelementptr <4 x float>, ptr %base, i32 %i
  %val = load <4 x float> , ptr %ptr
  %neg = fneg <4 x float> %val
  %call = call <4 x float> @ext(<4 x float> %neg)
  %cmp = fcmp ogt <4 x float> %call,
  <float 1.0, float 2.0, float 3.0, float 4.0>
  %sel = select <4 x i1> %cmp, <4 x float> %call,
  <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
  store <4 x float> %sel, ptr %ptr

  %test = icmp eq i32 %nexti, 0
  br i1 %test, label %loop, label %exit

exit:
  ret void
}
|
|
|
|
; Check that IR flags are preserved.
; Here: both scalar adds are expected to keep the nuw and nsw flags of the
; vector add.
define <2 x i32> @f16(<2 x i32> %i, <2 x i32> %j) {
; CHECK-LABEL: @f16(
; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0
; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0
; CHECK-NEXT:    [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]]
; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1
; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1
; CHECK-NEXT:    [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]]
; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0
; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1
; CHECK-NEXT:    ret <2 x i32> [[RES]]
;
  %res = add nuw nsw <2 x i32> %i, %j
  ret <2 x i32> %res
}
|
|
; The `exact` flag of the vector sdiv is expected on both scalar sdivs.
define <2 x i32> @f17(<2 x i32> %i, <2 x i32> %j) {
; CHECK-LABEL: @f17(
; CHECK-NEXT:    [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i64 0
; CHECK-NEXT:    [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i64 0
; CHECK-NEXT:    [[RES_I0:%.*]] = sdiv exact i32 [[I_I0]], [[J_I0]]
; CHECK-NEXT:    [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i64 1
; CHECK-NEXT:    [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i64 1
; CHECK-NEXT:    [[RES_I1:%.*]] = sdiv exact i32 [[I_I1]], [[J_I1]]
; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[RES_I0]], i64 0
; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i64 1
; CHECK-NEXT:    ret <2 x i32> [[RES]]
;
  %res = sdiv exact <2 x i32> %i, %j
  ret <2 x i32> %res
}
|
|
; The `fast` flag of the vector fadd is expected on both scalar fadds.
define <2 x float> @f18(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @f18(
; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
; CHECK-NEXT:    [[RES_I0:%.*]] = fadd fast float [[X_I0]], [[Y_I0]]
; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
; CHECK-NEXT:    [[RES_I1:%.*]] = fadd fast float [[X_I1]], [[Y_I1]]
; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0
; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
; CHECK-NEXT:    ret <2 x float> [[RES]]
;
  %res = fadd fast <2 x float> %x, %y
  ret <2 x float> %res
}
|
|
; The `fast` flag of the vector fneg is expected on both scalar fnegs.
define <2 x float> @f19(<2 x float> %x) {
; CHECK-LABEL: @f19(
; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
; CHECK-NEXT:    [[RES_I0:%.*]] = fneg fast float [[X_I0]]
; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT:    [[RES_I1:%.*]] = fneg fast float [[X_I1]]
; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0
; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
; CHECK-NEXT:    ret <2 x float> [[RES]]
;
  %res = fneg fast <2 x float> %x
  ret <2 x float> %res
}
|
|
; The `fast` flag of the vector fcmp is expected on both scalar fcmps; the
; <2 x i1> result is rebuilt from the two i1 comparison results.
define <2 x i1> @f20(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: @f20(
; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
; CHECK-NEXT:    [[RES_I0:%.*]] = fcmp fast ogt float [[X_I0]], [[Y_I0]]
; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
; CHECK-NEXT:    [[RES_I1:%.*]] = fcmp fast ogt float [[X_I1]], [[Y_I1]]
; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x i1> poison, i1 [[RES_I0]], i64 0
; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x i1> [[RES_UPTO0]], i1 [[RES_I1]], i64 1
; CHECK-NEXT:    ret <2 x i1> [[RES]]
;
  %res = fcmp fast ogt <2 x float> %x, %y
  ret <2 x i1> %res
}
|
|
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
; The vector sqrt intrinsic call is expected to become two calls to the scalar
; @llvm.sqrt.f32 overload, each keeping the `fast` flag of the original call.
define <2 x float> @f21(<2 x float> %x) {
; CHECK-LABEL: @f21(
; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
; CHECK-NEXT:    [[RES_I0:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I0]])
; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT:    [[RES_I1:%.*]] = call fast float @llvm.sqrt.f32(float [[X_I1]])
; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0
; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
; CHECK-NEXT:    ret <2 x float> [[RES]]
;
  %res = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  ret <2 x float> %res
}
|
|
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
; Three-operand intrinsic: the vector fma call is expected to become two calls
; to the scalar @llvm.fma.f32 overload, each keeping the `fast` flag.
define <2 x float> @f22(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
; CHECK-LABEL: @f22(
; CHECK-NEXT:    [[X_I0:%.*]] = extractelement <2 x float> [[X:%.*]], i64 0
; CHECK-NEXT:    [[Y_I0:%.*]] = extractelement <2 x float> [[Y:%.*]], i64 0
; CHECK-NEXT:    [[Z_I0:%.*]] = extractelement <2 x float> [[Z:%.*]], i64 0
; CHECK-NEXT:    [[RES_I0:%.*]] = call fast float @llvm.fma.f32(float [[X_I0]], float [[Y_I0]], float [[Z_I0]])
; CHECK-NEXT:    [[X_I1:%.*]] = extractelement <2 x float> [[X]], i64 1
; CHECK-NEXT:    [[Y_I1:%.*]] = extractelement <2 x float> [[Y]], i64 1
; CHECK-NEXT:    [[Z_I1:%.*]] = extractelement <2 x float> [[Z]], i64 1
; CHECK-NEXT:    [[RES_I1:%.*]] = call fast float @llvm.fma.f32(float [[X_I1]], float [[Y_I1]], float [[Z_I1]])
; CHECK-NEXT:    [[RES_UPTO0:%.*]] = insertelement <2 x float> poison, float [[RES_I0]], i64 0
; CHECK-NEXT:    [[RES:%.*]] = insertelement <2 x float> [[RES_UPTO0]], float [[RES_I1]], i64 1
; CHECK-NEXT:    ret <2 x float> [[RES]]
;
  %res = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
  ret <2 x float> %res
}
|
|
|
|
; See https://reviews.llvm.org/D83101#2133062
; An extractelement feeding an insertelement chain that starts from undef.
; The expected output rebuilds the result from lane 0 of %srcvec and %v1,
; starting from poison rather than the undef used in the input.
; NOTE(review): presumably a crash reproducer, going by the function name and
; the linked review comment; the original failure is not visible here.
define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) {
; CHECK-LABEL: @f23_crash(
; CHECK-NEXT:    [[SRCVEC_I0:%.*]] = extractelement <2 x i32> [[SRCVEC:%.*]], i64 0
; CHECK-NEXT:    [[T1_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[SRCVEC_I0]], i64 0
; CHECK-NEXT:    [[T1:%.*]] = insertelement <2 x i32> [[T1_UPTO0]], i32 [[V1:%.*]], i64 1
; CHECK-NEXT:    ret <2 x i32> [[T1]]
;
  %v0 = extractelement <2 x i32> %srcvec, i32 0
  %t0 = insertelement <2 x i32> undef, i32 %v0, i32 0
  %t1 = insertelement <2 x i32> %t0, i32 %v1, i32 1
  ret <2 x i32> %t1
}
|
|
|
|
; A vector freeze is expected to become one scalar freeze per element.
define <2 x i32> @f24(<2 x i32> %src) {
; CHECK-LABEL: @f24(
; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <2 x i32> [[SRC:%.*]], i64 0
; CHECK-NEXT:    [[FRZ_I0:%.*]] = freeze i32 [[SRC_I0]]
; CHECK-NEXT:    [[SRC_I1:%.*]] = extractelement <2 x i32> [[SRC]], i64 1
; CHECK-NEXT:    [[FRZ_I1:%.*]] = freeze i32 [[SRC_I1]]
; CHECK-NEXT:    [[FRZ_UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[FRZ_I0]], i64 0
; CHECK-NEXT:    [[FRZ:%.*]] = insertelement <2 x i32> [[FRZ_UPTO0]], i32 [[FRZ_I1]], i64 1
; CHECK-NEXT:    ret <2 x i32> [[FRZ]]
;
  %frz = freeze <2 x i32> %src
  ret <2 x i32> %frz
}
|
|
|
|
; A freeze between two scalarized operations: the scalar fadd results feed the
; scalar freezes, which feed the scalar fmuls directly. Only the final fmul
; result is rebuilt as a vector; no intermediate vector appears in the
; expected output.
define <2 x float> @f25(<2 x float> %src) {
; CHECK-LABEL: @f25(
; CHECK-NEXT:    [[SRC_I0:%.*]] = extractelement <2 x float> [[SRC:%.*]], i64 0
; CHECK-NEXT:    [[ADD_I0:%.*]] = fadd float [[SRC_I0]], [[SRC_I0]]
; CHECK-NEXT:    [[SRC_I1:%.*]] = extractelement <2 x float> [[SRC]], i64 1
; CHECK-NEXT:    [[ADD_I1:%.*]] = fadd float [[SRC_I1]], [[SRC_I1]]
; CHECK-NEXT:    [[FRZ_I0:%.*]] = freeze float [[ADD_I0]]
; CHECK-NEXT:    [[FRZ_I1:%.*]] = freeze float [[ADD_I1]]
; CHECK-NEXT:    [[MUL_I0:%.*]] = fmul float [[FRZ_I0]], [[FRZ_I0]]
; CHECK-NEXT:    [[MUL_I1:%.*]] = fmul float [[FRZ_I1]], [[FRZ_I1]]
; CHECK-NEXT:    [[MUL_UPTO0:%.*]] = insertelement <2 x float> poison, float [[MUL_I0]], i64 0
; CHECK-NEXT:    [[MUL:%.*]] = insertelement <2 x float> [[MUL_UPTO0]], float [[MUL_I1]], i64 1
; CHECK-NEXT:    ret <2 x float> [[MUL]]
;
  %add = fadd <2 x float> %src, %src
  %frz = freeze <2 x float> %add
  %mul = fmul <2 x float> %frz, %frz
  ret <2 x float> %mul
}
|
|
|
|
; Metadata nodes. None of the functions from @f13 through @f25 reference them.
; NOTE(review): the users are presumably earlier tests in this file; confirm
; there before changing or renumbering any node.
; !1 and !2 are named nodes that both point at !0.
!0 = !{ !"root" }
!1 = !{ !"set1", !0 }
!2 = !{ !"set2", !0 }
; Self-referential node carrying an llvm.loop.parallel_accesses entry that
; names !13.
!3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} }
!4 = !{ float 4.0 }
!5 = !{ i64 0, i64 8, null }
!13 = distinct !{}
|