D104809 changed `buildTree_rec` to check for extract element instructions with scalable types. However, if the extract is extended or truncated, these changes do not apply and we assert later on in isShuffle(), which attempts to cast the type of the extract to FixedVectorType.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D110640
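For readers unfamiliar with the crash mode, the sketch below illustrates the kind of guard this change implies. It is a hypothetical helper written for this note, not the actual D110640 diff, and the name `isFixedWidthExtract` is invented here. The point it demonstrates: an unconditional `cast<FixedVectorType>` asserts on a scalable operand, so extracts from `<vscale x N x T>` vectors have to be filtered out before any shuffle matching runs.

  // Hypothetical guard, sketched for illustration only.
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  // Returns true only when V extracts from a fixed-width vector, i.e. only
  // when it is safe to hand V to shuffle matching that casts the vector
  // operand's type to FixedVectorType.
  static bool isFixedWidthExtract(const Value *V) {
    auto *EI = dyn_cast<ExtractElementInst>(V);
    if (!EI)
      return false;
    // isa<FixedVectorType> is false for <vscale x N x T>, so scalable
    // extracts never reach the asserting cast.
    return isa<FixedVectorType>(EI->getVectorOperandType());
  }

The sext/zext/trunc tests below reproduce the previously unguarded path, where the extract is only reached through a cast instruction.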
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -slp-vectorizer -S | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

; This test checks that we neither crash nor change the code.

define void @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT:    [[LOAD0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
; CHECK-NEXT:    [[LOAD1:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
; CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[LOAD1]], [[LOAD0]]
; CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[ADD]], <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
; CHECK-NEXT:    ret void
;
  %load0 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
  %load1 = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef, <vscale x 16 x i8> undef)
  %add = add <vscale x 16 x i8> %load1, %load0
  tail call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %add, <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> undef)
  ret void
}

define <vscale x 4 x i32> @scalable_phi(<vscale x 4 x i32> %a, i32 %b) {
; CHECK-LABEL: @scalable_phi(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[B:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[END:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    br label [[END]]
; CHECK:       end:
; CHECK-NEXT:    [[RETVAL:%.*]] = phi <vscale x 4 x i32> [ [[A:%.*]], [[ENTRY:%.*]] ], [ zeroinitializer, [[IF_THEN]] ]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[RETVAL]]
;
entry:
  %cmp = icmp eq i32 %b, 0
  br i1 %cmp, label %if.then, label %end

if.then:
  br label %end

end:
  %retval = phi <vscale x 4 x i32> [ %a, %entry ], [ zeroinitializer, %if.then ]
  ret <vscale x 4 x i32> %retval
}
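; @scalable_phi1 and @scalable_phi2 feed extracts from scalable vectors into
; scalar PHIs; such extracts must not be treated as fixed-width shuffle
; candidates (see D104809).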
define void @scalable_phi1() {
; CHECK-LABEL: @scalable_phi1(
; CHECK-NEXT:  middle.block:
; CHECK-NEXT:    [[EXTRACT1:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
; CHECK-NEXT:    [[EXTRACT2:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
; CHECK-NEXT:    br label [[FOR_BODY_I:%.*]]
; CHECK:       for.body.i:
; CHECK-NEXT:    [[RECUR1:%.*]] = phi i16 [ [[EXTRACT1]], [[MIDDLE_BLOCK:%.*]] ], [ undef, [[FOR_BODY_I]] ]
; CHECK-NEXT:    [[RECUR2:%.*]] = phi i16 [ [[EXTRACT2]], [[MIDDLE_BLOCK]] ], [ undef, [[FOR_BODY_I]] ]
; CHECK-NEXT:    br label [[FOR_BODY_I]]
;
middle.block:
  %extract1 = extractelement <vscale x 8 x i16> undef, i32 undef
  %extract2 = extractelement <vscale x 8 x i16> undef, i32 undef
  br label %for.body.i

for.body.i:                                       ; preds = %for.body.i, %middle.block
  %recur1 = phi i16 [ %extract1, %middle.block ], [ undef, %for.body.i ]
  %recur2 = phi i16 [ %extract2, %middle.block ], [ undef, %for.body.i ]
  br label %for.body.i
}

define void @scalable_phi2() {
; CHECK-LABEL: @scalable_phi2(
; CHECK-NEXT:  middle.block:
; CHECK-NEXT:    [[EXTRACT1:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
; CHECK-NEXT:    [[EXTRACT2:%.*]] = extractelement <vscale x 8 x i16> undef, i32 undef
; CHECK-NEXT:    br label [[FOR_BODY_I:%.*]]
; CHECK:       for.body.i:
; CHECK-NEXT:    [[RECUR1:%.*]] = phi i16 [ undef, [[FOR_BODY_I]] ], [ [[EXTRACT1]], [[MIDDLE_BLOCK:%.*]] ]
; CHECK-NEXT:    [[RECUR2:%.*]] = phi i16 [ undef, [[FOR_BODY_I]] ], [ [[EXTRACT2]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[FOR_BODY_I]]
;
middle.block:
  %extract1 = extractelement <vscale x 8 x i16> undef, i32 undef
  %extract2 = extractelement <vscale x 8 x i16> undef, i32 undef
  br label %for.body.i

for.body.i:                                       ; preds = %for.body.i, %middle.block
  %recur1 = phi i16 [ undef, %for.body.i ], [ %extract1, %middle.block ]
  %recur2 = phi i16 [ undef, %for.body.i ], [ %extract2, %middle.block ]
  br label %for.body.i
}

define <vscale x 4 x i32> @build_vec_v4i32_reuse_0(<vscale x 2 x i32> %v0) {
; CHECK-LABEL: @build_vec_v4i32_reuse_0(
; CHECK-NEXT:    [[V0_0:%.*]] = extractelement <vscale x 2 x i32> [[V0:%.*]], i32 0
; CHECK-NEXT:    [[V0_1:%.*]] = extractelement <vscale x 2 x i32> [[V0]], i32 1
; CHECK-NEXT:    [[TMP0_0:%.*]] = add i32 [[V0_0]], [[V0_0]]
; CHECK-NEXT:    [[TMP1_0:%.*]] = sub i32 [[V0_0]], [[V0_1]]
; CHECK-NEXT:    [[TMP2_0:%.*]] = add i32 [[TMP0_0]], [[TMP1_0]]
; CHECK-NEXT:    [[TMP3_0:%.*]] = insertelement <vscale x 4 x i32> undef, i32 [[TMP2_0]], i32 0
; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3_0]]
;
  %v0.0 = extractelement <vscale x 2 x i32> %v0, i32 0
  %v0.1 = extractelement <vscale x 2 x i32> %v0, i32 1
  %tmp0.0 = add i32 %v0.0, %v0.0
  %tmp1.0 = sub i32 %v0.0, %v0.1
  %tmp2.0 = add i32 %tmp0.0, %tmp1.0
  %tmp3.0 = insertelement <vscale x 4 x i32> undef, i32 %tmp2.0, i32 0
  ret <vscale x 4 x i32> %tmp3.0
}

define <vscale x 4 x i8> @shuffle(<4 x i8> %x, <4 x i8> %y) {
; CHECK-LABEL: @shuffle(
; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 0
; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x i8> [[X]], i32 3
; CHECK-NEXT:    [[Y1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT:    [[Y2:%.*]] = extractelement <4 x i8> [[Y]], i32 2
; CHECK-NEXT:    [[X0X0:%.*]] = mul i8 [[X0]], [[X0]]
; CHECK-NEXT:    [[X3X3:%.*]] = mul i8 [[X3]], [[X3]]
; CHECK-NEXT:    [[Y1Y1:%.*]] = mul i8 [[Y1]], [[Y1]]
; CHECK-NEXT:    [[Y2Y2:%.*]] = mul i8 [[Y2]], [[Y2]]
; CHECK-NEXT:    [[INS1:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[X0X0]], i32 0
; CHECK-NEXT:    [[INS2:%.*]] = insertelement <vscale x 4 x i8> [[INS1]], i8 [[X3X3]], i32 1
; CHECK-NEXT:    [[INS3:%.*]] = insertelement <vscale x 4 x i8> [[INS2]], i8 [[Y1Y1]], i32 2
; CHECK-NEXT:    [[INS4:%.*]] = insertelement <vscale x 4 x i8> [[INS3]], i8 [[Y2Y2]], i32 3
; CHECK-NEXT:    ret <vscale x 4 x i8> [[INS4]]
;
  %x0 = extractelement <4 x i8> %x, i32 0
  %x3 = extractelement <4 x i8> %x, i32 3
  %y1 = extractelement <4 x i8> %y, i32 1
  %y2 = extractelement <4 x i8> %y, i32 2
  %x0x0 = mul i8 %x0, %x0
  %x3x3 = mul i8 %x3, %x3
  %y1y1 = mul i8 %y1, %y1
  %y2y2 = mul i8 %y2, %y2
  %ins1 = insertelement <vscale x 4 x i8> poison, i8 %x0x0, i32 0
  %ins2 = insertelement <vscale x 4 x i8> %ins1, i8 %x3x3, i32 1
  %ins3 = insertelement <vscale x 4 x i8> %ins2, i8 %y1y1, i32 2
  %ins4 = insertelement <vscale x 4 x i8> %ins3, i8 %y2y2, i32 3
  ret <vscale x 4 x i8> %ins4
}
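; The next three tests extend or truncate the extracted scalar before it is
; used, i.e. the extract is only reached through a sext/zext/trunc. This is
; the path described in the commit message, where isShuffle() used to assert
; while casting the extract's vector type to FixedVectorType.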
define void @sext_scalable_extractelement() {
; CHECK-LABEL: @sext_scalable_extractelement(
; CHECK-NEXT:    [[X0:%.*]] = extractelement <vscale x 2 x i32> undef, i32 undef
; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[X0]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 2 x i32> undef, i32 undef
; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP4]]
; CHECK-NEXT:    ret void
;
  %x0 = extractelement <vscale x 2 x i32> undef, i32 undef
  %1 = sext i32 %x0 to i64
  %2 = getelementptr inbounds i64, i64* undef, i64 %1
  %3 = extractelement <vscale x 2 x i32> undef, i32 undef
  %4 = sext i32 %3 to i64
  %5 = getelementptr inbounds i64, i64* undef, i64 %4
  ret void
}

define void @zext_scalable_extractelement() {
; CHECK-LABEL: @zext_scalable_extractelement(
; CHECK-NEXT:    [[X0:%.*]] = extractelement <vscale x 2 x i32> undef, i32 undef
; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[X0]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 2 x i32> undef, i32 undef
; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, i64* undef, i64 [[TMP4]]
; CHECK-NEXT:    ret void
;
  %x0 = extractelement <vscale x 2 x i32> undef, i32 undef
  %1 = zext i32 %x0 to i64
  %2 = getelementptr inbounds i64, i64* undef, i64 %1
  %3 = extractelement <vscale x 2 x i32> undef, i32 undef
  %4 = zext i32 %3 to i64
  %5 = getelementptr inbounds i64, i64* undef, i64 %4
  ret void
}

define void @trunc_scalable_extractelement() {
; CHECK-LABEL: @trunc_scalable_extractelement(
; CHECK-NEXT:    [[X0:%.*]] = extractelement <vscale x 2 x i64> undef, i32 undef
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[X0]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* undef, i32 [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 2 x i64> undef, i32 undef
; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* undef, i32 [[TMP4]]
; CHECK-NEXT:    ret void
;
  %x0 = extractelement <vscale x 2 x i64> undef, i32 undef
  %1 = trunc i64 %x0 to i32
  %2 = getelementptr inbounds i32, i32* undef, i32 %1
  %3 = extractelement <vscale x 2 x i64> undef, i32 undef
  %4 = trunc i64 %3 to i32
  %5 = getelementptr inbounds i32, i32* undef, i32 %4
  ret void
}

declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)