LoopAccessAnalysis currently does not check/track aliasing from the
output pointers, but assumes vectorizing library calls with a mapping is
safe.
This can result in incorrect codegen if something like the following is
vectorized:
```
for(int i=0; i<N; i++) {
// No aliasing between input and output pointers detected.
sincos(cos_out[0], sin_out+i, cos_out+i);
}
```
Where for VF >= 2 `cos_out[1]` to `cos_out[VF-1]` is the cosine of the
original value of `cos_out[0]` not the updated value.
407 lines
21 KiB
LLVM
407 lines
21 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(foo|bar|baz|quux|goo)|extractelement" --version 2
|
|
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON
|
|
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=2 -S | FileCheck %s --check-prefixes=NEON_INTERLEAVE
|
|
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON
|
|
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=2 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_OR_NEON_INTERLEAVE
|
|
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF
|
|
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=2 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF_INTERLEAVE
|
|
|
|
target triple = "aarch64-unknown-linux-gnu"
|
|
|
|
; A call whose argument can remain a scalar because it's sequential and only the
|
|
; starting value is required.
|
|
define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) {
|
|
; NEON-LABEL: define void @test_linear8
|
|
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
|
|
; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
|
|
;
|
|
; NEON_INTERLEAVE-LABEL: define void @test_linear8
|
|
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
|
|
; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON-LABEL: define void @test_linear8
|
|
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8
|
|
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
|
|
;
|
|
; SVE_TF-LABEL: define void @test_linear8
|
|
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
|
|
;
|
|
; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8
|
|
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%gepb = getelementptr i64, ptr %b, i64 %indvars.iv
|
|
%data = call i64 @foo(ptr %gepb) #0
|
|
%gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
|
|
store i64 %data, ptr %gepa
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %n
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup:
|
|
ret void
|
|
}
|
|
|
|
define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
|
|
; NEON-LABEL: define void @test_vector_linear4
|
|
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
|
|
; NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
|
|
;
|
|
; NEON_INTERLEAVE-LABEL: define void @test_vector_linear4
|
|
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
|
|
; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON-LABEL: define void @test_vector_linear4
|
|
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_vector_linear4
|
|
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
|
|
;
|
|
; SVE_TF-LABEL: define void @test_vector_linear4
|
|
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
|
|
;
|
|
; SVE_TF_INTERLEAVE-LABEL: define void @test_vector_linear4
|
|
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%gepc = getelementptr i32, ptr %c, i64 %indvars.iv
|
|
%input = load i32, ptr %gepc, align 8
|
|
%gepb = getelementptr i32, ptr %b, i64 %indvars.iv
|
|
%data = call i32 @baz(i32 %input, ptr %gepb) #1
|
|
%gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
|
|
store i32 %data, ptr %gepa, align 8
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %n
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup:
|
|
ret void
|
|
}
|
|
|
|
define void @test_linear8_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
|
|
; NEON-LABEL: define void @test_linear8_bad_stride
|
|
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
|
|
; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
|
|
;
|
|
; NEON_INTERLEAVE-LABEL: define void @test_linear8_bad_stride
|
|
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
|
|
; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON-LABEL: define void @test_linear8_bad_stride
|
|
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8_bad_stride
|
|
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
|
|
;
|
|
; SVE_TF-LABEL: define void @test_linear8_bad_stride
|
|
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
|
|
;
|
|
; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8_bad_stride
|
|
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%gepb = getelementptr i64, ptr %b, i64 %indvars.iv
|
|
%data = call i64 @foo(ptr %gepb) #2
|
|
%gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
|
|
store i64 %data, ptr %gepa
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %n
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup:
|
|
ret void
|
|
}
|
|
|
|
define void @test_linear16_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
|
|
; NEON-LABEL: define void @test_linear16_wide_stride
|
|
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
|
|
; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]]
|
|
;
|
|
; NEON_INTERLEAVE-LABEL: define void @test_linear16_wide_stride
|
|
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
|
|
; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]]
|
|
;
|
|
; SVE_OR_NEON-LABEL: define void @test_linear16_wide_stride
|
|
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4]]
|
|
;
|
|
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear16_wide_stride
|
|
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
|
|
;
|
|
; SVE_TF-LABEL: define void @test_linear16_wide_stride
|
|
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
|
|
;
|
|
; SVE_TF_INTERLEAVE-LABEL: define void @test_linear16_wide_stride
|
|
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%double = mul i64 %indvars.iv, 2
|
|
%gepb = getelementptr i64, ptr %b, i64 %double
|
|
%data = call i64 @foo(ptr %gepb) #2
|
|
%gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
|
|
store i64 %data, ptr %gepa
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %n
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup:
|
|
ret void
|
|
}
|
|
|
|
define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
|
|
; NEON-LABEL: define void @test_linear4_linear8
|
|
; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
|
|
; NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
|
|
;
|
|
; NEON_INTERLEAVE-LABEL: define void @test_linear4_linear8
|
|
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
|
|
; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON-LABEL: define void @test_linear4_linear8
|
|
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear4_linear8
|
|
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
|
|
;
|
|
; SVE_TF-LABEL: define void @test_linear4_linear8
|
|
; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
|
|
;
|
|
; SVE_TF_INTERLEAVE-LABEL: define void @test_linear4_linear8
|
|
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%gepc = getelementptr i32, ptr %c, i64 %indvars.iv
|
|
%gepb = getelementptr i64, ptr %b, i64 %indvars.iv
|
|
%data = call i32 @quux(ptr %gepc, ptr %gepb) #3
|
|
%gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
|
|
store i32 %data, ptr %gepa, align 8
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %n
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup:
|
|
ret void
|
|
}
|
|
|
|
define void @test_linear3_non_ptr(ptr noalias %a, i64 %n) {
|
|
; NEON-LABEL: define void @test_linear3_non_ptr
|
|
; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
|
|
; NEON: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1:%.*]], i32 0
|
|
; NEON: [[TMP3:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP2]])
|
|
; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]]
|
|
;
|
|
; NEON_INTERLEAVE-LABEL: define void @test_linear3_non_ptr
|
|
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
|
|
; NEON_INTERLEAVE: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2:%.*]], i32 0
|
|
; NEON_INTERLEAVE: [[TMP5:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP4]])
|
|
; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3:%.*]], i32 0
|
|
; NEON_INTERLEAVE: [[TMP7:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP6]])
|
|
; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON-LABEL: define void @test_linear3_non_ptr
|
|
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON: [[TMP13:%.*]] = extractelement <vscale x 4 x i32> [[TMP12:%.*]], i32 0
|
|
; SVE_OR_NEON: [[TMP14:%.*]] = call <vscale x 4 x i32> @vec_bar_linear3_nomask_sve(i32 [[TMP13]])
|
|
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR6:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear3_non_ptr
|
|
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
|
|
;
|
|
; SVE_TF-LABEL: define void @test_linear3_non_ptr
|
|
; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
|
|
;
|
|
; SVE_TF_INTERLEAVE-LABEL: define void @test_linear3_non_ptr
|
|
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%little.iv = trunc i64 %indvars.iv to i32
|
|
%trebled = mul i32 %little.iv, 3
|
|
%data = call i32 @bar(i32 %trebled) #4
|
|
%gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
|
|
store i32 %data, ptr %gepa
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %n
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup:
|
|
ret void
|
|
}
|
|
|
|
define void @test_linearn5_non_ptr_neg_stride(ptr noalias %a, i64 %n) {
|
|
; NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride
|
|
; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
|
|
; NEON: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1:%.*]], i32 0
|
|
; NEON: [[TMP3:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP2]])
|
|
; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]]
|
|
;
|
|
; NEON_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride
|
|
; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
|
|
; NEON_INTERLEAVE: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2:%.*]], i32 0
|
|
; NEON_INTERLEAVE: [[TMP5:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP4]])
|
|
; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3:%.*]], i32 0
|
|
; NEON_INTERLEAVE: [[TMP7:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP6]])
|
|
; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride
|
|
; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON: [[TMP13:%.*]] = extractelement <vscale x 4 x i32> [[TMP12:%.*]], i32 0
|
|
; SVE_OR_NEON: [[TMP14:%.*]] = call <vscale x 4 x i32> @vec_bar_linearn5_nomask_sve(i32 [[TMP13]])
|
|
; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR7:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride
|
|
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
|
|
;
|
|
; SVE_TF-LABEL: define void @test_linearn5_non_ptr_neg_stride
|
|
; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
|
|
;
|
|
; SVE_TF_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride
|
|
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%little.iv = trunc i64 %indvars.iv to i32
|
|
%negstride = mul i32 %little.iv, -5
|
|
%data = call i32 @bar(i32 %negstride) #5
|
|
%gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
|
|
store i32 %data, ptr %gepa
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %n
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup:
|
|
ret void
|
|
}
|
|
|
|
define void @test_linear8_return_void(ptr noalias %in, ptr noalias %out, i64 %n) {
|
|
; NEON-LABEL: define void @test_linear8_return_void
|
|
; NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) {
|
|
; NEON: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]]
|
|
;
|
|
; NEON_INTERLEAVE-LABEL: define void @test_linear8_return_void
|
|
; NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) {
|
|
; NEON_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON-LABEL: define void @test_linear8_return_void
|
|
; SVE_OR_NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR8:[0-9]+]]
|
|
;
|
|
; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8_return_void
|
|
; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_OR_NEON_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR7:[0-9]+]]
|
|
;
|
|
; SVE_TF-LABEL: define void @test_linear8_return_void
|
|
; SVE_TF-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR7:[0-9]+]]
|
|
;
|
|
; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8_return_void
|
|
; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
|
|
; SVE_TF_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR7:[0-9]+]]
|
|
;
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
|
%gep.in = getelementptr i64, ptr %in, i64 %indvars.iv
|
|
%num = load i64, ptr %gep.in, align 8
|
|
%gep.out = getelementptr i64, ptr %out, i64 %indvars.iv
|
|
call void @goo(i64 %num, ptr %gep.out) #6
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%exitcond = icmp eq i64 %indvars.iv.next, %n
|
|
br i1 %exitcond, label %for.cond.cleanup, label %for.body
|
|
|
|
for.cond.cleanup:
|
|
ret void
|
|
}
|
|
|
|
; Note: Vectorizing pointer arguments is currently disabled as LAA cannot detect
|
|
; aliasing from output/input pointers.
|
|
|
|
declare i64 @foo(ptr)
|
|
declare i32 @baz(i32, ptr)
|
|
declare i32 @quux(ptr, ptr)
|
|
declare i32 @bar(i32)
|
|
declare void @goo(i64, ptr)
|
|
|
|
; neon vector variants of foo
|
|
declare <2 x i64> @vec_foo_linear8_nomask_neon(ptr)
|
|
declare <2 x i64> @vec_foo_linear16_nomask_neon(ptr)
|
|
declare <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32>, ptr)
|
|
declare <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr, ptr)
|
|
declare <4 x i32> @vec_bar_linear3_nomask_neon(i32)
|
|
declare <4 x i32> @vec_bar_linearn5_nomask_neon(i32)
|
|
declare void @vec_goo_linear8_nomask_neon(<2 x i64>, ptr)
|
|
|
|
; scalable vector variants of foo
|
|
declare <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr, <vscale x 2 x i1>)
|
|
declare <vscale x 2 x i64> @vec_foo_linear8_nomask_sve(ptr)
|
|
declare <vscale x 2 x i64> @vec_foo_linear16_nomask_sve(ptr)
|
|
declare <vscale x 4 x i32> @vec_baz_vector_linear4_nomask_sve(<vscale x 4 x i32>, ptr)
|
|
declare <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr, ptr, <vscale x 4 x i1>)
|
|
declare <vscale x 4 x i32> @vec_bar_linear3_nomask_sve(i32)
|
|
declare <vscale x 4 x i32> @vec_bar_linearn5_nomask_sve(i32)
|
|
declare void @vec_goo_linear8_nomask_sve(<vscale x 2 x i64>, ptr)
|
|
declare void @vec_goo_linear8_mask_sve(<vscale x 2 x i64>, ptr, <vscale x 2 x i1>)
|
|
|
|
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsNxl8_foo(vec_foo_linear8_nomask_sve),_ZGVsMxl8_foo(vec_foo_linear8_mask_sve),_ZGVnN2l8_foo(vec_foo_linear8_nomask_neon)" }
|
|
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl4_baz(vec_baz_vector_linear4_nomask_sve),_ZGVnN4vl4_baz(vec_baz_vector_linear4_nomask_neon)" }
|
|
attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsNxl16_foo(vec_foo_linear16_nomask_sve),_ZGVnN2l16_foo(vec_foo_linear16_nomask_neon)" }
|
|
attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxl4l8_quux(vec_quux_linear4_linear8_mask_sve),_ZGVnN4l4l8_quux(vec_quux_linear4_linear8_nomask_neon)" }
|
|
attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVsNxl3_bar(vec_bar_linear3_nomask_sve),_ZGVnN4l3_bar(vec_bar_linear3_nomask_neon)" }
|
|
attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVsNxln5_bar(vec_bar_linearn5_nomask_sve),_ZGVnN4ln5_bar(vec_bar_linearn5_nomask_neon)" }
|
|
attributes #6 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl8_goo(vec_goo_linear8_nomask_sve),_ZGVsMxvl8_goo(vec_goo_linear8_mask_sve),_ZGVsN2vl8_goo(vec_goo_linear8_nomask_neon)" }
|