; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(foo|bar|baz|quux|goo)|extractelement" --version 2 ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=2 -S | FileCheck %s --check-prefixes=NEON_INTERLEAVE ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=2 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_OR_NEON_INTERLEAVE ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=2 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF_INTERLEAVE target triple = "aarch64-unknown-linux-gnu" ; A call whose argument can remain a scalar because it's sequential and only the ; starting value is required. define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) { ; NEON-LABEL: define void @test_linear8 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { ; NEON: [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP1:%.*]], i32 0 ; NEON: [[TMP3:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP2]]) ; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]] ; ; NEON_INTERLEAVE-LABEL: define void @test_linear8 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP2:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP5:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP4]]) ; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP3:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP7:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP6]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear8 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; SVE_OR_NEON: [[TMP14:%.*]] = extractelement [[TMP13:%.*]], i32 0 ; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_foo_linear8_nomask_sve(ptr [[TMP14]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; SVE_OR_NEON_INTERLEAVE: [[TMP33:%.*]] = extractelement [[TMP31:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[TMP34:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP33]], [[ACTIVE_LANE_MASK:%.*]]) ; SVE_OR_NEON_INTERLEAVE: [[TMP35:%.*]] = extractelement [[TMP32:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[TMP36:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP35]], [[ACTIVE_LANE_MASK2:%.*]]) ; SVE_OR_NEON_INTERLEAVE: [[TMP48:%.*]] = extractelement [[TMP46:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear8 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; SVE_TF: [[TMP20:%.*]] = extractelement [[TMP19:%.*]], i32 0 ; SVE_TF: [[TMP21:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP20]], [[ACTIVE_LANE_MASK:%.*]]) ; SVE_TF: [[TMP25:%.*]] = extractelement [[TMP24:%.*]], i32 0 ; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; SVE_TF_INTERLEAVE: [[TMP33:%.*]] = extractelement [[TMP31:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[TMP34:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP33]], [[ACTIVE_LANE_MASK:%.*]]) ; SVE_TF_INTERLEAVE: [[TMP35:%.*]] = extractelement [[TMP32:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[TMP36:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP35]], [[ACTIVE_LANE_MASK2:%.*]]) ; SVE_TF_INTERLEAVE: [[TMP48:%.*]] = extractelement [[TMP46:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %gepb = getelementptr i64, ptr %b, i64 %indvars.iv %data = call i64 @foo(ptr %gepb) #0 %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv store i64 %data, ptr %gepa %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: ret void } define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) { ; NEON-LABEL: define void @test_vector_linear4 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { ; NEON: [[TMP4:%.*]] = extractelement <4 x ptr> [[TMP3:%.*]], i32 0 ; NEON: [[TMP5:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP4]]) ; NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; ; NEON_INTERLEAVE-LABEL: define void @test_vector_linear4 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP8:%.*]] = extractelement <4 x ptr> [[TMP6:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP9:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP8]]) ; NEON_INTERLEAVE: [[TMP10:%.*]] = extractelement <4 x ptr> [[TMP7:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP11:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD2:%.*]], ptr [[TMP10]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_vector_linear4 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON: [[TMP16:%.*]] = extractelement [[TMP15:%.*]], i32 0 ; SVE_OR_NEON: [[TMP17:%.*]] = call @vec_baz_vector_linear4_nomask_sve( [[WIDE_LOAD:%.*]], ptr [[TMP16]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_vector_linear4 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_vector_linear4 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_vector_linear4 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %gepc = getelementptr i32, ptr %c, i64 %indvars.iv %input = load i32, ptr %gepc, align 8 %gepb = getelementptr i32, ptr %b, i64 %indvars.iv %data = call i32 @baz(i32 %input, ptr %gepb) #1 %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv store i32 %data, ptr %gepa, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: ret void } define void @test_linear8_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) { ; NEON-LABEL: define void @test_linear8_bad_stride ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { ; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; ; NEON_INTERLEAVE-LABEL: define void @test_linear8_bad_stride ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP4:%.*]] = call i64 @foo(ptr [[TMP2:%.*]]) #[[ATTR2:[0-9]+]] ; NEON_INTERLEAVE: [[TMP5:%.*]] = call i64 @foo(ptr [[TMP3:%.*]]) #[[ATTR2]] ; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]] ; ; SVE_OR_NEON-LABEL: define void @test_linear8_bad_stride ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8_bad_stride ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear8_bad_stride ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8_bad_stride ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %gepb = getelementptr i64, ptr %b, i64 %indvars.iv %data = call i64 @foo(ptr %gepb) #2 %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv store i64 %data, ptr %gepa %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: ret void } define void @test_linear16_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) { ; NEON-LABEL: define void @test_linear16_wide_stride ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { ; NEON: [[TMP3:%.*]] = extractelement <2 x ptr> [[TMP2:%.*]], i32 0 ; NEON: [[TMP4:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP3]]) ; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]] ; ; NEON_INTERLEAVE-LABEL: define void @test_linear16_wide_stride ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP4:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP7:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP6]]) ; NEON_INTERLEAVE: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP5:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP9:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP8]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]] ; ; SVE_OR_NEON-LABEL: define void @test_linear16_wide_stride ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON: [[TMP15:%.*]] = extractelement [[TMP14:%.*]], i32 0 ; SVE_OR_NEON: [[TMP16:%.*]] = call @vec_foo_linear16_nomask_sve(ptr [[TMP15]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear16_wide_stride ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6]] ; ; SVE_TF-LABEL: define void @test_linear16_wide_stride ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear16_wide_stride ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR6]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %double = mul i64 %indvars.iv, 2 %gepb = getelementptr i64, ptr %b, i64 %double %data = call i64 @foo(ptr %gepb) #2 %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv store i64 %data, ptr %gepa %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: ret void } define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) { ; NEON-LABEL: define void @test_linear4_linear8 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { ; NEON: [[TMP3:%.*]] = extractelement <4 x ptr> [[TMP1:%.*]], i32 0 ; NEON: [[TMP4:%.*]] = extractelement <4 x ptr> [[TMP2:%.*]], i32 0 ; NEON: [[TMP5:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP3]], ptr [[TMP4]]) ; NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; ; NEON_INTERLEAVE-LABEL: define void @test_linear4_linear8 ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP2:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP7:%.*]] = extractelement <4 x ptr> [[TMP4:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP8:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP6]], ptr [[TMP7]]) ; NEON_INTERLEAVE: [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP3:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP10:%.*]] = extractelement <4 x ptr> [[TMP5:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP11:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP9]], ptr [[TMP10]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear4_linear8 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON: [[TMP15:%.*]] = extractelement [[TMP13:%.*]], i32 0 ; SVE_OR_NEON: [[TMP16:%.*]] = extractelement [[TMP14:%.*]], i32 0 ; SVE_OR_NEON: [[TMP17:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP15]], ptr [[TMP16]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear4_linear8 ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON_INTERLEAVE: [[TMP35:%.*]] = extractelement [[TMP31:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[TMP36:%.*]] = extractelement [[TMP33:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[TMP37:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], [[ACTIVE_LANE_MASK:%.*]]) ; SVE_OR_NEON_INTERLEAVE: [[TMP38:%.*]] = extractelement [[TMP32:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[TMP39:%.*]] = extractelement [[TMP34:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[TMP40:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP38]], ptr [[TMP39]], [[ACTIVE_LANE_MASK2:%.*]]) ; SVE_OR_NEON_INTERLEAVE: [[TMP52:%.*]] = extractelement [[TMP50:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear4_linear8 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF: [[TMP21:%.*]] = extractelement [[TMP19:%.*]], i32 0 ; SVE_TF: [[TMP22:%.*]] = extractelement [[TMP20:%.*]], i32 0 ; SVE_TF: [[TMP23:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP21]], ptr [[TMP22]], [[ACTIVE_LANE_MASK:%.*]]) ; SVE_TF: [[TMP27:%.*]] = extractelement [[TMP26:%.*]], i32 0 ; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear4_linear8 ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF_INTERLEAVE: [[TMP35:%.*]] = extractelement [[TMP31:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[TMP36:%.*]] = extractelement [[TMP33:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[TMP37:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], [[ACTIVE_LANE_MASK:%.*]]) ; SVE_TF_INTERLEAVE: [[TMP38:%.*]] = extractelement [[TMP32:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[TMP39:%.*]] = extractelement [[TMP34:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[TMP40:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP38]], ptr [[TMP39]], [[ACTIVE_LANE_MASK2:%.*]]) ; SVE_TF_INTERLEAVE: [[TMP52:%.*]] = extractelement [[TMP50:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %gepc = getelementptr i32, ptr %c, i64 %indvars.iv %gepb = getelementptr i64, ptr %b, i64 %indvars.iv %data = call i32 @quux(ptr %gepc, ptr %gepb) #3 %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv store i32 %data, ptr %gepa, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: ret void } define void @test_linear3_non_ptr(ptr noalias %a, i64 %n) { ; NEON-LABEL: define void @test_linear3_non_ptr ; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) { ; NEON: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1:%.*]], i32 0 ; NEON: [[TMP3:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP2]]) ; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]] ; ; NEON_INTERLEAVE-LABEL: define void @test_linear3_non_ptr ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP5:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP4]]) ; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP7:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP6]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear3_non_ptr ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON: [[TMP14:%.*]] = extractelement [[TMP13:%.*]], i32 0 ; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_bar_linear3_nomask_sve(i32 [[TMP14]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR6:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear3_non_ptr ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR8:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear3_non_ptr ; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR8:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear3_non_ptr ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR8:[0-9]+]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %little.iv = trunc i64 %indvars.iv to i32 %trebled = mul i32 %little.iv, 3 %data = call i32 @bar(i32 %trebled) #4 %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv store i32 %data, ptr %gepa %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: ret void } define void @test_linearn5_non_ptr_neg_stride(ptr noalias %a, i64 %n) { ; NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride ; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) { ; NEON: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1:%.*]], i32 0 ; NEON: [[TMP3:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP2]]) ; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]] ; ; NEON_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP5:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP4]]) ; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP7:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP6]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON: [[TMP14:%.*]] = extractelement [[TMP13:%.*]], i32 0 ; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_bar_linearn5_nomask_sve(i32 [[TMP14]]) ; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR7:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR9:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linearn5_non_ptr_neg_stride ; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR9:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linearn5_non_ptr_neg_stride ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR9:[0-9]+]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %little.iv = trunc i64 %indvars.iv to i32 %negstride = mul i32 %little.iv, -5 %data = call i32 @bar(i32 %negstride) #5 %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv store i32 %data, ptr %gepa %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: ret void } define void @test_linear8_return_void(ptr noalias %in, ptr noalias %out, i64 %n) { ; NEON-LABEL: define void @test_linear8_return_void ; NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) { ; NEON: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3:%.*]], i32 0 ; NEON: call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD:%.*]], ptr [[TMP4]]) ; NEON: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]] ; ; NEON_INTERLEAVE-LABEL: define void @test_linear8_return_void ; NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP6:%.*]], i32 0 ; NEON_INTERLEAVE: call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD:%.*]], ptr [[TMP8]]) ; NEON_INTERLEAVE: [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP7:%.*]], i32 0 ; NEON_INTERLEAVE: call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD2:%.*]], ptr [[TMP9]]) ; NEON_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear8_return_void ; SVE_OR_NEON-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON: [[TMP16:%.*]] = extractelement [[TMP15:%.*]], i32 0 ; SVE_OR_NEON: call void @vec_goo_linear8_nomask_sve( [[WIDE_LOAD:%.*]], ptr [[TMP16]]) ; SVE_OR_NEON: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR8:[0-9]+]] ; ; SVE_OR_NEON_INTERLEAVE-LABEL: define void @test_linear8_return_void ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON_INTERLEAVE: [[TMP39:%.*]] = extractelement [[TMP37:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP39]], [[ACTIVE_LANE_MASK:%.*]]) ; SVE_OR_NEON_INTERLEAVE: [[TMP40:%.*]] = extractelement [[TMP38:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP40]], [[ACTIVE_LANE_MASK2:%.*]]) ; SVE_OR_NEON_INTERLEAVE: [[TMP46:%.*]] = extractelement [[TMP44:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear8_return_void ; SVE_TF-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF: [[TMP22:%.*]] = extractelement [[TMP21:%.*]], i32 0 ; SVE_TF: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP22]], [[ACTIVE_LANE_MASK:%.*]]) ; SVE_TF: [[TMP24:%.*]] = extractelement [[TMP23:%.*]], i32 0 ; SVE_TF: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]] ; ; SVE_TF_INTERLEAVE-LABEL: define void @test_linear8_return_void ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF_INTERLEAVE: [[TMP39:%.*]] = extractelement [[TMP37:%.*]], i32 0 ; SVE_TF_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP39]], [[ACTIVE_LANE_MASK:%.*]]) ; SVE_TF_INTERLEAVE: [[TMP40:%.*]] = extractelement [[TMP38:%.*]], i32 0 ; SVE_TF_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP40]], [[ACTIVE_LANE_MASK2:%.*]]) ; SVE_TF_INTERLEAVE: [[TMP46:%.*]] = extractelement [[TMP44:%.*]], i32 0 ; SVE_TF_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %gep.in = getelementptr i64, ptr %in, i64 %indvars.iv %num = load i64, ptr %gep.in, align 8 %gep.out = getelementptr i64, ptr %out, i64 %indvars.iv call void @goo(i64 %num, ptr %gep.out) #6 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: ret void } declare i64 @foo(ptr) declare i32 @baz(i32, ptr) declare i32 @quux(ptr, ptr) declare i32 @bar(i32) declare void @goo(i64, ptr) ; neon vector variants of foo declare <2 x i64> @vec_foo_linear8_nomask_neon(ptr) declare <2 x i64> @vec_foo_linear16_nomask_neon(ptr) declare <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32>, ptr) declare <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr, ptr) declare <4 x i32> @vec_bar_linear3_nomask_neon(i32) declare <4 x i32> @vec_bar_linearn5_nomask_neon(i32) declare void @vec_goo_linear8_nomask_neon(<2 x i64>, ptr) ; scalable vector variants of foo declare @vec_foo_linear8_mask_sve(ptr, ) declare @vec_foo_linear8_nomask_sve(ptr) declare @vec_foo_linear16_nomask_sve(ptr) declare @vec_baz_vector_linear4_nomask_sve(, ptr) declare @vec_quux_linear4_linear8_mask_sve(ptr, ptr, ) declare @vec_bar_linear3_nomask_sve(i32) declare @vec_bar_linearn5_nomask_sve(i32) declare void @vec_goo_linear8_nomask_sve(, ptr) declare void @vec_goo_linear8_mask_sve(, ptr, ) attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsNxl8_foo(vec_foo_linear8_nomask_sve),_ZGVsMxl8_foo(vec_foo_linear8_mask_sve),_ZGVnN2l8_foo(vec_foo_linear8_nomask_neon)" } attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl4_baz(vec_baz_vector_linear4_nomask_sve),_ZGVnN4vl4_baz(vec_baz_vector_linear4_nomask_neon)" } attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsNxl16_foo(vec_foo_linear16_nomask_sve),_ZGVnN2l16_foo(vec_foo_linear16_nomask_neon)" } attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxl4l8_quux(vec_quux_linear4_linear8_mask_sve),_ZGVnN4l4l8_quux(vec_quux_linear4_linear8_nomask_neon)" } attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVsNxl3_bar(vec_bar_linear3_nomask_sve),_ZGVnN4l3_bar(vec_bar_linear3_nomask_neon)" } attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVsNxln5_bar(vec_bar_linearn5_nomask_sve),_ZGVnN4ln5_bar(vec_bar_linearn5_nomask_neon)" } attributes #6 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl8_goo(vec_goo_linear8_nomask_sve),_ZGVsMxvl8_goo(vec_goo_linear8_mask_sve),_ZGVsN2vl8_goo(vec_goo_linear8_nomask_neon)" }