The idea behind this canonicalization is that it allows us to handle less patterns, because we know that some will be canonicalized away. This is indeed very useful to e.g. know that constants are always on the right. However, this is only useful if the canonicalization is actually reliable. This is the case for constants, but not for arguments: Moving these to the right makes it look like the "more complex" expression is guaranteed to be on the left, but this is not actually the case in practice. It fails as soon as you replace the argument with another instruction. The end result is that it looks like things correctly work in tests, while they actually don't. We use the "thwart complexity-based canonicalization" trick to handle this in tests, but it's often a challenge for new contributors to get this right, and based on the regressions this PR originally exposed, we clearly don't get this right in many cases. For this reason, I think that it's better to remove this complexity canonicalization. It will make it much easier to write tests for commuted cases and make sure that they are handled.
540 lines
34 KiB
LLVM
540 lines
34 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt -passes='default<O3>' -enable-matrix -S %s | FileCheck %s
|
|
|
|
target triple = "arm64-apple-ios"
|
|
|
|
define void @matrix_extract_insert_scalar(i32 %i, i32 %k, i32 %j, ptr nonnull align 8 dereferenceable(1800) %A, ptr nonnull align 8 dereferenceable(1800) %B) #0 {
|
|
; CHECK-LABEL: @matrix_extract_insert_scalar(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[K:%.*]] to i64
|
|
; CHECK-NEXT: [[CONV1:%.*]] = zext i32 [[J:%.*]] to i64
|
|
; CHECK-NEXT: [[TMP0:%.*]] = mul nuw nsw i64 [[CONV1]], 15
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], [[CONV]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 225
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]])
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <225 x double>, ptr [[A:%.*]], i64 0, i64 [[TMP1]]
|
|
; CHECK-NEXT: [[MATRIXEXT:%.*]] = load double, ptr [[TMP3]], align 8
|
|
; CHECK-NEXT: [[CONV2:%.*]] = zext i32 [[I:%.*]] to i64
|
|
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP0]], [[CONV2]]
|
|
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 225
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP5]])
|
|
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds <225 x double>, ptr [[B:%.*]], i64 0, i64 [[TMP4]]
|
|
; CHECK-NEXT: [[MATRIXEXT4:%.*]] = load double, ptr [[TMP6]], align 8
|
|
; CHECK-NEXT: [[MUL:%.*]] = fmul double [[MATRIXEXT]], [[MATRIXEXT4]]
|
|
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP1]]
|
|
; CHECK-NEXT: [[MATRIXEXT7:%.*]] = load double, ptr [[TMP7]], align 8
|
|
; CHECK-NEXT: [[SUB:%.*]] = fsub double [[MATRIXEXT7]], [[MUL]]
|
|
; CHECK-NEXT: store double [[SUB]], ptr [[TMP7]], align 8
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%i.addr = alloca i32, align 4
|
|
%k.addr = alloca i32, align 4
|
|
%j.addr = alloca i32, align 4
|
|
%A.addr = alloca ptr, align 8
|
|
%B.addr = alloca ptr, align 8
|
|
store i32 %i, ptr %i.addr, align 4
|
|
store i32 %k, ptr %k.addr, align 4
|
|
store i32 %j, ptr %j.addr, align 4
|
|
store ptr %A, ptr %A.addr, align 8
|
|
store ptr %B, ptr %B.addr, align 8
|
|
%0 = load i32, ptr %k.addr, align 4
|
|
%conv = zext i32 %0 to i64
|
|
%1 = load i32, ptr %j.addr, align 4
|
|
%conv1 = zext i32 %1 to i64
|
|
%2 = mul i64 %conv1, 15
|
|
%3 = add i64 %2, %conv
|
|
%4 = icmp ult i64 %3, 225
|
|
call void @llvm.assume(i1 %4)
|
|
%5 = load ptr, ptr %A.addr, align 8
|
|
%6 = load <225 x double>, ptr %5, align 8
|
|
%matrixext = extractelement <225 x double> %6, i64 %3
|
|
%7 = load i32, ptr %i.addr, align 4
|
|
%conv2 = zext i32 %7 to i64
|
|
%8 = load i32, ptr %j.addr, align 4
|
|
%conv3 = zext i32 %8 to i64
|
|
%9 = mul i64 %conv3, 15
|
|
%10 = add i64 %9, %conv2
|
|
%11 = icmp ult i64 %10, 225
|
|
call void @llvm.assume(i1 %11)
|
|
%12 = load ptr, ptr %B.addr, align 8
|
|
%13 = load <225 x double>, ptr %12, align 8
|
|
%matrixext4 = extractelement <225 x double> %13, i64 %10
|
|
%mul = fmul double %matrixext, %matrixext4
|
|
%14 = load ptr, ptr %B.addr, align 8
|
|
%15 = load i32, ptr %k.addr, align 4
|
|
%conv5 = zext i32 %15 to i64
|
|
%16 = load i32, ptr %j.addr, align 4
|
|
%conv6 = zext i32 %16 to i64
|
|
%17 = mul i64 %conv6, 15
|
|
%18 = add i64 %17, %conv5
|
|
%19 = icmp ult i64 %18, 225
|
|
call void @llvm.assume(i1 %19)
|
|
%20 = load <225 x double>, ptr %14, align 8
|
|
%matrixext7 = extractelement <225 x double> %20, i64 %18
|
|
%sub = fsub double %matrixext7, %mul
|
|
%21 = icmp ult i64 %18, 225
|
|
call void @llvm.assume(i1 %21)
|
|
%22 = load <225 x double>, ptr %14, align 8
|
|
%matins = insertelement <225 x double> %22, double %sub, i64 %18
|
|
store <225 x double> %matins, ptr %14, align 8
|
|
ret void
|
|
}
|
|
define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferenceable(1800) %A, ptr nonnull align 8 dereferenceable(1800) %B) {
|
|
; CHECK-LABEL: @matrix_extract_insert_loop(
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CMP210_NOT:%.*]] = icmp eq i32 [[I:%.*]], 0
|
|
; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[I]] to i64
|
|
; CHECK-NEXT: br i1 [[CMP210_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
|
|
; CHECK: for.cond1.preheader.us.preheader:
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[CONV6]], 3
|
|
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 360
|
|
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP1]]
|
|
; CHECK-NEXT: [[SCEVGEP20:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]]
|
|
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[I]], 225
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP2]])
|
|
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[CONV6]]
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_MEMCHECK:%.*]]
|
|
; CHECK: vector.memcheck:
|
|
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP20]]
|
|
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
|
|
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
|
|
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY4_US_PREHEADER]], label [[VECTOR_PH:%.*]]
|
|
; CHECK: vector.ph:
|
|
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[CONV6]], 252
|
|
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
|
|
; CHECK: vector.body:
|
|
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
|
; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 1
|
|
; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 2
|
|
; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[INDEX]], 3
|
|
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0
|
|
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP4]], i64 1
|
|
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
|
|
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP6]], i64 1
|
|
; CHECK-NEXT: [[TMP11:%.*]] = icmp ult <2 x i64> [[TMP8]], <i64 225, i64 225>
|
|
; CHECK-NEXT: [[TMP12:%.*]] = icmp ult <2 x i64> [[TMP10]], <i64 225, i64 225>
|
|
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i64 0
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
|
|
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i64 1
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]])
|
|
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP12]], i64 0
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP15]])
|
|
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP12]], i64 1
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP16]])
|
|
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[INDEX]]
|
|
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i64 16
|
|
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP17]], align 8, !alias.scope [[META0:![0-9]+]]
|
|
; CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <2 x double>, ptr [[TMP18]], align 8, !alias.scope [[META0]]
|
|
; CHECK-NEXT: [[TMP19:%.*]] = load double, ptr [[TMP3]], align 8, !alias.scope [[META3:![0-9]+]]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <2 x double> poison, double [[TMP19]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT22]], <2 x double> poison, <2 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[TMP20:%.*]] = fmul <2 x double> [[WIDE_LOAD]], [[BROADCAST_SPLAT23]]
|
|
; CHECK-NEXT: [[TMP21:%.*]] = fmul <2 x double> [[WIDE_LOAD21]], [[BROADCAST_SPLAT23]]
|
|
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[INDEX]]
|
|
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i64 16
|
|
; CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <2 x double>, ptr [[TMP22]], align 8, !alias.scope [[META5:![0-9]+]], !noalias [[META0]]
|
|
; CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <2 x double>, ptr [[TMP23]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: [[TMP24:%.*]] = fsub <2 x double> [[WIDE_LOAD24]], [[TMP20]]
|
|
; CHECK-NEXT: [[TMP25:%.*]] = fsub <2 x double> [[WIDE_LOAD25]], [[TMP21]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP24]], ptr [[TMP22]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP25]], ptr [[TMP23]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
|
|
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
|
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
|
|
; CHECK: middle.block:
|
|
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[CONV6]]
|
|
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]], label [[FOR_BODY4_US_PREHEADER]]
|
|
; CHECK: for.body4.us.preheader:
|
|
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY4_US:%.*]]
|
|
; CHECK: for.body4.us:
|
|
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY4_US_PREHEADER]] ]
|
|
; CHECK-NEXT: [[TMP27:%.*]] = icmp ult i64 [[INDVARS_IV]], 225
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP27]])
|
|
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[MATRIXEXT_US:%.*]] = load double, ptr [[TMP28]], align 8
|
|
; CHECK-NEXT: [[MATRIXEXT8_US:%.*]] = load double, ptr [[TMP3]], align 8
|
|
; CHECK-NEXT: [[MUL_US:%.*]] = fmul double [[MATRIXEXT_US]], [[MATRIXEXT8_US]]
|
|
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[INDVARS_IV]]
|
|
; CHECK-NEXT: [[MATRIXEXT11_US:%.*]] = load double, ptr [[TMP29]], align 8
|
|
; CHECK-NEXT: [[SUB_US:%.*]] = fsub double [[MATRIXEXT11_US]], [[MUL_US]]
|
|
; CHECK-NEXT: store double [[SUB_US]], ptr [[TMP29]], align 8
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
|
|
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[CONV6]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label [[FOR_BODY4_US]], !llvm.loop [[LOOP10:![0-9]+]]
|
|
; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us:
|
|
; CHECK-NEXT: [[TMP30:%.*]] = add nuw nsw i64 [[CONV6]], 15
|
|
; CHECK-NEXT: [[TMP31:%.*]] = icmp ult i32 [[I]], 210
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP31]])
|
|
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP30]]
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK_1:%.*]] = icmp ult i32 [[I]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_1]], label [[FOR_BODY4_US_PREHEADER_1:%.*]], label [[VECTOR_MEMCHECK_1:%.*]]
|
|
; CHECK: vector.memcheck.1:
|
|
; CHECK-NEXT: [[BOUND0_1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP20]]
|
|
; CHECK-NEXT: [[BOUND1_1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
|
|
; CHECK-NEXT: [[FOUND_CONFLICT_1:%.*]] = and i1 [[BOUND0_1]], [[BOUND1_1]]
|
|
; CHECK-NEXT: br i1 [[FOUND_CONFLICT_1]], label [[FOR_BODY4_US_PREHEADER_1]], label [[VECTOR_PH_1:%.*]]
|
|
; CHECK: vector.ph.1:
|
|
; CHECK-NEXT: [[N_VEC_1:%.*]] = and i64 [[CONV6]], 252
|
|
; CHECK-NEXT: br label [[VECTOR_BODY_1:%.*]]
|
|
; CHECK: vector.body.1:
|
|
; CHECK-NEXT: [[INDEX_1:%.*]] = phi i64 [ 0, [[VECTOR_PH_1]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY_1]] ]
|
|
; CHECK-NEXT: [[TMP33:%.*]] = add nuw nsw i64 [[INDEX_1]], 15
|
|
; CHECK-NEXT: [[TMP34:%.*]] = add nuw nsw i64 [[INDEX_1]], 16
|
|
; CHECK-NEXT: [[TMP35:%.*]] = insertelement <2 x i64> poison, i64 [[TMP33]], i64 0
|
|
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i64> [[TMP35]], i64 [[TMP34]], i64 1
|
|
; CHECK-NEXT: [[TMP37:%.*]] = add nuw nsw i64 [[INDEX_1]], 17
|
|
; CHECK-NEXT: [[TMP38:%.*]] = add nuw nsw i64 [[INDEX_1]], 18
|
|
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[TMP37]], i64 0
|
|
; CHECK-NEXT: [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[TMP38]], i64 1
|
|
; CHECK-NEXT: [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], <i64 225, i64 225>
|
|
; CHECK-NEXT: [[TMP42:%.*]] = icmp ult <2 x i64> [[TMP40]], <i64 225, i64 225>
|
|
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x i1> [[TMP41]], i64 0
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP43]])
|
|
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP41]], i64 1
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP44]])
|
|
; CHECK-NEXT: [[TMP45:%.*]] = extractelement <2 x i1> [[TMP42]], i64 0
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP45]])
|
|
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <2 x i1> [[TMP42]], i64 1
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP46]])
|
|
; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP33]]
|
|
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr [[TMP47]], i64 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <2 x double>, ptr [[TMP47]], align 8, !alias.scope [[META0]]
|
|
; CHECK-NEXT: [[WIDE_LOAD21_1:%.*]] = load <2 x double>, ptr [[TMP48]], align 8, !alias.scope [[META0]]
|
|
; CHECK-NEXT: [[TMP49:%.*]] = load double, ptr [[TMP32]], align 8, !alias.scope [[META3]]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT22_1:%.*]] = insertelement <2 x double> poison, double [[TMP49]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT23_1:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT22_1]], <2 x double> poison, <2 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[TMP50:%.*]] = fmul <2 x double> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT23_1]]
|
|
; CHECK-NEXT: [[TMP51:%.*]] = fmul <2 x double> [[WIDE_LOAD21_1]], [[BROADCAST_SPLAT23_1]]
|
|
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP33]]
|
|
; CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds i8, ptr [[TMP52]], i64 16
|
|
; CHECK-NEXT: [[WIDE_LOAD24_1:%.*]] = load <2 x double>, ptr [[TMP52]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: [[WIDE_LOAD25_1:%.*]] = load <2 x double>, ptr [[TMP53]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: [[TMP54:%.*]] = fsub <2 x double> [[WIDE_LOAD24_1]], [[TMP50]]
|
|
; CHECK-NEXT: [[TMP55:%.*]] = fsub <2 x double> [[WIDE_LOAD25_1]], [[TMP51]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP54]], ptr [[TMP52]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP55]], ptr [[TMP53]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: [[INDEX_NEXT_1]] = add nuw i64 [[INDEX_1]], 4
|
|
; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i64 [[INDEX_NEXT_1]], [[N_VEC_1]]
|
|
; CHECK-NEXT: br i1 [[TMP56]], label [[MIDDLE_BLOCK_1:%.*]], label [[VECTOR_BODY_1]], !llvm.loop [[LOOP7]]
|
|
; CHECK: middle.block.1:
|
|
; CHECK-NEXT: [[CMP_N_1:%.*]] = icmp eq i64 [[N_VEC_1]], [[CONV6]]
|
|
; CHECK-NEXT: br i1 [[CMP_N_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]], label [[FOR_BODY4_US_PREHEADER_1]]
|
|
; CHECK: for.body4.us.preheader.1:
|
|
; CHECK-NEXT: [[INDVARS_IV_PH_1:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK_1]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[N_VEC_1]], [[MIDDLE_BLOCK_1]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY4_US_1:%.*]]
|
|
; CHECK: for.body4.us.1:
|
|
; CHECK-NEXT: [[INDVARS_IV_1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1:%.*]], [[FOR_BODY4_US_1]] ], [ [[INDVARS_IV_PH_1]], [[FOR_BODY4_US_PREHEADER_1]] ]
|
|
; CHECK-NEXT: [[TMP57:%.*]] = add nuw nsw i64 [[INDVARS_IV_1]], 15
|
|
; CHECK-NEXT: [[TMP58:%.*]] = icmp ult i64 [[INDVARS_IV_1]], 210
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP58]])
|
|
; CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP57]]
|
|
; CHECK-NEXT: [[MATRIXEXT_US_1:%.*]] = load double, ptr [[TMP59]], align 8
|
|
; CHECK-NEXT: [[MATRIXEXT8_US_1:%.*]] = load double, ptr [[TMP32]], align 8
|
|
; CHECK-NEXT: [[MUL_US_1:%.*]] = fmul double [[MATRIXEXT_US_1]], [[MATRIXEXT8_US_1]]
|
|
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP57]]
|
|
; CHECK-NEXT: [[MATRIXEXT11_US_1:%.*]] = load double, ptr [[TMP60]], align 8
|
|
; CHECK-NEXT: [[SUB_US_1:%.*]] = fsub double [[MATRIXEXT11_US_1]], [[MUL_US_1]]
|
|
; CHECK-NEXT: store double [[SUB_US_1]], ptr [[TMP60]], align 8
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV_1]], 1
|
|
; CHECK-NEXT: [[EXITCOND_NOT_1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1]], [[CONV6]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND_NOT_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]], label [[FOR_BODY4_US_1]], !llvm.loop [[LOOP10]]
|
|
; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.1:
|
|
; CHECK-NEXT: [[TMP61:%.*]] = add nuw nsw i64 [[CONV6]], 30
|
|
; CHECK-NEXT: [[TMP62:%.*]] = icmp ult i32 [[I]], 195
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP62]])
|
|
; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP61]]
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK_2:%.*]] = icmp ult i32 [[I]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_2]], label [[FOR_BODY4_US_PREHEADER_2:%.*]], label [[VECTOR_MEMCHECK_2:%.*]]
|
|
; CHECK: vector.memcheck.2:
|
|
; CHECK-NEXT: [[BOUND0_2:%.*]] = icmp ult ptr [[B]], [[SCEVGEP20]]
|
|
; CHECK-NEXT: [[BOUND1_2:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
|
|
; CHECK-NEXT: [[FOUND_CONFLICT_2:%.*]] = and i1 [[BOUND0_2]], [[BOUND1_2]]
|
|
; CHECK-NEXT: br i1 [[FOUND_CONFLICT_2]], label [[FOR_BODY4_US_PREHEADER_2]], label [[VECTOR_PH_2:%.*]]
|
|
; CHECK: vector.ph.2:
|
|
; CHECK-NEXT: [[N_VEC_2:%.*]] = and i64 [[CONV6]], 252
|
|
; CHECK-NEXT: br label [[VECTOR_BODY_2:%.*]]
|
|
; CHECK: vector.body.2:
|
|
; CHECK-NEXT: [[INDEX_2:%.*]] = phi i64 [ 0, [[VECTOR_PH_2]] ], [ [[INDEX_NEXT_2:%.*]], [[VECTOR_BODY_2]] ]
|
|
; CHECK-NEXT: [[TMP64:%.*]] = add nuw nsw i64 [[INDEX_2]], 30
|
|
; CHECK-NEXT: [[TMP65:%.*]] = add nuw nsw i64 [[INDEX_2]], 31
|
|
; CHECK-NEXT: [[TMP66:%.*]] = insertelement <2 x i64> poison, i64 [[TMP64]], i64 0
|
|
; CHECK-NEXT: [[TMP67:%.*]] = insertelement <2 x i64> [[TMP66]], i64 [[TMP65]], i64 1
|
|
; CHECK-NEXT: [[TMP68:%.*]] = add nuw nsw i64 [[INDEX_2]], 32
|
|
; CHECK-NEXT: [[TMP69:%.*]] = add nuw nsw i64 [[INDEX_2]], 33
|
|
; CHECK-NEXT: [[TMP70:%.*]] = insertelement <2 x i64> poison, i64 [[TMP68]], i64 0
|
|
; CHECK-NEXT: [[TMP71:%.*]] = insertelement <2 x i64> [[TMP70]], i64 [[TMP69]], i64 1
|
|
; CHECK-NEXT: [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], <i64 225, i64 225>
|
|
; CHECK-NEXT: [[TMP73:%.*]] = icmp ult <2 x i64> [[TMP71]], <i64 225, i64 225>
|
|
; CHECK-NEXT: [[TMP74:%.*]] = extractelement <2 x i1> [[TMP72]], i64 0
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP74]])
|
|
; CHECK-NEXT: [[TMP75:%.*]] = extractelement <2 x i1> [[TMP72]], i64 1
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP75]])
|
|
; CHECK-NEXT: [[TMP76:%.*]] = extractelement <2 x i1> [[TMP73]], i64 0
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP76]])
|
|
; CHECK-NEXT: [[TMP77:%.*]] = extractelement <2 x i1> [[TMP73]], i64 1
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP77]])
|
|
; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP64]]
|
|
; CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds i8, ptr [[TMP78]], i64 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <2 x double>, ptr [[TMP78]], align 8, !alias.scope [[META0]]
|
|
; CHECK-NEXT: [[WIDE_LOAD21_2:%.*]] = load <2 x double>, ptr [[TMP79]], align 8, !alias.scope [[META0]]
|
|
; CHECK-NEXT: [[TMP80:%.*]] = load double, ptr [[TMP63]], align 8, !alias.scope [[META3]]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT22_2:%.*]] = insertelement <2 x double> poison, double [[TMP80]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT23_2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT22_2]], <2 x double> poison, <2 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[TMP81:%.*]] = fmul <2 x double> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT23_2]]
|
|
; CHECK-NEXT: [[TMP82:%.*]] = fmul <2 x double> [[WIDE_LOAD21_2]], [[BROADCAST_SPLAT23_2]]
|
|
; CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP64]]
|
|
; CHECK-NEXT: [[TMP84:%.*]] = getelementptr inbounds i8, ptr [[TMP83]], i64 16
|
|
; CHECK-NEXT: [[WIDE_LOAD24_2:%.*]] = load <2 x double>, ptr [[TMP83]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: [[WIDE_LOAD25_2:%.*]] = load <2 x double>, ptr [[TMP84]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: [[TMP85:%.*]] = fsub <2 x double> [[WIDE_LOAD24_2]], [[TMP81]]
|
|
; CHECK-NEXT: [[TMP86:%.*]] = fsub <2 x double> [[WIDE_LOAD25_2]], [[TMP82]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP85]], ptr [[TMP83]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP86]], ptr [[TMP84]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: [[INDEX_NEXT_2]] = add nuw i64 [[INDEX_2]], 4
|
|
; CHECK-NEXT: [[TMP87:%.*]] = icmp eq i64 [[INDEX_NEXT_2]], [[N_VEC_2]]
|
|
; CHECK-NEXT: br i1 [[TMP87]], label [[MIDDLE_BLOCK_2:%.*]], label [[VECTOR_BODY_2]], !llvm.loop [[LOOP7]]
|
|
; CHECK: middle.block.2:
|
|
; CHECK-NEXT: [[CMP_N_2:%.*]] = icmp eq i64 [[N_VEC_2]], [[CONV6]]
|
|
; CHECK-NEXT: br i1 [[CMP_N_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]], label [[FOR_BODY4_US_PREHEADER_2]]
|
|
; CHECK: for.body4.us.preheader.2:
|
|
; CHECK-NEXT: [[INDVARS_IV_PH_2:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK_2]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[N_VEC_2]], [[MIDDLE_BLOCK_2]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY4_US_2:%.*]]
|
|
; CHECK: for.body4.us.2:
|
|
; CHECK-NEXT: [[INDVARS_IV_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_2:%.*]], [[FOR_BODY4_US_2]] ], [ [[INDVARS_IV_PH_2]], [[FOR_BODY4_US_PREHEADER_2]] ]
|
|
; CHECK-NEXT: [[TMP88:%.*]] = add nuw nsw i64 [[INDVARS_IV_2]], 30
|
|
; CHECK-NEXT: [[TMP89:%.*]] = icmp ult i64 [[INDVARS_IV_2]], 195
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP89]])
|
|
; CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP88]]
|
|
; CHECK-NEXT: [[MATRIXEXT_US_2:%.*]] = load double, ptr [[TMP90]], align 8
|
|
; CHECK-NEXT: [[MATRIXEXT8_US_2:%.*]] = load double, ptr [[TMP63]], align 8
|
|
; CHECK-NEXT: [[MUL_US_2:%.*]] = fmul double [[MATRIXEXT_US_2]], [[MATRIXEXT8_US_2]]
|
|
; CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP88]]
|
|
; CHECK-NEXT: [[MATRIXEXT11_US_2:%.*]] = load double, ptr [[TMP91]], align 8
|
|
; CHECK-NEXT: [[SUB_US_2:%.*]] = fsub double [[MATRIXEXT11_US_2]], [[MUL_US_2]]
|
|
; CHECK-NEXT: store double [[SUB_US_2]], ptr [[TMP91]], align 8
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT_2]] = add nuw nsw i64 [[INDVARS_IV_2]], 1
|
|
; CHECK-NEXT: [[EXITCOND_NOT_2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2]], [[CONV6]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND_NOT_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]], label [[FOR_BODY4_US_2]], !llvm.loop [[LOOP10]]
|
|
; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.2:
|
|
; CHECK-NEXT: [[TMP92:%.*]] = add nuw nsw i64 [[CONV6]], 45
|
|
; CHECK-NEXT: [[TMP93:%.*]] = icmp ult i32 [[I]], 180
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP93]])
|
|
; CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP92]]
|
|
; CHECK-NEXT: [[MIN_ITERS_CHECK_3:%.*]] = icmp ult i32 [[I]], 4
|
|
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK_3]], label [[FOR_BODY4_US_PREHEADER_3:%.*]], label [[VECTOR_MEMCHECK_3:%.*]]
|
|
; CHECK: vector.memcheck.3:
|
|
; CHECK-NEXT: [[BOUND0_3:%.*]] = icmp ult ptr [[B]], [[SCEVGEP20]]
|
|
; CHECK-NEXT: [[BOUND1_3:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]]
|
|
; CHECK-NEXT: [[FOUND_CONFLICT_3:%.*]] = and i1 [[BOUND0_3]], [[BOUND1_3]]
|
|
; CHECK-NEXT: br i1 [[FOUND_CONFLICT_3]], label [[FOR_BODY4_US_PREHEADER_3]], label [[VECTOR_PH_3:%.*]]
|
|
; CHECK: vector.ph.3:
|
|
; CHECK-NEXT: [[N_VEC_3:%.*]] = and i64 [[CONV6]], 252
|
|
; CHECK-NEXT: br label [[VECTOR_BODY_3:%.*]]
|
|
; CHECK: vector.body.3:
|
|
; CHECK-NEXT: [[INDEX_3:%.*]] = phi i64 [ 0, [[VECTOR_PH_3]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY_3]] ]
|
|
; CHECK-NEXT: [[TMP95:%.*]] = add nuw nsw i64 [[INDEX_3]], 45
|
|
; CHECK-NEXT: [[TMP96:%.*]] = add nuw nsw i64 [[INDEX_3]], 46
|
|
; CHECK-NEXT: [[TMP97:%.*]] = insertelement <2 x i64> poison, i64 [[TMP95]], i64 0
|
|
; CHECK-NEXT: [[TMP98:%.*]] = insertelement <2 x i64> [[TMP97]], i64 [[TMP96]], i64 1
|
|
; CHECK-NEXT: [[TMP99:%.*]] = add nuw nsw i64 [[INDEX_3]], 47
|
|
; CHECK-NEXT: [[TMP100:%.*]] = add nuw nsw i64 [[INDEX_3]], 48
|
|
; CHECK-NEXT: [[TMP101:%.*]] = insertelement <2 x i64> poison, i64 [[TMP99]], i64 0
|
|
; CHECK-NEXT: [[TMP102:%.*]] = insertelement <2 x i64> [[TMP101]], i64 [[TMP100]], i64 1
|
|
; CHECK-NEXT: [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], <i64 225, i64 225>
|
|
; CHECK-NEXT: [[TMP104:%.*]] = icmp ult <2 x i64> [[TMP102]], <i64 225, i64 225>
|
|
; CHECK-NEXT: [[TMP105:%.*]] = extractelement <2 x i1> [[TMP103]], i64 0
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP105]])
|
|
; CHECK-NEXT: [[TMP106:%.*]] = extractelement <2 x i1> [[TMP103]], i64 1
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP106]])
|
|
; CHECK-NEXT: [[TMP107:%.*]] = extractelement <2 x i1> [[TMP104]], i64 0
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP107]])
|
|
; CHECK-NEXT: [[TMP108:%.*]] = extractelement <2 x i1> [[TMP104]], i64 1
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP108]])
|
|
; CHECK-NEXT: [[TMP109:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP95]]
|
|
; CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds i8, ptr [[TMP109]], i64 16
|
|
; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <2 x double>, ptr [[TMP109]], align 8, !alias.scope [[META0]]
|
|
; CHECK-NEXT: [[WIDE_LOAD21_3:%.*]] = load <2 x double>, ptr [[TMP110]], align 8, !alias.scope [[META0]]
|
|
; CHECK-NEXT: [[TMP111:%.*]] = load double, ptr [[TMP94]], align 8, !alias.scope [[META3]]
|
|
; CHECK-NEXT: [[BROADCAST_SPLATINSERT22_3:%.*]] = insertelement <2 x double> poison, double [[TMP111]], i64 0
|
|
; CHECK-NEXT: [[BROADCAST_SPLAT23_3:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT22_3]], <2 x double> poison, <2 x i32> zeroinitializer
|
|
; CHECK-NEXT: [[TMP112:%.*]] = fmul <2 x double> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT23_3]]
|
|
; CHECK-NEXT: [[TMP113:%.*]] = fmul <2 x double> [[WIDE_LOAD21_3]], [[BROADCAST_SPLAT23_3]]
|
|
; CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP95]]
|
|
; CHECK-NEXT: [[TMP115:%.*]] = getelementptr inbounds i8, ptr [[TMP114]], i64 16
|
|
; CHECK-NEXT: [[WIDE_LOAD24_3:%.*]] = load <2 x double>, ptr [[TMP114]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: [[WIDE_LOAD25_3:%.*]] = load <2 x double>, ptr [[TMP115]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: [[TMP116:%.*]] = fsub <2 x double> [[WIDE_LOAD24_3]], [[TMP112]]
|
|
; CHECK-NEXT: [[TMP117:%.*]] = fsub <2 x double> [[WIDE_LOAD25_3]], [[TMP113]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP116]], ptr [[TMP114]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: store <2 x double> [[TMP117]], ptr [[TMP115]], align 8, !alias.scope [[META5]], !noalias [[META0]]
|
|
; CHECK-NEXT: [[INDEX_NEXT_3]] = add nuw i64 [[INDEX_3]], 4
|
|
; CHECK-NEXT: [[TMP118:%.*]] = icmp eq i64 [[INDEX_NEXT_3]], [[N_VEC_3]]
|
|
; CHECK-NEXT: br i1 [[TMP118]], label [[MIDDLE_BLOCK_3:%.*]], label [[VECTOR_BODY_3]], !llvm.loop [[LOOP7]]
|
|
; CHECK: middle.block.3:
|
|
; CHECK-NEXT: [[CMP_N_3:%.*]] = icmp eq i64 [[N_VEC_3]], [[CONV6]]
|
|
; CHECK-NEXT: br i1 [[CMP_N_3]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_US_PREHEADER_3]]
|
|
; CHECK: for.body4.us.preheader.3:
|
|
; CHECK-NEXT: [[INDVARS_IV_PH_3:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK_3]] ], [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[N_VEC_3]], [[MIDDLE_BLOCK_3]] ]
|
|
; CHECK-NEXT: br label [[FOR_BODY4_US_3:%.*]]
|
|
; CHECK: for.body4.us.3:
|
|
; CHECK-NEXT: [[INDVARS_IV_3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY4_US_3]] ], [ [[INDVARS_IV_PH_3]], [[FOR_BODY4_US_PREHEADER_3]] ]
|
|
; CHECK-NEXT: [[TMP119:%.*]] = add nuw nsw i64 [[INDVARS_IV_3]], 45
|
|
; CHECK-NEXT: [[TMP120:%.*]] = icmp ult i64 [[INDVARS_IV_3]], 180
|
|
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP120]])
|
|
; CHECK-NEXT: [[TMP121:%.*]] = getelementptr inbounds <225 x double>, ptr [[A]], i64 0, i64 [[TMP119]]
|
|
; CHECK-NEXT: [[MATRIXEXT_US_3:%.*]] = load double, ptr [[TMP121]], align 8
|
|
; CHECK-NEXT: [[MATRIXEXT8_US_3:%.*]] = load double, ptr [[TMP94]], align 8
|
|
; CHECK-NEXT: [[MUL_US_3:%.*]] = fmul double [[MATRIXEXT_US_3]], [[MATRIXEXT8_US_3]]
|
|
; CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds <225 x double>, ptr [[B]], i64 0, i64 [[TMP119]]
|
|
; CHECK-NEXT: [[MATRIXEXT11_US_3:%.*]] = load double, ptr [[TMP122]], align 8
|
|
; CHECK-NEXT: [[SUB_US_3:%.*]] = fsub double [[MATRIXEXT11_US_3]], [[MUL_US_3]]
|
|
; CHECK-NEXT: store double [[SUB_US_3]], ptr [[TMP122]], align 8
|
|
; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV_3]], 1
|
|
; CHECK-NEXT: [[EXITCOND_NOT_3:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], [[CONV6]]
|
|
; CHECK-NEXT: br i1 [[EXITCOND_NOT_3]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_US_3]], !llvm.loop [[LOOP10]]
|
|
; CHECK: for.cond.cleanup:
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
entry:
|
|
%i.addr = alloca i32, align 4
|
|
%A.addr = alloca ptr, align 8
|
|
%B.addr = alloca ptr, align 8
|
|
%j = alloca i32, align 4
|
|
%cleanup.dest.slot = alloca i32, align 4
|
|
%k = alloca i32, align 4
|
|
store i32 %i, ptr %i.addr, align 4
|
|
store ptr %A, ptr %A.addr, align 8
|
|
store ptr %B, ptr %B.addr, align 8
|
|
call void @llvm.lifetime.start.p0(i64 4, ptr %j) #3
|
|
store i32 0, ptr %j, align 4
|
|
br label %for.cond
|
|
|
|
for.cond: ; preds = %for.inc12, %entry
|
|
%0 = load i32, ptr %j, align 4
|
|
%cmp = icmp ult i32 %0, 4
|
|
br i1 %cmp, label %for.body, label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.cond
|
|
store i32 2, ptr %cleanup.dest.slot, align 4
|
|
call void @llvm.lifetime.end.p0(i64 4, ptr %j) #3
|
|
br label %for.end14
|
|
|
|
for.body: ; preds = %for.cond
|
|
call void @llvm.lifetime.start.p0(i64 4, ptr %k) #3
|
|
store i32 0, ptr %k, align 4
|
|
br label %for.cond1
|
|
|
|
for.cond1: ; preds = %for.inc, %for.body
|
|
%1 = load i32, ptr %k, align 4
|
|
%2 = load i32, ptr %i.addr, align 4
|
|
%cmp2 = icmp ult i32 %1, %2
|
|
br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
|
|
|
|
for.cond.cleanup3: ; preds = %for.cond1
|
|
store i32 5, ptr %cleanup.dest.slot, align 4
|
|
call void @llvm.lifetime.end.p0(i64 4, ptr %k) #3
|
|
br label %for.end
|
|
|
|
for.body4: ; preds = %for.cond1
|
|
%3 = load i32, ptr %k, align 4
|
|
%conv = zext i32 %3 to i64
|
|
%4 = load i32, ptr %j, align 4
|
|
%conv5 = zext i32 %4 to i64
|
|
%5 = mul i64 %conv5, 15
|
|
%6 = add i64 %5, %conv
|
|
%7 = icmp ult i64 %6, 225
|
|
call void @llvm.assume(i1 %7)
|
|
%8 = load ptr, ptr %A.addr, align 8
|
|
%9 = load <225 x double>, ptr %8, align 8
|
|
%matrixext = extractelement <225 x double> %9, i64 %6
|
|
%10 = load i32, ptr %i.addr, align 4
|
|
%conv6 = zext i32 %10 to i64
|
|
%11 = load i32, ptr %j, align 4
|
|
%conv7 = zext i32 %11 to i64
|
|
%12 = mul i64 %conv7, 15
|
|
%13 = add i64 %12, %conv6
|
|
%14 = icmp ult i64 %13, 225
|
|
call void @llvm.assume(i1 %14)
|
|
%15 = load ptr, ptr %B.addr, align 8
|
|
%16 = load <225 x double>, ptr %15, align 8
|
|
%matrixext8 = extractelement <225 x double> %16, i64 %13
|
|
%mul = fmul double %matrixext, %matrixext8
|
|
%17 = load ptr, ptr %B.addr, align 8
|
|
%18 = load i32, ptr %k, align 4
|
|
%conv9 = zext i32 %18 to i64
|
|
%19 = load i32, ptr %j, align 4
|
|
%conv10 = zext i32 %19 to i64
|
|
%20 = mul i64 %conv10, 15
|
|
%21 = add i64 %20, %conv9
|
|
%22 = icmp ult i64 %21, 225
|
|
call void @llvm.assume(i1 %22)
|
|
%23 = load <225 x double>, ptr %17, align 8
|
|
%matrixext11 = extractelement <225 x double> %23, i64 %21
|
|
%sub = fsub double %matrixext11, %mul
|
|
%24 = icmp ult i64 %21, 225
|
|
call void @llvm.assume(i1 %24)
|
|
%25 = load <225 x double>, ptr %17, align 8
|
|
%matins = insertelement <225 x double> %25, double %sub, i64 %21
|
|
store <225 x double> %matins, ptr %17, align 8
|
|
br label %for.inc
|
|
|
|
for.inc: ; preds = %for.body4
|
|
%26 = load i32, ptr %k, align 4
|
|
%inc = add i32 %26, 1
|
|
store i32 %inc, ptr %k, align 4
|
|
br label %for.cond1
|
|
|
|
for.end: ; preds = %for.cond.cleanup3
|
|
br label %for.inc12
|
|
|
|
for.inc12: ; preds = %for.end
|
|
%27 = load i32, ptr %j, align 4
|
|
%inc13 = add i32 %27, 1
|
|
store i32 %inc13, ptr %j, align 4
|
|
br label %for.cond
|
|
|
|
for.end14: ; preds = %for.cond.cleanup
|
|
ret void
|
|
}
|
|
|
|
; Function Attrs: argmemonly nofree nosync nounwind willreturn
|
|
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
|
|
|
|
; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
|
|
declare void @llvm.assume(i1 noundef) #2
|
|
|
|
; Function Attrs: argmemonly nofree nosync nounwind willreturn
|
|
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
|
|
|
|
; Function Attrs: nounwind ssp uwtable mustprogress
|
|
|
|
define <4 x float> @reverse_hadd_v4f32(<4 x float> %a, <4 x float> %b) {
|
|
; CHECK-LABEL: @reverse_hadd_v4f32(
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[A:%.*]], <4 x i32> <i32 2, i32 0, i32 6, i32 4>
|
|
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[A]], <4 x i32> <i32 3, i32 1, i32 7, i32 5>
|
|
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
|
|
; CHECK-NEXT: ret <4 x float> [[TMP3]]
|
|
;
|
|
%vecext = extractelement <4 x float> %a, i32 0
|
|
%vecext1 = extractelement <4 x float> %a, i32 1
|
|
%add = fadd float %vecext, %vecext1
|
|
%vecinit = insertelement <4 x float> undef, float %add, i32 0
|
|
%vecext2 = extractelement <4 x float> %a, i32 2
|
|
%vecext3 = extractelement <4 x float> %a, i32 3
|
|
%add4 = fadd float %vecext2, %vecext3
|
|
%vecinit5 = insertelement <4 x float> %vecinit, float %add4, i32 1
|
|
%vecext6 = extractelement <4 x float> %b, i32 0
|
|
%vecext7 = extractelement <4 x float> %b, i32 1
|
|
%add8 = fadd float %vecext6, %vecext7
|
|
%vecinit9 = insertelement <4 x float> %vecinit5, float %add8, i32 2
|
|
%vecext10 = extractelement <4 x float> %b, i32 2
|
|
%vecext11 = extractelement <4 x float> %b, i32 3
|
|
%add12 = fadd float %vecext10, %vecext11
|
|
%vecinit13 = insertelement <4 x float> %vecinit9, float %add12, i32 3
|
|
%shuffle = shufflevector <4 x float> %vecinit13, <4 x float> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
|
|
ret <4 x float> %shuffle
|
|
}
|