These tests rely on SCEV recognizing an "or" with no common bits as an "add". Add the disjoint flag to the relevant "or" instructions in preparation for switching SCEV to use the flag instead of the ValueTracking query. The IR with the disjoint flag matches what InstCombine would produce.
143 lines
6.7 KiB
LLVM
143 lines
6.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
|
; RUN: opt < %s -passes=slp-vectorizer,dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
|
|
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
|
|
target triple = "i386-apple-macosx10.8.0"
|
|
|
|
; int foo(ptr A, int n, int m) {
|
|
; double sum = 0, v1 = 2, v0 = 3;
|
|
; for (int i=0; i < n; ++i)
|
|
; sum += 7*A[i*2] + 7*A[i*2+1];
|
|
; return sum;
|
|
; }
|
|
|
|
; Scalar reduction loop. Each iteration loads A[2*i] and A[2*i+1], multiplies
; both by 7.0, adds the two products, and accumulates the result into a running
; double sum; on loop exit the sum is converted to i32 (0 is returned when the
; loop is skipped because %n <= 0).
;
; The odd index is built with `or disjoint %mul, 1`. %mul comes from a left
; shift by one, so its low bit is clear and the `or` yields the same value as
; adding 1.
;
; The assertions below expect the two adjacent scalar loads and the two scalar
; fmuls to be merged into a single <2 x double> load and a single
; <2 x double> fmul, with the lanes extracted again for the scalar fadd.
;
; %m is not used by the function body.
define i32 @reduce(ptr nocapture %A, i32 %n, i32 %m) {
; CHECK-LABEL: @reduce(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[CMP13]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_015:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[SUM_014:%.*]] = phi double [ [[ADD6:%.*]], [[FOR_BODY]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[I_015]], 1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 [[MUL]]
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = fmul <2 x double> [[TMP0]], <double 7.000000e+00, double 7.000000e+00>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT: [[ADD5:%.*]] = fadd double [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[ADD6]] = fadd double [[SUM_014]], [[ADD5]]
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_015]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
; CHECK: for.cond.for.end_crit_edge:
; CHECK-NEXT: [[PHITMP:%.*]] = fptosi double [[ADD6]] to i32
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[PHITMP]], [[FOR_COND_FOR_END_CRIT_EDGE]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
entry:
  ; Skip the loop entirely when the trip count is not positive.
  %cmp13 = icmp sgt i32 %n, 0
  br i1 %cmp13, label %for.body, label %for.end

for.body: ; preds = %entry, %for.body
  %i.015 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %sum.014 = phi double [ %add6, %for.body ], [ 0.000000e+00, %entry ]
  ; Even element: index 2*i.
  %mul = shl nsw i32 %i.015, 1
  %arrayidx = getelementptr inbounds double, ptr %A, i32 %mul
  %0 = load double, ptr %arrayidx, align 4
  %mul1 = fmul double %0, 7.000000e+00
  ; Odd element: index 2*i | 1. The low bit of %mul is zero, so the operands
  ; share no set bits and the result equals 2*i + 1.
  %add12 = or disjoint i32 %mul, 1
  %arrayidx3 = getelementptr inbounds double, ptr %A, i32 %add12
  %1 = load double, ptr %arrayidx3, align 4
  %mul4 = fmul double %1, 7.000000e+00
  ; Combine the pair, then fold it into the running sum.
  %add5 = fadd double %mul1, %mul4
  %add6 = fadd double %sum.014, %add5
  %inc = add nsw i32 %i.015, 1
  %exitcond = icmp eq i32 %inc, %n
  br i1 %exitcond, label %for.cond.for.end_crit_edge, label %for.body

for.cond.for.end_crit_edge: ; preds = %for.body
  %phitmp = fptosi double %add6 to i32
  br label %for.end

for.end: ; preds = %for.cond.for.end_crit_edge, %entry
  %sum.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
  ret i32 %sum.0.lcssa
}
|
|
|
|
; PR43948 - https://bugs.llvm.org/show_bug.cgi?id=43948
|
|
; The extra use of a non-vectorized element of a reduction must not be killed.
|
|
|
|
; Signed-max chain over six consecutive i32 elements x[0]..x[5], written as
; icmp sgt / select pairs. The compare %MAX_ROOT_CMP (max of x[0..3] against
; x[4]) has a second use besides the max chain: it also selects between the
; constants 3 and 4, and that value is stored to %p.
;
; The assertions below expect x[0..3] to be loaded as one <4 x i32> and reduced
; with llvm.vector.reduce.smax, while the loads of x[4] and x[5], the
; %MAX_ROOT_CMP compare, both of its users, and the store to %p remain scalar.
define i32 @horiz_max_multiple_uses(ptr %x, ptr %p) {
; CHECK-LABEL: @horiz_max_multiple_uses(
; CHECK-NEXT: [[X4:%.*]] = getelementptr [32 x i32], ptr [[X:%.*]], i64 0, i64 4
; CHECK-NEXT: [[X5:%.*]] = getelementptr [32 x i32], ptr [[X]], i64 0, i64 5
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[X]], align 4
; CHECK-NEXT: [[T4:%.*]] = load i32, ptr [[X4]], align 4
; CHECK-NEXT: [[T5:%.*]] = load i32, ptr [[X5]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP1]])
; CHECK-NEXT: [[MAX_ROOT_CMP:%.*]] = icmp sgt i32 [[TMP2]], [[T4]]
; CHECK-NEXT: [[MAX_ROOT_SEL:%.*]] = select i1 [[MAX_ROOT_CMP]], i32 [[TMP2]], i32 [[T4]]
; CHECK-NEXT: [[C012345:%.*]] = icmp sgt i32 [[MAX_ROOT_SEL]], [[T5]]
; CHECK-NEXT: [[T17:%.*]] = select i1 [[C012345]], i32 [[MAX_ROOT_SEL]], i32 [[T5]]
; CHECK-NEXT: [[THREE_OR_FOUR:%.*]] = select i1 [[MAX_ROOT_CMP]], i32 3, i32 4
; CHECK-NEXT: store i32 [[THREE_OR_FOUR]], ptr [[P:%.*]], align 8
; CHECK-NEXT: ret i32 [[T17]]
;
  ; Addresses of x[1]..x[5]; x[0] is read through %x directly.
  %x1 = getelementptr [32 x i32], ptr %x, i64 0, i64 1
  %x2 = getelementptr [32 x i32], ptr %x, i64 0, i64 2
  %x3 = getelementptr [32 x i32], ptr %x, i64 0, i64 3
  %x4 = getelementptr [32 x i32], ptr %x, i64 0, i64 4
  %x5 = getelementptr [32 x i32], ptr %x, i64 0, i64 5

  %t0 = load i32, ptr %x
  %t1 = load i32, ptr %x1
  %t2 = load i32, ptr %x2
  %t3 = load i32, ptr %x3
  %t4 = load i32, ptr %x4
  %t5 = load i32, ptr %x5

  ; Running signed max over x[0..3].
  %c01 = icmp sgt i32 %t0, %t1
  %s5 = select i1 %c01, i32 %t0, i32 %t1
  %c012 = icmp sgt i32 %s5, %t2
  %t8 = select i1 %c012, i32 %s5, i32 %t2
  %c0123 = icmp sgt i32 %t8, %t3
  %rdx4 = select i1 %c0123, i32 %t8, i32 %t3
  ; This compare feeds both the max chain and %three_or_four below.
  %MAX_ROOT_CMP = icmp sgt i32 %rdx4, %t4
  %MAX_ROOT_SEL = select i1 %MAX_ROOT_CMP, i32 %rdx4, i32 %t4
  %c012345 = icmp sgt i32 %MAX_ROOT_SEL, %t5
  %t17 = select i1 %c012345, i32 %MAX_ROOT_SEL, i32 %t5
  ; Extra use of the compare outside the max chain.
  %three_or_four = select i1 %MAX_ROOT_CMP, i32 3, i32 4
  store i32 %three_or_four, ptr %p, align 8
  ret i32 %t17
}
|
|
|
|
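; This is a miscompile (see the undef operand) and/or test for invalid IR.
; NOTE(review): no undef operand appears in the assertions below; the remark
; above may be stale -- confirm against the history of this test.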
|
|
|
|
; Two-element signed-max chain over p[0] and p[1], starting from 0, in which
; the second compare (%cmp23.1) is used twice: once as the select condition of
; the max chain and once zero-extended to i32. The final compare tests that
; zero-extended flag against the max value and its result is returned.
;
; The assertions below are instruction-for-instruction identical to the input
; IR, i.e. the function is expected to come out of the pipeline unchanged.
define i1 @bad_insertpoint_rdx(ptr %p) #0 {
; CHECK-LABEL: @bad_insertpoint_rdx(
; CHECK-NEXT: [[T0:%.*]] = load i32, ptr [[P:%.*]], align 16
; CHECK-NEXT: [[CMP23:%.*]] = icmp sgt i32 [[T0]], 0
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP23]], i32 [[T0]], i32 0
; CHECK-NEXT: [[ARRAYIDX22_1:%.*]] = getelementptr inbounds [8 x i32], ptr [[P]], i64 0, i64 1
; CHECK-NEXT: [[T1:%.*]] = load i32, ptr [[ARRAYIDX22_1]], align 4
; CHECK-NEXT: [[CMP23_1:%.*]] = icmp sgt i32 [[T1]], [[SPEC_SELECT]]
; CHECK-NEXT: [[SPEC_STORE_SELECT87:%.*]] = zext i1 [[CMP23_1]] to i32
; CHECK-NEXT: [[SPEC_SELECT88:%.*]] = select i1 [[CMP23_1]], i32 [[T1]], i32 [[SPEC_SELECT]]
; CHECK-NEXT: [[CMP23_2:%.*]] = icmp sgt i32 [[SPEC_STORE_SELECT87]], [[SPEC_SELECT88]]
; CHECK-NEXT: ret i1 [[CMP23_2]]
;
  ; max(p[0], 0)
  %t0 = load i32, ptr %p, align 16
  %cmp23 = icmp sgt i32 %t0, 0
  %spec.select = select i1 %cmp23, i32 %t0, i32 0
  ; max(p[1], previous max); the compare result is also widened to i32.
  %arrayidx22.1 = getelementptr inbounds [8 x i32], ptr %p, i64 0, i64 1
  %t1 = load i32, ptr %arrayidx22.1, align 4
  %cmp23.1 = icmp sgt i32 %t1, %spec.select
  %spec.store.select87 = zext i1 %cmp23.1 to i32
  %spec.select88 = select i1 %cmp23.1, i32 %t1, i32 %spec.select
  ; Compares the widened flag (0 or 1) against the max value.
  %cmp23.2 = icmp sgt i32 %spec.store.select87, %spec.select88
  ret i1 %cmp23.2
}
|