Files
clang-p2996/llvm/test/Transforms/LoopVectorize/AArch64/maximize-bandwidth-invalidate.ll
Nikita Popov 2fab927546 [LoopVectorize] Convert some tests to opaque pointers (NFC)
Check lines for some of these tests were regenerated. The difference
is that with opaque pointers SCEVExpander always emits i8 GEPs,
making the address calculation explicit. This is a known problem
that will be solved long term by making all address calculations
explicit.
2023-01-04 17:25:42 +01:00

130 lines
7.7 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: asserts
; RUN: opt < %s -passes=loop-vectorize -vectorizer-maximize-bandwidth -S 2>&1 | FileCheck %s
; RUN: opt < %s -passes=loop-vectorize -vectorizer-maximize-bandwidth -S -debug-only=loop-vectorize 2>&1 -disable-output | FileCheck %s --check-prefix=COST
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-none-unknown-eabi"
; Check that the maximize vector bandwidth option does not give incorrect costs
; due to invalid cost decisions. The loop below has a low maximum trip count,
; so it will be masked.
; COST: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %0 = load
; COST: LV: Found an estimated cost of 3000000 for VF 4 For instruction: %0 = load
; COST: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %0 = load
; COST: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %0 = load
; COST: LV: Selecting VF: 1.
; @test: scalar reduction loop used as the loop-vectorize input.
; Each iteration loads one i8 from %pInVec and one i8 from each of
; %pInA1..%pInA4, sign-extends them to i32, multiplies every %pInA* element by
; the %pInVec element and adds the product to one of four running i32 sums.
; The loop runs (%numCols & 3) times and is skipped when that value is zero.
; Returns the sum of the four accumulators, or 0 when the loop is skipped.
define i32 @test(ptr nocapture noundef readonly %pInVec, ptr nocapture noundef readonly %pInA1, ptr nocapture noundef readonly %pInA2, ptr nocapture noundef readonly %pInA3, ptr nocapture noundef readonly %pInA4, i32 noundef %numCols) {
; The assertions below expect the loop to stay in scalar form: one i8 load per
; pointer per iteration and no vector instructions. Compared with the input,
; the expected output only gains the while.body.preheader block and the
; single-entry phis at the top of while.end.loopexit.
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND:%.*]] = and i32 [[NUMCOLS:%.*]], 3
; CHECK-NEXT: [[CMP_NOT32:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: br i1 [[CMP_NOT32]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
; CHECK: while.body.preheader:
; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
; CHECK: while.body:
; CHECK-NEXT: [[PINVEC_ADDR_042:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[WHILE_BODY]] ], [ [[PINVEC:%.*]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[SUM4_041:%.*]] = phi i32 [ [[ADD14:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[SUM3_040:%.*]] = phi i32 [ [[ADD10:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[SUM2_039:%.*]] = phi i32 [ [[ADD6:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[SUM1_038:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[COLCNT_037:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[AND]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[PINA1_ADDR_036:%.*]] = phi ptr [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY]] ], [ [[PINA1:%.*]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[PINA4_ADDR_035:%.*]] = phi ptr [ [[INCDEC_PTR11:%.*]], [[WHILE_BODY]] ], [ [[PINA4:%.*]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[PINA3_ADDR_034:%.*]] = phi ptr [ [[INCDEC_PTR7:%.*]], [[WHILE_BODY]] ], [ [[PINA3:%.*]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[PINA2_ADDR_033:%.*]] = phi ptr [ [[INCDEC_PTR3:%.*]], [[WHILE_BODY]] ], [ [[PINA2:%.*]], [[WHILE_BODY_PREHEADER]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PINVEC_ADDR_042]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[PINVEC_ADDR_042]], align 1
; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32
; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, ptr [[PINA1_ADDR_036]], i64 1
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[PINA1_ADDR_036]], align 1
; CHECK-NEXT: [[CONV2:%.*]] = sext i8 [[TMP1]] to i32
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]]
; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[SUM1_038]]
; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i8, ptr [[PINA2_ADDR_033]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[PINA2_ADDR_033]], align 1
; CHECK-NEXT: [[CONV4:%.*]] = sext i8 [[TMP2]] to i32
; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[CONV4]], [[CONV]]
; CHECK-NEXT: [[ADD6]] = add nsw i32 [[MUL5]], [[SUM2_039]]
; CHECK-NEXT: [[INCDEC_PTR7]] = getelementptr inbounds i8, ptr [[PINA3_ADDR_034]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[PINA3_ADDR_034]], align 1
; CHECK-NEXT: [[CONV8:%.*]] = sext i8 [[TMP3]] to i32
; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[CONV8]], [[CONV]]
; CHECK-NEXT: [[ADD10]] = add nsw i32 [[MUL9]], [[SUM3_040]]
; CHECK-NEXT: [[INCDEC_PTR11]] = getelementptr inbounds i8, ptr [[PINA4_ADDR_035]], i64 1
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[PINA4_ADDR_035]], align 1
; CHECK-NEXT: [[CONV12:%.*]] = sext i8 [[TMP4]] to i32
; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[CONV12]], [[CONV]]
; CHECK-NEXT: [[ADD14]] = add nsw i32 [[MUL13]], [[SUM4_041]]
; CHECK-NEXT: [[DEC]] = add nsw i32 [[COLCNT_037]], -1
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[DEC]], 0
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]
; CHECK: while.end.loopexit:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[WHILE_BODY]] ]
; CHECK-NEXT: [[ADD6_LCSSA:%.*]] = phi i32 [ [[ADD6]], [[WHILE_BODY]] ]
; CHECK-NEXT: [[ADD10_LCSSA:%.*]] = phi i32 [ [[ADD10]], [[WHILE_BODY]] ]
; CHECK-NEXT: [[ADD14_LCSSA:%.*]] = phi i32 [ [[ADD14]], [[WHILE_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[ADD6_LCSSA]], [[ADD_LCSSA]]
; CHECK-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP5]], [[ADD10_LCSSA]]
; CHECK-NEXT: [[TMP7:%.*]] = add nsw i32 [[TMP6]], [[ADD14_LCSSA]]
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
; CHECK-NEXT: [[ADD17:%.*]] = phi i32 [ [[TMP7]], [[WHILE_END_LOOPEXIT]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: ret i32 [[ADD17]]
;
entry:
; The iteration count is the low two bits of %numCols, so it lies in 0..3;
; a count of zero branches straight to while.end.
%and = and i32 %numCols, 3
%cmp.not32 = icmp eq i32 %and, 0
br i1 %cmp.not32, label %while.end, label %while.body
while.body: ; preds = %entry, %while.body
; Loop-carried state: five byte pointers that advance each iteration, four i32
; accumulators that start at 0, and a down-counter that starts at %and.
%pInVec.addr.042 = phi ptr [ %incdec.ptr, %while.body ], [ %pInVec, %entry ]
%sum4.041 = phi i32 [ %add14, %while.body ], [ 0, %entry ]
%sum3.040 = phi i32 [ %add10, %while.body ], [ 0, %entry ]
%sum2.039 = phi i32 [ %add6, %while.body ], [ 0, %entry ]
%sum1.038 = phi i32 [ %add, %while.body ], [ 0, %entry ]
%colCnt.037 = phi i32 [ %dec, %while.body ], [ %and, %entry ]
%pInA1.addr.036 = phi ptr [ %incdec.ptr1, %while.body ], [ %pInA1, %entry ]
%pInA4.addr.035 = phi ptr [ %incdec.ptr11, %while.body ], [ %pInA4, %entry ]
%pInA3.addr.034 = phi ptr [ %incdec.ptr7, %while.body ], [ %pInA3, %entry ]
%pInA2.addr.033 = phi ptr [ %incdec.ptr3, %while.body ], [ %pInA2, %entry ]
; %conv is the sign-extended %pInVec element; all four products below reuse it.
%incdec.ptr = getelementptr inbounds i8, ptr %pInVec.addr.042, i64 1
%0 = load i8, ptr %pInVec.addr.042, align 1
%conv = sext i8 %0 to i32
; %add = sext(*pInA1) * %conv + %sum1
%incdec.ptr1 = getelementptr inbounds i8, ptr %pInA1.addr.036, i64 1
%1 = load i8, ptr %pInA1.addr.036, align 1
%conv2 = sext i8 %1 to i32
%mul = mul nsw i32 %conv2, %conv
%add = add nsw i32 %mul, %sum1.038
; %add6 = sext(*pInA2) * %conv + %sum2
%incdec.ptr3 = getelementptr inbounds i8, ptr %pInA2.addr.033, i64 1
%2 = load i8, ptr %pInA2.addr.033, align 1
%conv4 = sext i8 %2 to i32
%mul5 = mul nsw i32 %conv4, %conv
%add6 = add nsw i32 %mul5, %sum2.039
; %add10 = sext(*pInA3) * %conv + %sum3
%incdec.ptr7 = getelementptr inbounds i8, ptr %pInA3.addr.034, i64 1
%3 = load i8, ptr %pInA3.addr.034, align 1
%conv8 = sext i8 %3 to i32
%mul9 = mul nsw i32 %conv8, %conv
%add10 = add nsw i32 %mul9, %sum3.040
; %add14 = sext(*pInA4) * %conv + %sum4
%incdec.ptr11 = getelementptr inbounds i8, ptr %pInA4.addr.035, i64 1
%4 = load i8, ptr %pInA4.addr.035, align 1
%conv12 = sext i8 %4 to i32
%mul13 = mul nsw i32 %conv12, %conv
%add14 = add nsw i32 %mul13, %sum4.041
; Count down by one and leave the loop once the counter reaches zero.
%dec = add nsw i32 %colCnt.037, -1
%cmp.not = icmp eq i32 %dec, 0
br i1 %cmp.not, label %while.end.loopexit, label %while.body
while.end.loopexit: ; preds = %while.body
; Fold the four final accumulator values into a single i32.
%5 = add nsw i32 %add6, %add
%6 = add nsw i32 %5, %add10
%7 = add nsw i32 %6, %add14
br label %while.end
while.end: ; preds = %while.end.loopexit, %entry
; The result is the folded sum, or 0 when entry branched here directly.
%add17 = phi i32 [ %7, %while.end.loopexit ], [ 0, %entry ]
ret i32 %add17
}