We are replacing a narrow IV increment with a wider one. If the original (narrow) increment did not wrap, the wider one should not wrap either. Set the flags on the wide increment to the union of the flags on the original increment and those already on the wide increment; this ensures we preserve any flags SCEV could infer for the wider increment. Fixes https://github.com/llvm/llvm-project/issues/71517.
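
A minimal sketch of the intent, using illustrative IR (these exact lines are hypothetical and are not taken from the test below): suppose the original narrow increment is

  %inc = add nsw i32 %j, 1

and SCEV can additionally prove nuw for the widened form. Taking the union of the two flag sets, the wide replacement keeps the original nsw as well as the inferred nuw:

  %inc.wide = add nuw nsw i64 %j.wide, 1

The union thus preserves both what the narrow increment already guaranteed and whatever SCEV can infer for the wider increment.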
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='default<O3>' -enable-loop-flatten -loop-flatten-cost-threshold=3 -S %s | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

define dso_local void @_Z3fooPiii(ptr %A, i32 %N, i32 %M) #0 {
; CHECK-LABEL: @_Z3fooPiii(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    [[CMP21:%.*]] = icmp sgt i32 [[M:%.*]], 0
; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[CMP3]], i1 [[CMP21]], i1 false
; CHECK-NEXT:    br i1 [[OR_COND]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.cond1.preheader.lr.ph.split.us:
; CHECK-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[M]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT:    [[FLATTEN_TRIPCOUNT:%.*]] = mul nuw nsw i64 [[TMP0]], [[TMP1]]
; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER_US:%.*]]
; CHECK:       for.cond1.preheader.us:
; CHECK-NEXT:    [[INDVAR6:%.*]] = phi i64 [ [[INDVAR_NEXT7:%.*]], [[FOR_COND1_PREHEADER_US]] ], [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US]] ]
; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVAR6]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4
; CHECK-NEXT:    tail call void @_Z1fi(i32 [[TMP2]])
; CHECK-NEXT:    [[INDVAR_NEXT7]] = add nuw i64 [[INDVAR6]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVAR_NEXT7]], [[FLATTEN_TRIPCOUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_COND1_PREHEADER_US]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.cond

for.cond:
  %i.0 = phi i32 [ 0, %entry ], [ %inc6, %for.cond.cleanup3 ]
  %cmp = icmp slt i32 %i.0, %N
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void

for.body:
  br label %for.cond1

for.cond1:
  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.body4 ]
  %cmp2 = icmp slt i32 %j.0, %M
  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3

for.cond.cleanup3:
  %inc6 = add nsw i32 %i.0, 1
  br label %for.cond

for.body4:
  %mul = mul nsw i32 %i.0, %M
  %add = add nsw i32 %mul, %j.0
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %idxprom
  %0 = load i32, ptr %arrayidx, align 4
  call void @_Z1fi(i32 %0)
  %inc = add nsw i32 %j.0, 1
  br label %for.cond1
}

declare dso_local void @_Z1fi(i32) #2