Files
clang-p2996/llvm/test/Transforms/LoopVectorize/reduction-order.ll
David Green be6e8e50f4 [LV] Tail folded inloop reductions.
This expands upon the inloop reductions added in e9761688e41cb9e976,
allowing them to be inserted into tail folded loops. Reductions are
generates with the form:

  x = select(mask, vecop, zero)
  v = vecreduce.add(x)
  c = add chain, v

Where zero here is chosen as the identity value for add reductions. The
backend is then expected to fold the select and the vecreduce into a
single predicated instruction.

Most of the code is fairly straight forward, except for the creation of
blockmasks which need to ensure they are created in dominance order. The
order they are added is altered to be after any phis, keeping the
requirements for the underlying IR.

Differential Revision: https://reviews.llvm.org/D84451
2020-10-11 16:58:34 +01:00

42 lines
1.5 KiB
LLVM

; RUN: opt -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S < %s 2>&1 | FileCheck %s
; RUN: opt -passes='loop-vectorize' -force-vector-width=4 -force-vector-interleave=1 -S < %s 2>&1 | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
; Make sure the selects generated from reduction are always emitted
; in deterministic order.
; CHECK-LABEL: @foo(
; CHECK: vector.body:
; CHECK: icmp ule <4 x i64>
; CHECK-NEXT: %[[VAR1:.*]] = add <4 x i32> <i32 3, i32 3, i32 3, i32 3>, %vec.phi1
; CHECK-NEXT: %[[VAR2:.*]] = add <4 x i32> %vec.phi, <i32 5, i32 5, i32 5, i32 5>
; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> %[[VAR2]], <4 x i32>
; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> %[[VAR1]], <4 x i32>
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define internal i64 @foo(i32* %t0) !prof !1 {
t16:
br label %t20
t17: ; preds = %t20
%t18 = phi i32 [ %t24, %t20 ]
%t19 = phi i32 [ %t28, %t20 ]
br label %t31
t20: ; preds = %t20, %t16
%t21 = phi i64 [ 0, %t16 ], [ %t29, %t20 ]
%t22 = phi i32 [ 0, %t16 ], [ %t28, %t20 ]
%t23 = phi i32 [ 0, %t16 ], [ %t24, %t20 ]
%t24 = add i32 3, %t23
%t28 = add i32 %t22, 5
%t29 = add nuw nsw i64 %t21, 1
%t30 = icmp eq i64 %t29, undef
br i1 %t30, label %t17, label %t20, !prof !2
t31:
ret i64 undef
}
!1 = !{!"function_entry_count", i64 801}
!2 = !{!"branch_weights", i32 746, i32 1}