Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/ordering-bug.ll
Alexey Bataev b10ecfa914 [SLP]Represent externally used values as original scalars, if profitable.
Currently the SLP vectorizer tries to keep only GEPs as scalars if they are
vectorized but also used externally. The same approach can be used for all
scalar values. This patch tries to keep the original scalar if all of its
operands remain scalar or are externally used, the cost of the original
scalar is lower than the cost of the extractelement instruction, or the
number of externally used scalars in the same entry is a power of 2. The
last criterion allows better revectorization of scalars with multiple uses.

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/100904
2024-08-12 10:15:02 -04:00

73 lines
3.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -mtriple=i386-linux-gnu -passes=slp-vectorizer -S %s | FileCheck %s
; %struct.a wraps a single array of two i64 values.
%struct.a = type { [2 x i64] }
; Three external globals of that type; @f reads all three and writes @a.
@a = external global %struct.a
@b = external global %struct.a
@c = external global %struct.a
; Test input for the SLP vectorizer.
;
; @f loads both i64 elements of @a. When %x is true, each element is replaced
; by the matching element of @b if element 1 of @a is zero. If the resulting
; element 0 is non-zero, the pair is ANDed with the two elements of @c
; (element 0 is additionally masked with 8) and stored back to @a.
;
; The autogenerated assertions below expect the two-element chains (loads,
; selects, phis, ands, stores) to become <2 x i64> operations, while %a1,
; %icmp.a1 and %and0.tmp stay scalar. %d0 is expected to be recovered from the
; vector phi with a single extractelement of lane 0, and %and0.tmp is expected
; to be re-inserted into lane 0 with lane 1 taken from the vector phi
; (shuffle mask <0, 3>).
; NOTE(review): the file name suggests this guards against an instruction
; ordering bug in the vectorized output -- confirm against the original report.
define void @f(i1 %x) #0 {
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A1:%.*]] = load i64, ptr getelementptr inbounds ([[STRUCT_A:%.*]], ptr @a, i32 0, i32 0, i32 1), align 8
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @a, align 8
; CHECK-NEXT: br i1 [[X:%.*]], label [[WHILE_BODY_LR_PH:%.*]], label [[WHILE_END:%.*]]
; CHECK: while.body.lr.ph:
; CHECK-NEXT: [[ICMP_A1:%.*]] = icmp eq i64 [[A1]], 0
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @b, align 8
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i1> poison, i1 [[ICMP_A1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = select <2 x i1> [[TMP3]], <2 x i64> [[TMP1]], <2 x i64> [[TMP0]]
; CHECK-NEXT: br label [[WHILE_END]]
; CHECK: while.end:
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x i64> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP4]], [[WHILE_BODY_LR_PH]] ]
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr @c, align 8
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
; CHECK-NEXT: [[ICMP_D0:%.*]] = icmp eq i64 [[TMP7]], 0
; CHECK-NEXT: br i1 [[ICMP_D0]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[AND0_TMP:%.*]] = and i64 [[TMP7]], 8
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[AND0_TMP]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP5]], <2 x i32> <i32 0, i32 3>
; CHECK-NEXT: [[TMP10:%.*]] = and <2 x i64> [[TMP9]], [[TMP6]]
; CHECK-NEXT: store <2 x i64> [[TMP10]], ptr @a, align 8
; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: ret void
;
entry:
; Load elements 0 and 1 of @a; %a1 also has a scalar use in %icmp.a1 below.
%a0 = load i64, ptr @a, align 8
%a1 = load i64, ptr getelementptr inbounds (%struct.a, ptr @a, i32 0, i32 0, i32 1), align 8
br i1 %x, label %while.body.lr.ph, label %while.end
while.body.lr.ph:
; One scalar condition (%a1 == 0) drives both selects: pick @b's element when
; it holds, otherwise keep @a's element.
%icmp.a1 = icmp eq i64 %a1, 0
%b0 = load i64, ptr @b, align 8
%b1 = load i64, ptr getelementptr inbounds (%struct.a, ptr @b, i32 0, i32 0, i32 1), align 8
%c0 = select i1 %icmp.a1, i64 %b0, i64 %a0
%c1 = select i1 %icmp.a1, i64 %b1, i64 %a1
br label %while.end
while.end:
; Merge the (a0, a1) pair from entry with the selected (c0, c1) pair.
%d0 = phi i64 [ %a0, %entry ], [ %c0, %while.body.lr.ph ]
%d1 = phi i64 [ %a1, %entry ], [ %c1, %while.body.lr.ph ]
%e0 = load i64, ptr @c, align 8
%e1 = load i64, ptr getelementptr inbounds (%struct.a, ptr @c, i32 0, i32 0, i32 1), align 8
; %d0 has scalar uses here and in %and0.tmp; skip the store when it is zero.
%icmp.d0 = icmp eq i64 %d0, 0
br i1 %icmp.d0, label %if.end, label %if.then
if.then:
; Lane 0 gets an extra mask with 8 before the AND with @c; lane 1 does not,
; and its operands appear in the opposite order (%e1, %d1).
%and0.tmp = and i64 %d0, 8
%and0 = and i64 %and0.tmp, %e0
%and1 = and i64 %e1, %d1
; Write both results back to the two elements of @a.
store i64 %and0, ptr @a, align 8
store i64 %and1, ptr getelementptr inbounds (%struct.a, ptr @a, i32 0, i32 0, i32 1), align 8
br label %if.end
if.end:
ret void
}
; Attribute group #0 (referenced by @f): sets the "target-features" string to +sse2.
attributes #0 = { "target-features"="+sse2" }