Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/reduced-gathered-vectorized.ll
Alexey Bataev 2e972ea056 [SLP]Integrate looking through shuffles logic into ShuffleInstructionBuilder.
Added BaseShuffleAnalysis as a base class for ShuffleInstructionBuilder
and moved the look-through-shuffles logic, previously used for the
shuffles of externally used scalars, into this class. This class serves as
the main container that implements the smart shuffle instruction builder logic.
ShuffleInstructionBuilder uses this logic.
ShuffleInstructionBuilder is now also used to build the shuffles for
the externally used scalars, replacing the lambdas, which are now part of the BaseShuffleAnalysis class.

Differential Revision: https://reviews.llvm.org/D140100
2022-12-21 06:12:53 -08:00

101 lines
6.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
; Reduced SLP-vectorizer test case: a loop whose body is one long linear xor
; chain over nine i64 loads, most of which are used several times.
; The expected output below replaces most of the chain with a single
; llvm.vector.reduce.xor.v16i64 call over a <16 x i64> vector assembled with
; shufflevector/insertelement from scalar and <2 x i64> loads, followed by a
; short tree of scalar xors (OP_RDX*) that folds in the remaining operands.
; The function contains no ret: the loop block branches back to itself.
define i16 @test() {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 5
; CHECK-NEXT: [[A1:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 6
; CHECK-NEXT: [[A2:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 7
; CHECK-NEXT: [[A3:%.*]] = getelementptr [1000 x i64], ptr null, i64 0, i64 8
; CHECK-NEXT: br label [[WHILE:%.*]]
; CHECK: while:
; CHECK-NEXT: [[PH:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX12:%.*]], [[WHILE]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr null, align 8
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A2]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr null, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 0>
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i64> poison, i64 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i64> [[TMP6]], <16 x i64> [[TMP8]], <16 x i32> <i32 0, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <16 x i64> [[TMP9]], <16 x i64> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i64> [[TMP11]], <16 x i64> [[TMP8]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 17, i32 17, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x i64> [[TMP13]], i64 [[TMP0]], i32 9
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i64> [[TMP14]], i64 [[TMP0]], i32 10
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x i64> [[TMP15]], i64 [[TMP0]], i32 11
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i64> [[TMP16]], <16 x i64> [[TMP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT: [[TMP19:%.*]] = load i64, ptr [[A1]], align 16
; CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr [[A2]], align 8
; CHECK-NEXT: [[TMP21:%.*]] = load i64, ptr [[A3]], align 16
; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> [[TMP18]])
; CHECK-NEXT: [[OP_RDX:%.*]] = xor i64 [[TMP22]], [[TMP3]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = xor i64 [[TMP3]], [[TMP3]]
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
; CHECK-NEXT: [[OP_RDX2:%.*]] = xor i64 [[TMP3]], [[TMP23]]
; CHECK-NEXT: [[OP_RDX3:%.*]] = xor i64 [[TMP23]], [[TMP19]]
; CHECK-NEXT: [[OP_RDX4:%.*]] = xor i64 [[TMP19]], [[TMP19]]
; CHECK-NEXT: [[OP_RDX5:%.*]] = xor i64 [[TMP20]], [[TMP20]]
; CHECK-NEXT: [[OP_RDX6:%.*]] = xor i64 [[TMP21]], [[TMP21]]
; CHECK-NEXT: [[OP_RDX7:%.*]] = xor i64 [[OP_RDX]], [[OP_RDX1]]
; CHECK-NEXT: [[OP_RDX8:%.*]] = xor i64 [[OP_RDX2]], [[OP_RDX3]]
; CHECK-NEXT: [[OP_RDX9:%.*]] = xor i64 [[OP_RDX4]], [[OP_RDX5]]
; CHECK-NEXT: [[OP_RDX10:%.*]] = xor i64 [[OP_RDX7]], [[OP_RDX8]]
; CHECK-NEXT: [[OP_RDX11:%.*]] = xor i64 [[OP_RDX9]], [[OP_RDX6]]
; CHECK-NEXT: [[OP_RDX12]] = xor i64 [[OP_RDX10]], [[OP_RDX11]]
; CHECK-NEXT: br label [[WHILE]]
;
; Input IR starts here.
; Addresses of elements 5, 6, 7 and 8 of a [1000 x i64] array based at null.
entry:
%a = getelementptr [1000 x i64], ptr null, i64 0, i64 5
%a1 = getelementptr [1000 x i64], ptr null, i64 0, i64 6
%a2 = getelementptr [1000 x i64], ptr null, i64 0, i64 7
%a3 = getelementptr [1000 x i64], ptr null, i64 0, i64 8
br label %while
while:
; Loop-carried value of the xor chain (0 on entry, %xor on the back edge);
; %ph has no other use inside the loop body.
%ph = phi i64 [ 0, %entry ], [ %xor, %while ]
; %0 and %4 both load from null; %1/%6, %2/%7 and %3/%8 are repeated loads
; from %a1, %a2 and %a3 respectively; %5 is the only load from %a.
%0 = load i64, ptr null, align 8
%1 = load i64, ptr %a1, align 16
%2 = load i64, ptr %a2, align 8
%3 = load i64, ptr %a3, align 16
%4 = load i64, ptr null, align 8
%5 = load i64, ptr %a, align 8
%6 = load i64, ptr %a1, align 16
%7 = load i64, ptr %a2, align 8
%8 = load i64, ptr %a3, align 16
; Linear xor chain: starts with %0 ^ %1, and every later step xors the running
; value with one of the loads above. The same loads recur throughout the chain.
%9 = xor i64 %0, %1
%10 = xor i64 %9, %2
%11 = xor i64 %10, %3
%12 = xor i64 %11, %4
%13 = xor i64 %12, %0
%14 = xor i64 %13, %1
%15 = xor i64 %14, %2
%16 = xor i64 %15, %3
%17 = xor i64 %16, %4
%18 = xor i64 %17, %0
%19 = xor i64 %18, %1
%20 = xor i64 %19, %2
%21 = xor i64 %20, %3
%22 = xor i64 %21, %4
%23 = xor i64 %22, %5
%24 = xor i64 %23, %6
%25 = xor i64 %24, %2
%26 = xor i64 %25, %3
%27 = xor i64 %26, %4
%28 = xor i64 %27, %5
%29 = xor i64 %28, %6
%30 = xor i64 %29, %7
%31 = xor i64 %30, %8
%32 = xor i64 %31, %4
%33 = xor i64 %32, %5
%34 = xor i64 %33, %6
%35 = xor i64 %34, %7
; Final value of the chain; its only use is the phi at the top of the loop.
%xor = xor i64 %35, %8
; Unconditional back edge: the block has no exit.
br label %while
}