Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
ManuelJBrito 8b56da5e9f [IR] Change shufflevector undef mask to poison
With this patch an undefined mask in a shufflevector will be printed as poison.
This change is done to support the new shufflevector semantics
for undefined mask elements.

Differential Revision: https://reviews.llvm.org/D149210
2023-04-27 14:41:10 +01:00

118 lines
6.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.11.0 -passes=slp-vectorizer -S -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.11.0 -passes=slp-vectorizer -S -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-apple-macosx10.11.0 -passes=slp-vectorizer -S -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; Verify that the SLP vectorizer is able to figure out that commutativity
; offers the possibility to splat/broadcast %c and thus makes it profitable
; to vectorize this case.
@cle = external unnamed_addr global [32 x i8], align 16
@cle32 = external unnamed_addr global [32 x i32], align 16
; Check that we correctly detect a splat/broadcast by leveraging the
; commutativity property of `xor`.
;
; Every scalar xor below has %c as one operand, but %c is sometimes the
; first operand and sometimes the second. The expected output for both check
; prefixes is a single <16 x i8> xor whose one side is a broadcast of %c,
; followed by one vector store to @cle.
define void @splat(i8 %a, i8 %b, i8 %c) {
; SSE-LABEL: @splat(
; SSE-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
; SSE-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[B:%.*]], i32 1
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; SSE-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
; SSE-NEXT: [[TMP5:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> poison, <16 x i32> zeroinitializer
; SSE-NEXT: [[TMP6:%.*]] = xor <16 x i8> [[TMP3]], [[TMP5]]
; SSE-NEXT: store <16 x i8> [[TMP6]], ptr @cle, align 16
; SSE-NEXT: ret void
;
; AVX-LABEL: @splat(
; AVX-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[A:%.*]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[B:%.*]], i32 1
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; AVX-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> poison, i8 [[C:%.*]], i32 0
; AVX-NEXT: [[TMP5:%.*]] = shufflevector <16 x i8> [[TMP4]], <16 x i8> poison, <16 x i32> zeroinitializer
; AVX-NEXT: [[TMP6:%.*]] = xor <16 x i8> [[TMP3]], [[TMP5]]
; AVX-NEXT: store <16 x i8> [[TMP6]], ptr @cle, align 16
; AVX-NEXT: ret void
;
; Sixteen scalar xor/store pairs writing bytes 0 through 15 of @cle.
; Lanes 0 and 4 through 7 place %c first; every other lane places %c second.
%1 = xor i8 %c, %a
store i8 %1, ptr @cle, align 16
%2 = xor i8 %a, %c
store i8 %2, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 1)
%3 = xor i8 %a, %c
store i8 %3, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 2)
%4 = xor i8 %a, %c
store i8 %4, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 3)
%5 = xor i8 %c, %a
store i8 %5, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 4)
; Lanes 5 and 7 pair %c with %b instead of %a, which is why the expected
; shuffle mask selects element 1 at positions 5 and 7.
%6 = xor i8 %c, %b
store i8 %6, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 5)
%7 = xor i8 %c, %a
store i8 %7, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 6)
%8 = xor i8 %c, %b
store i8 %8, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 7)
; Lanes 8 through 15 all use the same form, with %a first and %c second.
%9 = xor i8 %a, %c
store i8 %9, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 8)
%10 = xor i8 %a, %c
store i8 %10, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 9)
%11 = xor i8 %a, %c
store i8 %11, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 10)
%12 = xor i8 %a, %c
store i8 %12, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 11)
%13 = xor i8 %a, %c
store i8 %13, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 12)
%14 = xor i8 %a, %c
store i8 %14, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 13)
%15 = xor i8 %a, %c
store i8 %15, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 14)
%16 = xor i8 %a, %c
store i8 %16, ptr getelementptr inbounds ([32 x i8], ptr @cle, i64 0, i64 15)
ret void
}
; Check that we correctly detect that we can have the same opcode on one side by
; leveraging the commutativity property of `xor`.
;
; Each of the four xors has exactly one `add` result as an operand, but that
; operand is on the left in lane 0 and on the right in lanes 1 through 3.
; The assertions under the SSE prefix (the +sse2 run) expect the code to stay
; scalar. The assertions under the AVX prefix (the +avx and +avx2 runs) expect
; one <4 x i32> add, one <4 x i32> xor and a single vector store to @cle32.
define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
; SSE-LABEL: @same_opcode_on_one_side(
; SSE-NEXT: [[ADD1:%.*]] = add i32 [[C:%.*]], [[A:%.*]]
; SSE-NEXT: [[ADD2:%.*]] = add i32 [[C]], [[A]]
; SSE-NEXT: [[ADD3:%.*]] = add i32 [[A]], [[C]]
; SSE-NEXT: [[ADD4:%.*]] = add i32 [[C]], [[A]]
; SSE-NEXT: [[TMP1:%.*]] = xor i32 [[ADD1]], [[A]]
; SSE-NEXT: store i32 [[TMP1]], ptr @cle32, align 16
; SSE-NEXT: [[TMP2:%.*]] = xor i32 [[B:%.*]], [[ADD2]]
; SSE-NEXT: store i32 [[TMP2]], ptr getelementptr inbounds ([32 x i32], ptr @cle32, i64 0, i64 1), align 4
; SSE-NEXT: [[TMP3:%.*]] = xor i32 [[C]], [[ADD3]]
; SSE-NEXT: store i32 [[TMP3]], ptr getelementptr inbounds ([32 x i32], ptr @cle32, i64 0, i64 2), align 4
; SSE-NEXT: [[TMP4:%.*]] = xor i32 [[A]], [[ADD4]]
; SSE-NEXT: store i32 [[TMP4]], ptr getelementptr inbounds ([32 x i32], ptr @cle32, i64 0, i64 3), align 4
; SSE-NEXT: ret void
;
; AVX-LABEL: @same_opcode_on_one_side(
; AVX-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[C:%.*]], i32 0
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; AVX-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i32 0
; AVX-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer
; AVX-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
; AVX-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 poison, i32 4, i32 0>
; AVX-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[B:%.*]], i32 1
; AVX-NEXT: [[TMP8:%.*]] = xor <4 x i32> [[TMP5]], [[TMP7]]
; AVX-NEXT: store <4 x i32> [[TMP8]], ptr @cle32, align 16
; AVX-NEXT: ret void
;
; Four adds of %c and %a. Only %add3 swaps the operand order.
%add1 = add i32 %c, %a
%add2 = add i32 %c, %a
%add3 = add i32 %a, %c
%add4 = add i32 %c, %a
; Lane 0 has the add result as the first xor operand.
%1 = xor i32 %add1, %a
store i32 %1, ptr @cle32, align 16
; Lanes 1 through 3 have the add result as the second xor operand, with the
; non-add operand being %b, %c and %a respectively.
%2 = xor i32 %b, %add2
store i32 %2, ptr getelementptr inbounds ([32 x i32], ptr @cle32, i64 0, i64 1)
%3 = xor i32 %c, %add3
store i32 %3, ptr getelementptr inbounds ([32 x i32], ptr @cle32, i64 0, i64 2)
%4 = xor i32 %a, %add4
store i32 %4, ptr getelementptr inbounds ([32 x i32], ptr @cle32, i64 0, i64 3)
ret void
}