Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/reduction-extracted-value.ll
Alexey Bataev 5f53e85f8a [SLP]Fix a crash when trying to find reduced ops for the reduced value.
Need to use the original reduced value, not the one the compiler gets after
reduction, since it may already have been replaced by the extractelement instruction.
2023-02-27 07:32:36 -08:00

59 lines
2.3 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s
; @foo: input for the SLP vectorizer. The body is one long scalar chain of i32
; multiplies whose operands are literal zeros, lane 0 of a constant <2 x i32>
; zero vector, and the four lanes of a <4 x i32> `or` result, each lane also
; passing through a "mul by 0, add 0" pair.
; The autogenerated assertions below expect the per-lane mul/add pairs to become
; <4 x i32> operations feeding @llvm.vector.reduce.mul, with the remaining
; scalar factors folded in by a tail of scalar multiplies (the OP_RDX values).
; NOTE(review): the instruction order is presumably what reproduces the original
; crash; confirm before reordering or simplifying anything in the body.
define i32 @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> zeroinitializer, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> zeroinitializer, zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 0, [[TMP5]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], 0
; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[TMP0]], [[TMP0]]
; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[TMP0]], [[TMP0]]
; CHECK-NEXT: [[OP_RDX4:%.*]] = mul i32 [[OP_RDX1]], [[OP_RDX2]]
; CHECK-NEXT: [[OP_RDX5:%.*]] = mul i32 [[OP_RDX3]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX6:%.*]] = mul i32 [[OP_RDX4]], [[OP_RDX5]]
; CHECK-NEXT: ret i32 [[OP_RDX6]]
;
bb:
; Head of the multiply chain: the running product starts at (0 + 0) and is
; multiplied alternately by lane 0 of a <2 x i32> zero vector (%0..%3, four
; identical extracts) and by the literal 0.
%inst5 = add i32 0, 0
%0 = extractelement <2 x i32> zeroinitializer, i32 0
%inst7 = mul i32 %0, %inst5
%1 = extractelement <2 x i32> zeroinitializer, i32 0
%inst13 = mul i32 %1, %inst7
%inst14 = mul i32 %inst13, 0
%2 = extractelement <2 x i32> zeroinitializer, i32 0
%inst19 = mul i32 %2, %inst14
%inst20 = mul i32 %inst19, 0
%3 = extractelement <2 x i32> zeroinitializer, i32 0
%inst26 = mul i32 %3, %inst20
%inst27 = mul i32 %inst26, 0
; %4 is the <4 x i32> value whose lanes 0..3 are extracted one by one below.
%4 = or <4 x i32> zeroinitializer, zeroinitializer
; Lane 0: the extracted value %5 is used twice - inside the (lane * 0) + 0
; pair and directly as a factor of the running product (%inst33). The expected
; output above still extracts lane 0 of the `or` result for the scalar tail.
%5 = extractelement <4 x i32> %4, i32 0
%inst31 = mul i32 %5, 0
%inst32 = add i32 %inst31, 0
%inst33 = mul i32 %5, %inst27
%inst34 = mul i32 %inst33, %inst32
; Lane 1: (lane * 0) + 0, folded into the product after an extra multiply by 0.
%6 = extractelement <4 x i32> %4, i32 1
%inst39 = mul i32 %6, 0
%inst40 = add i32 %inst39, 0
%inst41 = mul i32 0, %inst34
%inst42 = mul i32 %inst41, %inst40
; Lane 2: same shape as lane 1.
%7 = extractelement <4 x i32> %4, i32 2
%inst47 = mul i32 %7, 0
%inst48 = add i32 %inst47, 0
%inst49 = mul i32 0, %inst42
%inst50 = mul i32 %inst49, %inst48
; Lane 3: same shape as lane 1; %inst58 is the final product and the result.
%8 = extractelement <4 x i32> %4, i32 3
%inst55 = mul i32 %8, 0
%inst56 = add i32 %inst55, 0
%inst57 = mul i32 0, %inst50
%inst58 = mul i32 %inst57, %inst56
ret i32 %inst58
}