The original reduced value must be used rather than the one the compiler obtains after reduction, which may already have been replaced by an extractelement instruction.
59 lines
2.3 KiB
LLVM
59 lines
2.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s

; SLP-vectorizer test over a scalar i32 `mul` reduction chain
; (%inst7 ... %inst58) whose factors come from three sources:
;   * four identical extracts of lane 0 of a constant <2 x i32> (%0 .. %3),
;   * constant zeros,
;   * a per-lane `mul`/`add` pair computed from each lane of %4.
;
; Lane 0 of %4 (%5) is used twice: as the operand of the per-lane pair
; (%inst31/%inst32) and as a direct factor of the reduction (%inst33).
; The expected output below turns the per-lane pairs into <4 x i32> `mul`/`add`
; operations folded by llvm.vector.reduce.mul, while lane 0 stays available as
; a scalar extractelement that is multiplied into the final scalar reduction.

define i32 @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> zeroinitializer, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> zeroinitializer, zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 0, [[TMP5]]
; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], 0
; CHECK-NEXT: [[OP_RDX2:%.*]] = mul i32 [[TMP0]], [[TMP0]]
; CHECK-NEXT: [[OP_RDX3:%.*]] = mul i32 [[TMP0]], [[TMP0]]
; CHECK-NEXT: [[OP_RDX4:%.*]] = mul i32 [[OP_RDX1]], [[OP_RDX2]]
; CHECK-NEXT: [[OP_RDX5:%.*]] = mul i32 [[OP_RDX3]], [[TMP2]]
; CHECK-NEXT: [[OP_RDX6:%.*]] = mul i32 [[OP_RDX4]], [[OP_RDX5]]
; CHECK-NEXT: ret i32 [[OP_RDX6]]
;
bb:
  ; Head of the reduction chain: repeated lane-0 extracts of a constant
  ; <2 x i32>, interleaved with multiplications by constant zero.
  %inst5 = add i32 0, 0
  %0 = extractelement <2 x i32> zeroinitializer, i32 0
  %inst7 = mul i32 %0, %inst5
  %1 = extractelement <2 x i32> zeroinitializer, i32 0
  %inst13 = mul i32 %1, %inst7
  %inst14 = mul i32 %inst13, 0
  %2 = extractelement <2 x i32> zeroinitializer, i32 0
  %inst19 = mul i32 %2, %inst14
  %inst20 = mul i32 %inst19, 0
  %3 = extractelement <2 x i32> zeroinitializer, i32 0
  %inst26 = mul i32 %3, %inst20
  %inst27 = mul i32 %inst26, 0
  %4 = or <4 x i32> zeroinitializer, zeroinitializer
  ; Lane 0: %5 feeds the per-lane mul/add pair and is also multiplied directly
  ; into the chain by %inst33.
  %5 = extractelement <4 x i32> %4, i32 0
  %inst31 = mul i32 %5, 0
  %inst32 = add i32 %inst31, 0
  %inst33 = mul i32 %5, %inst27
  %inst34 = mul i32 %inst33, %inst32
  ; Lanes 1-3: each lane contributes only through its mul/add pair.
  %6 = extractelement <4 x i32> %4, i32 1
  %inst39 = mul i32 %6, 0
  %inst40 = add i32 %inst39, 0
  %inst41 = mul i32 0, %inst34
  %inst42 = mul i32 %inst41, %inst40
  %7 = extractelement <4 x i32> %4, i32 2
  %inst47 = mul i32 %7, 0
  %inst48 = add i32 %inst47, 0
  %inst49 = mul i32 0, %inst42
  %inst50 = mul i32 %inst49, %inst48
  %8 = extractelement <4 x i32> %4, i32 3
  %inst55 = mul i32 %8, 0
  %inst56 = add i32 %inst55, 0
  %inst57 = mul i32 0, %inst50
  %inst58 = mul i32 %inst57, %inst56
  ret i32 %inst58
}