The way the cost is calculated needs to be fixed; otherwise the wrong cast opcode can be selected, leading to an over-optimistic vector cost. In addition, the reduction type size needs to be taken into account. Reviewers: RKSimon. Reviewed By: RKSimon. Pull Request: https://github.com/llvm/llvm-project/pull/87528
50 lines
2.2 KiB
LLVM
50 lines
2.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
|
|
|
|
; Regression input for the SLP vectorizer: an OR-chain of four i32 terms, each
; built as zext(i8) + constant from the same loaded byte, then masked to the
; low 16 bits and passed to a variadic call.
;
; The autogenerated FileCheck assertions below list only scalar instructions,
; i.e. the expected output keeps this chain unvectorized.
; NOTE(review): presumably this pins the cast/reduction cost computation so the
; narrow-type vector form is not judged profitable -- confirm against the
; originating change.
; NOTE(review): the lines holding only `|` are not LLVM IR syntax; they look
; like extraction residue -- confirm and strip before feeding this file to opt.
define i32 @test() {
|
|
; CHECK-LABEL: define i32 @test() {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i8, ptr null, align 1
|
|
; CHECK-NEXT: [[DEC_4:%.*]] = add i8 [[A_PROMOTED]], 0
|
|
; CHECK-NEXT: [[CONV_I_4:%.*]] = zext i8 [[DEC_4]] to i32
|
|
; CHECK-NEXT: [[SUB_I_4:%.*]] = add nuw nsw i32 [[CONV_I_4]], 0
|
|
; CHECK-NEXT: [[DEC_5:%.*]] = add i8 [[A_PROMOTED]], 0
|
|
; CHECK-NEXT: [[CONV_I_5:%.*]] = zext i8 [[DEC_5]] to i32
|
|
; CHECK-NEXT: [[SUB_I_5:%.*]] = add nuw nsw i32 [[CONV_I_5]], 65535
|
|
; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[SUB_I_4]], [[SUB_I_5]]
|
|
; CHECK-NEXT: [[DEC_6:%.*]] = or i8 [[A_PROMOTED]], 0
|
|
; CHECK-NEXT: [[CONV_I_6:%.*]] = zext i8 [[DEC_6]] to i32
|
|
; CHECK-NEXT: [[SUB_I_6:%.*]] = add nuw nsw i32 [[CONV_I_6]], 0
|
|
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP0]], [[SUB_I_6]]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = or i8 [[A_PROMOTED]], 0
|
|
; CHECK-NEXT: [[CONV_I_7:%.*]] = zext i8 [[TMP10]] to i32
|
|
; CHECK-NEXT: [[SUB_I_7:%.*]] = add nuw nsw i32 [[CONV_I_7]], 0
|
|
; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP1]], [[SUB_I_7]]
|
|
; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 65535
|
|
; CHECK-NEXT: store i8 [[TMP10]], ptr null, align 1
|
|
; CHECK-NEXT: [[CALL3:%.*]] = tail call i32 (ptr, ...) null(ptr null, i32 [[TMP9]])
|
|
; CHECK-NEXT: ret i32 0
|
|
;
|
|
entry:
|
|
; Single i8 load from a null pointer; every term below derives from this value.
%a.promoted = load i8, ptr null, align 1
|
|
; Terms 4 and 5 are formed with `add i8 ..., 0`; terms 6 and 7 (further down)
; are formed with `or i8 ..., 0`, so the four i8 producers do not share one opcode.
%dec.4 = add i8 %a.promoted, 0
|
|
%conv.i.4 = zext i8 %dec.4 to i32
|
|
%sub.i.4 = add nuw nsw i32 %conv.i.4, 0
|
|
%dec.5 = add i8 %a.promoted, 0
|
|
%conv.i.5 = zext i8 %dec.5 to i32
|
|
; Only this term adds a non-zero constant (65535); the other three add 0.
%sub.i.5 = add nuw nsw i32 %conv.i.5, 65535
|
|
; Start of the i32 OR-chain that combines the four widened terms.
%0 = or i32 %sub.i.4, %sub.i.5
|
|
%dec.6 = or i8 %a.promoted, 0
|
|
%conv.i.6 = zext i8 %dec.6 to i32
|
|
%sub.i.6 = add nuw nsw i32 %conv.i.6, 0
|
|
%1 = or i32 %0, %sub.i.6
|
|
%dec.7 = or i8 %a.promoted, 0
|
|
%conv.i.7 = zext i8 %dec.7 to i32
|
|
%sub.i.7 = add nuw nsw i32 %conv.i.7, 0
|
|
%2 = or i32 %1, %sub.i.7
|
|
; Keep only the low 16 bits (65535 == 0xFFFF) of the OR-combined value.
%3 = and i32 %2, 65535
|
|
; %dec.7 is also stored back as an i8, so it has a use outside the OR-chain.
store i8 %dec.7, ptr null, align 1
|
|
; Variadic call through a null callee consumes the masked i32 result.
%call3 = tail call i32 (ptr, ...) null(ptr null, i32 %3)
|
|
ret i32 0
|
|
}
|