Files
clang-p2996/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll
Alexey Bataev a612524197 [SLP]Fix the cost of the reduction result to the final type.
Need to fix the way the cost is calculated; otherwise, the wrong cast opcode
can be selected, leading to an over-optimistic vector cost. Also need
to take the reduction type size into account.

Reviewers: RKSimon

Reviewed By: RKSimon

Pull Request: https://github.com/llvm/llvm-project/pull/87528
2024-04-07 09:51:47 -04:00

50 lines
2.2 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
; SLP-vectorizer regression input: four i8 values derived from one load are
; each zero-extended to i32, passed through an `add nuw nsw`, combined with a
; chain of `or`s, and the result is masked to its low 16 bits.
; The autogenerated assertions below expect the function to leave the pass
; in scalar form (no vector types appear), with the `nuw nsw` flags on the
; i32 adds unchanged from the input.
define i32 @test() {
; CHECK-LABEL: define i32 @test() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_PROMOTED:%.*]] = load i8, ptr null, align 1
; CHECK-NEXT: [[DEC_4:%.*]] = add i8 [[A_PROMOTED]], 0
; CHECK-NEXT: [[CONV_I_4:%.*]] = zext i8 [[DEC_4]] to i32
; CHECK-NEXT: [[SUB_I_4:%.*]] = add nuw nsw i32 [[CONV_I_4]], 0
; CHECK-NEXT: [[DEC_5:%.*]] = add i8 [[A_PROMOTED]], 0
; CHECK-NEXT: [[CONV_I_5:%.*]] = zext i8 [[DEC_5]] to i32
; CHECK-NEXT: [[SUB_I_5:%.*]] = add nuw nsw i32 [[CONV_I_5]], 65535
; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[SUB_I_4]], [[SUB_I_5]]
; CHECK-NEXT: [[DEC_6:%.*]] = or i8 [[A_PROMOTED]], 0
; CHECK-NEXT: [[CONV_I_6:%.*]] = zext i8 [[DEC_6]] to i32
; CHECK-NEXT: [[SUB_I_6:%.*]] = add nuw nsw i32 [[CONV_I_6]], 0
; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP0]], [[SUB_I_6]]
; CHECK-NEXT: [[TMP10:%.*]] = or i8 [[A_PROMOTED]], 0
; CHECK-NEXT: [[CONV_I_7:%.*]] = zext i8 [[TMP10]] to i32
; CHECK-NEXT: [[SUB_I_7:%.*]] = add nuw nsw i32 [[CONV_I_7]], 0
; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP1]], [[SUB_I_7]]
; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 65535
; CHECK-NEXT: store i8 [[TMP10]], ptr null, align 1
; CHECK-NEXT: [[CALL3:%.*]] = tail call i32 (ptr, ...) null(ptr null, i32 [[TMP9]])
; CHECK-NEXT: ret i32 0
;
entry:
; Single i8 source value; every chain below starts from it.
%a.promoted = load i8, ptr null, align 1
; Chain .4: i8 `add 0`, zext to i32, then i32 `add nuw nsw 0`.
%dec.4 = add i8 %a.promoted, 0
%conv.i.4 = zext i8 %dec.4 to i32
%sub.i.4 = add nuw nsw i32 %conv.i.4, 0
; Chain .5: same shape, but the i32 add uses the constant 65535. That is the
; largest unsigned 16-bit value, so this add cannot wrap in i32 (the zext'd
; operand is at most 255) but would wrap in i16 for any nonzero operand.
; NOTE(review): presumably this is the "wrapping flags" case the file name
; refers to -- confirm against the SLP min-bitwidth logic.
%dec.5 = add i8 %a.promoted, 0
%conv.i.5 = zext i8 %dec.5 to i32
%sub.i.5 = add nuw nsw i32 %conv.i.5, 65535
; Start of the `or` chain that combines the four i32 values.
%0 = or i32 %sub.i.4, %sub.i.5
; Chain .6: uses i8 `or 0` instead of `add 0` as the first operation.
%dec.6 = or i8 %a.promoted, 0
%conv.i.6 = zext i8 %dec.6 to i32
%sub.i.6 = add nuw nsw i32 %conv.i.6, 0
%1 = or i32 %0, %sub.i.6
; Chain .7: same shape as .6; %dec.7 is also the value stored below.
%dec.7 = or i8 %a.promoted, 0
%conv.i.7 = zext i8 %dec.7 to i32
%sub.i.7 = add nuw nsw i32 %conv.i.7, 0
%2 = or i32 %1, %sub.i.7
; Only the low 16 bits of the combined value are kept.
%3 = and i32 %2, 65535
; Users of the results: an i8 store of %dec.7 and a variadic call taking the
; masked value. Both the store address and the callee are the null pointer.
store i8 %dec.7, ptr null, align 1
%call3 = tail call i32 (ptr, ...) null(ptr null, i32 %3)
ret i32 0
}