SLP includes an analysis of the minimum bitwidth at which the actual integer operations can be emitted. This allows reducing register pressure and improving performance. Currently, the analysis is used only by the cost model, and the subsequent transformation relies on InstructionCombiner. It is better to do the transformation directly in SLP, since that reduces compile time and fixes cost model issues.
30 lines
1.4 KiB
LLVM
30 lines
1.4 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: opt --passes=slp-vectorizer -slp-threshold=-25 -mtriple=x86_64-unknown-linux-gnu -S < %s | FileCheck %s
|
|
|
|
; Input for the SLP vectorizer: two parallel scalar chains, each of the form
;   lshr i32 -> zext nneg to i64 -> getelementptr i8, ptr null
; One chain uses only constant operands; the other starts from the i8
; argument %0 widened to i32. The function always returns null, and neither
; getelementptr result is used by any other instruction in this function.
;
; The expected output recorded in the FileCheck assertions below is:
;   - both scalar lshr are merged into one <2 x i32> lshr whose lane 0 is the
;     constant 0 and whose lane 1 is the widened argument;
;   - the vector result is truncated to <2 x i8>;
;   - each lane is extracted and zero-extended to i64 before feeding its
;     getelementptr.
; NOTE(review): presumably this exercises the minimum-bitwidth analysis
; mentioned in the description at the top of the file - confirm.
; NOTE(review): the bare `|` lines look like extraction residue rather than
; LLVM IR; verify against the upstream copy of this test before running it.
define ptr @test(i8 %0) {
|
|
; CHECK-LABEL: define ptr @test(
|
|
; CHECK-SAME: i8 [[TMP0:%.*]]) {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: [[CONV12_I:%.*]] = zext i8 [[TMP0]] to i32
|
|
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[CONV12_I]], i32 1
|
|
; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], zeroinitializer
|
|
; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
|
|
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i8> [[TMP3]], i32 0
|
|
; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX50_I:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]]
|
|
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i8> [[TMP3]], i32 1
|
|
; CHECK-NEXT: [[TMP7:%.*]] = zext i8 [[TMP6]] to i64
|
|
; CHECK-NEXT: [[ARRAYIDX16_I:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]]
|
|
; CHECK-NEXT: ret ptr null
|
|
;
|
|
entry:
|
|
  ; Chain 1: all operands are constants (i32 0 shifted right by 0). In the
  ; expected output this chain corresponds to vector lane 0.
  %shr48.i = lshr i32 0, 0
|
|
  %idxprom49.i = zext nneg i32 %shr48.i to i64
|
|
  %arrayidx50.i = getelementptr i8, ptr null, i64 %idxprom49.i
|
|
  ; Chain 2: the i8 argument is zero-extended to i32 and then shifted right
  ; by 0. In the expected output this chain corresponds to vector lane 1.
  %conv12.i = zext i8 %0 to i32
|
|
  %shr14.i = lshr i32 %conv12.i, 0
|
|
  %idxprom15.i = zext nneg i32 %shr14.i to i64
|
|
  %arrayidx16.i = getelementptr i8, ptr null, i64 %idxprom15.i
|
|
  ; The return value does not depend on either chain.
  ret ptr null
|
|
}
|