[InstCombine] Add oneuse checks to shr + cmp constant folds.
This change has virtually no code size regressions on the llvm test suite (+ SPECs) while having these improvements (measured with -Os on Darwin arm64):

External/S.../CFP2006/450.soplex/450.soplex    214024.00  213920.00  -0.0%
External/S...7speed/641.leela_s/641.leela_s     93412.00   93348.00  -0.1%
External/S...17rate/541.leela_r/541.leela_r     93412.00   93348.00  -0.1%
MultiSourc.../Applications/JM/lencod/lencod    426044.00  425748.00  -0.1%
MultiSourc...rks/mediabench/gsm/toast/toast     20436.00   20416.00  -0.1%
MultiSourc...ench/telecomm-gsm/telecomm-gsm     20436.00   20416.00  -0.1%
MultiSourc...Prolangs-C/assembler/assembler     16172.00   16156.00  -0.1%
MultiSourc...nch/mpeg2/mpeg2dec/mpeg2decode     35332.00   35256.00  -0.2%
SingleSour...Adobe-C++/stepanov_abstraction      6904.00    6888.00  -0.2%
External/SPEC/CINT2000/254.gap/254.gap         366060.00  365132.00  -0.3%
MultiSourc...-ProxyApps-C++/PENNANT/PENNANT     79688.00   79484.00  -0.3%
External/S...NT2006/464.h264ref/464.h264ref    352044.00  351132.00  -0.3%
SingleSour...arks/Adobe-C++/functionobjects     15524.00   15480.00  -0.3%
SingleSour...arks/Adobe-C++/stepanov_vector     10728.00   10696.00  -0.3%
SingleSour...ks/Misc-C++/stepanov_container     16900.00   16848.00  -0.3%
MultiSource/Applications/oggenc/oggenc         124184.00  123780.00  -0.3%
SingleSour...tout-C++/Shootout-C++-wordfreq      7060.00    7036.00  -0.3%
MultiSourc...ity-rijndael/security-rijndael      8976.00    8936.00  -0.4%
MultiSource/Benchmarks/McCat/18-imp/imp          9816.00    9772.00  -0.4%
SingleSour...chmarks/Misc-C++/stepanov_v1p2      1772.00    1764.00  -0.5%
MultiSourc...iabench/g721/g721encode/encode      5492.00    5464.00  -0.5%
MultiSourc...rks/McCat/03-testtrie/testtrie      1364.00    1344.00  -1.5%
SingleSour.../execute/GCC-C-execute-pr42833       400.00     364.00  -9.0%

Doing so also prevents a regression described in https://reviews.llvm.org/D143624

Differential Revision: https://reviews.llvm.org/D149918
This commit is contained in:
@@ -2451,7 +2451,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
|
||||
// constant-value-based preconditions in the folds below, then we could assert
|
||||
// those conditions rather than checking them. This is difficult because of
|
||||
// undef/poison (PR34838).
|
||||
if (IsAShr) {
|
||||
if (IsAShr && Shr->hasOneUse()) {
|
||||
if (IsExact || Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) {
|
||||
// When ShAmtC can be shifted losslessly:
|
||||
// icmp PRED (ashr exact X, ShAmtC), C --> icmp PRED X, (C << ShAmtC)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
|
||||
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
|
||||
|
||||
; This test is pre-committed to show sub-optimal codegen due to
|
||||
; min/max idiom breakage. On AArch64, these constants are also expensive to materialize,
|
||||
; Check we don't have sub-optimal codegen due to min/max idiom breakage.
|
||||
; On AArch64, these constants are also expensive to materialize,
|
||||
; and therefore generate poor code vs maintaining the min/max idiom.
|
||||
|
||||
define i64 @dont_break_minmax_i64(i64 %conv, i64 %conv2) {
|
||||
@@ -10,8 +10,7 @@ define i64 @dont_break_minmax_i64(i64 %conv, i64 %conv2) {
|
||||
; CHECK-SAME: (i64 [[CONV:%.*]], i64 [[CONV2:%.*]]) {
|
||||
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV2]]
|
||||
; CHECK-NEXT: [[SHR:%.*]] = ashr i64 [[MUL]], 4
|
||||
; CHECK-NEXT: [[CMP4_I:%.*]] = icmp slt i64 [[MUL]], 5579712
|
||||
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CMP4_I]], i64 [[SHR]], i64 348731
|
||||
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = call i64 @llvm.smin.i64(i64 [[SHR]], i64 348731)
|
||||
; CHECK-NEXT: ret i64 [[SPEC_SELECT_I]]
|
||||
;
|
||||
%mul = mul nsw i64 %conv, %conv2
|
||||
|
||||
@@ -900,7 +900,7 @@ define i1 @ashrsgt_01_00(i4 %x) {
|
||||
define i1 @ashrsgt_01_00_multiuse(i4 %x, ptr %p) {
|
||||
; CHECK-LABEL: @ashrsgt_01_00_multiuse(
|
||||
; CHECK-NEXT: [[S:%.*]] = ashr i4 [[X:%.*]], 1
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[X]], 1
|
||||
; CHECK-NEXT: [[C:%.*]] = icmp sgt i4 [[S]], 0
|
||||
; CHECK-NEXT: store i4 [[S]], ptr [[P:%.*]], align 1
|
||||
; CHECK-NEXT: ret i1 [[C]]
|
||||
;
|
||||
|
||||
@@ -5,8 +5,7 @@ define i32 @testa(i32 %mul) {
|
||||
; CHECK-LABEL: define i32 @testa(
|
||||
; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 15
|
||||
; CHECK-NEXT: [[CMP4_I:%.*]] = icmp slt i32 [[MUL]], 1073741824
|
||||
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CMP4_I]], i32 [[SHR]], i32 32767
|
||||
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[SHR]], i32 32767)
|
||||
; CHECK-NEXT: ret i32 [[SPEC_SELECT_I]]
|
||||
;
|
||||
%shr = ashr i32 %mul, 15
|
||||
@@ -20,11 +19,8 @@ define i32 @testb(i32 %mul) {
|
||||
; CHECK-LABEL: define i32 @testb(
|
||||
; CHECK-SAME: i32 [[MUL:%.*]]) local_unnamed_addr #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[SHR102:%.*]] = ashr i32 [[MUL]], 7
|
||||
; CHECK-NEXT: [[CMP4_I:%.*]] = icmp sgt i32 [[MUL]], 16383
|
||||
; CHECK-NEXT: [[RETVAL_0_I:%.*]] = select i1 [[CMP4_I]], i32 127, i32 -128
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[MUL]], 16384
|
||||
; CHECK-NEXT: [[CLEANUP_DEST_SLOT_0_I:%.*]] = icmp ult i32 [[TMP1]], 32768
|
||||
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CLEANUP_DEST_SLOT_0_I]], i32 [[SHR102]], i32 [[RETVAL_0_I]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[SHR102]], i32 -128)
|
||||
; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = tail call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127)
|
||||
; CHECK-NEXT: ret i32 [[SPEC_SELECT_I]]
|
||||
;
|
||||
%shr102 = ashr i32 %mul, 7
|
||||
|
||||
Reference in New Issue
Block a user