Files
clang-p2996/llvm/test/CodeGen/X86/pr34381.ll
Sanjay Patel f0dd12ec5c [x86] use zero-extending load of a byte outside of loops too (2nd try)
The first attempt missed changing test files for tools
(update_llc_test_checks.py).

Original commit message:

This implements the main suggested change from issue #56498.
Using the shorter (non-extending) instruction with only
-Oz ("minsize") rather than -Os ("optsize") is left as a
possible follow-up.

As noted in the bug report, the zero-extending load may have
shorter latency/better throughput across a wide range of x86
micro-arches, and it avoids a potential false dependency.
The cost is an extra instruction byte.

This could cause perf ups and downs from secondary effects,
but I don't think it is possible to account for those in
advance, and that will likely also depend on exact micro-arch.
This does bring LLVM x86 codegen more in line with existing
gcc codegen, so if problems are exposed they are more likely
to occur for both compilers.

Differential Revision: https://reviews.llvm.org/D129775
2022-07-19 21:27:08 -04:00

42 lines
1.4 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=slow-incdec | FileCheck %s
;
; The command above compiles this file for x86-64 Linux with the slow-incdec
; subtarget feature enabled and pipes the resulting assembly into FileCheck.
;
; External symbols used by @_Z3foov. All are declared dso_local; the expected
; assembly addresses each one directly as sym(%rip).
;   var_21  - i32 constant, left-hand operand of the unsigned compare
;   var_29  - i8 constant, sign-extended and negated to form the right-hand operand
;   var_390 - i32 global, receives the 0/1 result of the compare
;   var_11  - i8 constant, source of the byte copy
;   var_370 - i8 global, destination of the byte copy
@var_21 = external dso_local constant i32, align 4
@var_29 = external dso_local constant i8, align 1
@var_390 = external dso_local global i32, align 4
@var_11 = external dso_local constant i8, align 1
@var_370 = external dso_local global i8, align 1
; void foo() - body of the regression test.
;
; Net effect of the IR below:
;   var_390 = (var_21 <u -(i32)var_29) ? 1 : 0
;   var_370 = var_11
;
; The first store goes through a deliberately roundabout chain
; (zext, add -1, trunc, icmp ne 0, xor true, zext) that reduces to the original
; unsigned compare. The expected assembly shows it folded to a single
; cmpl/setb pair writing into a pre-zeroed %ecx.
; The second store sign-extends a byte to i16 and truncates it straight back,
; which is an identity; the expected assembly is a zero-extending byte load
; (movzbl) followed by a byte store.
;
; NOTE(review): the "Function Attrs" line below lists attributes, but the
; define carries no attribute group in this file, so the list appears to be a
; leftover from the original reproducer - confirm before relying on it.
; Function Attrs: noinline nounwind optnone uwtable
define void @_Z3foov() {
; CHECK-LABEL: _Z3foov:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movsbl var_29(%rip), %eax
; CHECK-NEXT: negl %eax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: cmpl %eax, var_21(%rip)
; CHECK-NEXT: setb %cl
; CHECK-NEXT: movl %ecx, var_390(%rip)
; CHECK-NEXT: movzbl var_11(%rip), %eax
; CHECK-NEXT: movb %al, var_370(%rip)
; CHECK-NEXT: retq
entry:
; Operands of the compare: var_21 and the negated, sign-extended var_29.
%0 = load i32, ptr @var_21, align 4
%1 = load i8, ptr @var_29, align 1
%conv = sext i8 %1 to i32
%sub = sub nsw i32 0, %conv
; Unsigned less-than: var_21 <u -var_29.
%cmp = icmp ult i32 %0, %sub
; cmp true  -> 1 -> 0  -> i8 0    -> (ne 0) false -> (xor true) true  -> 1
; cmp false -> 0 -> -1 -> i8 0xFF -> (ne 0) true  -> (xor true) false -> 0
; i.e. %conv3 equals zext(%cmp).
%conv1 = zext i1 %cmp to i32
%add = add nsw i32 %conv1, -1
%conv2 = trunc i32 %add to i8
%tobool = icmp ne i8 %conv2, 0
%lnot = xor i1 %tobool, true
%conv3 = zext i1 %lnot to i32
store i32 %conv3, ptr @var_390, align 4
; Byte copy var_11 -> var_370; the sext to i16 followed by trunc to i8
; leaves the low byte unchanged.
%2 = load i8, ptr @var_11, align 1
%conv4 = sext i8 %2 to i16
%conv5 = trunc i16 %conv4 to i8
store i8 %conv5, ptr @var_370, align 1
ret void
}