The first attempt missed changing test files for tools (update_llc_test_checks.py). Original commit message: This implements the main suggested change from issue #56498. Using the shorter (non-extending) instruction with only -Oz ("minsize") rather than -Os ("optsize") is left as a possible follow-up. As noted in the bug report, the zero-extending load may have shorter latency/better throughput across a wide range of x86 micro-arches, and it avoids a potential false dependency. The cost is an extra instruction byte. This could cause perf ups and downs from secondary effects, but I don't think it is possible to account for those in advance, and that will likely also depend on exact micro-arch. This does bring LLVM x86 codegen more in line with existing gcc codegen, so if problems are exposed they are more likely to occur for both compilers. Differential Revision: https://reviews.llvm.org/D129775
27 lines
696 B
LLVM
27 lines
696 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- | FileCheck %s

; Byte-load selection on i686 across two paths of a single function.
; %Q is %P advanced by one i32 element (byte offset 4), and %U is %Q advanced
; by three more bytes (byte offset 7). The expected assembly below folds the
; load of %S into a compare against memory, and performs the load of %V with a
; zero-extending movzbl into %eax; the constant-return path writes only %al.
define i8 @test(ptr %P) nounwind {
; CHECK-LABEL: test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    cmpb $0, 4(%eax)
; CHECK-NEXT:    je .LBB0_1
; CHECK-NEXT:  # %bb.2: # %F
; CHECK-NEXT:    movzbl 7(%eax), %eax
; CHECK-NEXT:    retl
; CHECK-NEXT:  .LBB0_1: # %TB
; CHECK-NEXT:    movb $4, %al
; CHECK-NEXT:    retl
  %Q = getelementptr i32, ptr %P, i32 1      ; %P + 4 bytes
  %S = load i8, ptr %Q
  %T = icmp eq i8 %S, 0
  br i1 %T, label %TB, label %F

TB:                                          ; taken when the byte at offset 4 is zero
  ret i8 4

F:                                           ; taken when the byte at offset 4 is non-zero
  %U = getelementptr i8, ptr %Q, i32 3       ; %P + 7 bytes
  %V = load i8, ptr %U
  ret i8 %V
}