clang-p2996/llvm/test/CodeGen/X86/zext-sext.ll
Matthias Braun 189900eb14 X86: Stop assigning register costs for longer encodings.
This stops reporting CostPerUse 1 for `R8`-`R15` and `XMM8`-`XMM31`.
This was previously done because encoding an instruction that uses these
registers requires a REX prefix, resulting in longer instruction
encodings. I found that this regresses the quality of the register
allocation, as the costs impose an ordering on eviction candidates. I
also feel there is a bit of an impedance mismatch: the actual cost
occurs when encoding instructions that use those registers, but the
order of VReg assignments is not primarily determined by the number of
Defs+Uses.
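
To make the cost concrete, here is a minimal sketch of the encoding
difference (bytes derived from the standard x86-64 encoding rules; the
extra byte is the REX prefix, and exact encodings vary by instruction):

    addl %edx, %ecx      # 01 d1      - 2 bytes, legacy registers only
    addl %r8d, %ecx      # 44 01 c1   - 3 bytes, REX.R prefix required for %r8d

That one-byte REX prefix per use is what the CostPerUse value of 1 was
modeling.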

I did extensive measurements with the llvm-test-suite (with SPEC2006 +
SPEC2017 included); internal services showed similar patterns. Generally
there are a lot of improvements but also a lot of regressions, yet on
average the allocation quality seems to improve at the cost of a small
code-size regression.
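
As a hedged illustration of how the static numbers below can be
gathered (this assumes a build with LLVM statistics enabled; `llc
-stats` is the relevant upstream flag, and the grep pattern is just an
example):

    llc -stats -o /dev/null test.ll 2>&1 | grep regalloc

The `regalloc.NumSpills` / `regalloc.NumReloads` names are LLVM
statistics emitted by the register allocator; the dynamic counts come
from optimization remarks scaled by execution frequency.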

Results for measuring static and dynamic instruction counts:

Dynamic Counts (scaled by execution frequency) / Optimization Remarks:
    Spills+FoldedSpills   -5.6%
    Reloads+FoldedReloads -4.2%
    Copies                -0.1%

Static / LLVM Statistics:
    regalloc.NumSpills    mean -1.6%, geomean -2.8%
    regalloc.NumReloads   mean -1.7%, geomean -3.1%
    size..text            mean +0.4%, geomean +0.4%

Static / LLVM Statistics:
    regalloc.NumSpills    mean -2.2%, geomean -3.1%
    regalloc.NumReloads   mean -2.6%, geomean -3.9%
    size..text            mean +0.6%, geomean +0.6%

Static / LLVM Statistics:
    regalloc.NumSpills   mean -3.0%
    regalloc.NumReloads  mean -3.3%
    size..text           mean +0.3%, geomean +0.3%

Differential Revision: https://reviews.llvm.org/D133902
2022-09-30 16:01:33 -07:00


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=atom | FileCheck %s
; <rdar://problem/8006248>
; This randomly started passing after an unrelated change; if it fails again, it
; might be worth looking at PR12324 (misched bringup).
@llvm.used = appending global [1 x ptr] [ptr @func], section "llvm.metadata"
define void @func(ptr %a, ptr %b, ptr %c, ptr %d) nounwind {
; CHECK-LABEL: func:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movslq (%rsi), %rax
; CHECK-NEXT: movl $4, %esi
; CHECK-NEXT: subq %rax, %rsi
; CHECK-NEXT: movq (%rdx), %rax
; CHECK-NEXT: movswl 8(%rdi), %edx
; CHECK-NEXT: movswl (%rax,%rsi,2), %eax
; CHECK-NEXT: imull %edx, %eax
; CHECK-NEXT: addl $2138875574, %eax # imm = 0x7F7CA6B6
; CHECK-NEXT: cmpl $2138875574, %eax # imm = 0x7F7CA6B6
; CHECK-NEXT: setl %dl
; CHECK-NEXT: cmpl $-8608074, %eax # imm = 0xFF7CA6B6
; CHECK-NEXT: setge %sil
; CHECK-NEXT: andb %dl, %sil
; CHECK-NEXT: movzbl %sil, %edx
; CHECK-NEXT: movslq %eax, %rsi
; CHECK-NEXT: movq %rsi, %rdi
; CHECK-NEXT: negl %edx
; CHECK-NEXT: subq %rax, %rdi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testl $-2, %edx
; CHECK-NEXT: cmovneq %rax, %rdi
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: cmovnsq %rax, %rdi
; CHECK-NEXT: movq (%rcx), %rax
; CHECK-NEXT: subq %rdi, %rsi
; CHECK-NEXT: leaq -2138875574(%rax,%rsi), %rax
; CHECK-NEXT: movq %rax, (%rcx)
; CHECK-NEXT: retq
entry:
%tmp103 = getelementptr inbounds [40 x i16], ptr %a, i64 0, i64 4
%tmp104 = load i16, ptr %tmp103, align 2
%tmp105 = sext i16 %tmp104 to i32
%tmp106 = load i32, ptr %b, align 4
%tmp107 = sub nsw i32 4, %tmp106
%tmp108 = load ptr, ptr %c, align 8
%tmp109 = sext i32 %tmp107 to i64
%tmp110 = getelementptr inbounds i16, ptr %tmp108, i64 %tmp109
%tmp111 = load i16, ptr %tmp110, align 1
%tmp112 = sext i16 %tmp111 to i32
%tmp = mul i32 355244649, %tmp112
%tmp1 = mul i32 %tmp, %tmp105
%tmp2 = add i32 %tmp1, 2138875574
%tmp3 = add i32 %tmp2, 1546991088
%tmp4 = mul i32 %tmp3, 2122487257
%tmp5 = icmp sge i32 %tmp4, 2138875574
%tmp6 = icmp slt i32 %tmp4, -8608074
%tmp7 = or i1 %tmp5, %tmp6
%outSign = select i1 %tmp7, i32 1, i32 -1
%tmp8 = icmp slt i32 %tmp4, 0
%tmp9 = icmp eq i32 %outSign, 1
%tmp10 = and i1 %tmp8, %tmp9
%tmp11 = sext i32 %tmp4 to i64
%tmp12 = add i64 %tmp11, 5089792279245435153
%tmp13 = sub i64 %tmp12, 2138875574
%tmp14 = zext i32 %tmp4 to i64
%tmp15 = sub i64 %tmp11, %tmp14
%tmp16 = select i1 %tmp10, i64 %tmp15, i64 0
%tmp17 = sub i64 %tmp13, %tmp16
%tmp18 = mul i64 %tmp17, 4540133155013554595
%tmp19 = sub i64 %tmp18, 5386586244038704851
%tmp20 = add i64 %tmp19, -1368057358110947217
%tmp21 = mul i64 %tmp20, -422037402840850817
%tmp115 = load i64, ptr %d, align 8
%alphaX = mul i64 468858157810230901, %tmp21
%alphaXbetaY = add i64 %alphaX, %tmp115
%transformed = add i64 %alphaXbetaY, 9040145182981852475
store i64 %transformed, ptr %d, align 8
ret void
}