clang-p2996/llvm/test/CodeGen/X86/copy-eflags.ll
Chandler Carruth 5ecd81aab0 [x86][eflags] Fix PR37431 by teaching the EFLAGS copy lowering to specially handle SETB_C* pseudo instructions.

Summary:
While the logic here is somewhat similar to the arithmetic lowering, it
is different enough that it made sense to have its own function.
I tried a bunch of different optimizations here, but none worked well,
so I gave up and just always do the arithmetic-based lowering.
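
Roughly, where a SETB_C* pseudo would normally expand to a single SBB of
a register with itself (all-ones if CF is set, zero otherwise), the
arithmetic-based rewrite materializes the carry and negates it, along the
lines of this sketch (exact registers and widths vary):

    setb   %al              # materialize CF as 0 or 1
    movzbl %al, %eax        # zero-extend to the pseudo's width
    negl   %eax             # 0 - CF: all-ones if CF was set, else zero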

Looking at code from the PR test case, we actually pessimize a bunch of
code when generating these. Because SETB_C* pseudo instructions clobber
EFLAGS, we end up creating a bunch of copies of EFLAGS to feed multiple
SETB_C* pseudos from a single set of EFLAGS. This in turn causes the
lowering code to ruin all the clever code generation that SETB_C* was
hoping to achieve. None of this is needed. Whenever we're generating
multiple SETB_C* instructions from a single set of EFLAGS, we should
instead generate a single maximally wide one and extract subregs for all
the different desired widths. That would result in substantially better
code generation. But this patch doesn't attempt to address that.
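
As a purely hypothetical illustration (not something this patch emits),
that would consume the flags once with a wide SBB and let the narrower
uses read sub-registers:

    sbbl  %eax, %eax        # one wide all-ones/all-zeros mask from CF
    movl  %eax, %ecx        # 32-bit consumer reads the full register
    movb  %al, %dl          # 8-bit consumer reads the low sub-register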

The test case from the PR is included as well as more directed testing
of the specific lowering pattern used for these pseudos.

Reviewers: craig.topper

Subscribers: sanjoy, mcrosier, llvm-commits, hiraditya

Differential Revision: https://reviews.llvm.org/D46799

llvm-svn: 332389
2018-05-15 20:16:57 +00:00


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X64
;
; Test patterns that require preserving and restoring flags.
@b = common global i8 0, align 1
@c = common global i32 0, align 4
@a = common global i8 0, align 1
@d = common global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
declare void @external(i32)
; A test that re-uses flags in interesting ways due to volatile accesses.
; Specifically, the first increment's flags are reused for the branch despite
; being clobbered by the second increment.
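; In the checks below, ZF from the first increment is captured with sete (into
; %dl on x86-32 and %sil on x86-64) before the second increment clobbers
; EFLAGS, and the branch re-tests that byte with testb.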
define i32 @test1() nounwind {
; X32-LABEL: test1:
; X32: # %bb.0: # %entry
; X32-NEXT: movb b, %cl
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: incb %al
; X32-NEXT: movb %al, b
; X32-NEXT: incl c
; X32-NEXT: sete %dl
; X32-NEXT: movb a, %ah
; X32-NEXT: movb %ah, %ch
; X32-NEXT: incb %ch
; X32-NEXT: cmpb %cl, %ah
; X32-NEXT: sete d
; X32-NEXT: movb %ch, a
; X32-NEXT: testb %dl, %dl
; X32-NEXT: jne .LBB0_2
; X32-NEXT: # %bb.1: # %if.then
; X32-NEXT: movsbl %al, %eax
; X32-NEXT: pushl %eax
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
; X32-NEXT: .LBB0_2: # %if.end
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: test1:
; X64: # %bb.0: # %entry
; X64-NEXT: movb {{.*}}(%rip), %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: incb %al
; X64-NEXT: movb %al, {{.*}}(%rip)
; X64-NEXT: incl {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: movb {{.*}}(%rip), %cl
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: incb %dl
; X64-NEXT: cmpb %dil, %cl
; X64-NEXT: sete {{.*}}(%rip)
; X64-NEXT: movb %dl, {{.*}}(%rip)
; X64-NEXT: testb %sil, %sil
; X64-NEXT: jne .LBB0_2
; X64-NEXT: # %bb.1: # %if.then
; X64-NEXT: pushq %rax
; X64-NEXT: movsbl %al, %edi
; X64-NEXT: callq external
; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB0_2: # %if.end
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
entry:
%bval = load i8, i8* @b
%inc = add i8 %bval, 1
store volatile i8 %inc, i8* @b
%cval = load volatile i32, i32* @c
%inc1 = add nsw i32 %cval, 1
store volatile i32 %inc1, i32* @c
%aval = load volatile i8, i8* @a
%inc2 = add i8 %aval, 1
store volatile i8 %inc2, i8* @a
%cmp = icmp eq i8 %aval, %bval
%conv5 = zext i1 %cmp to i8
store i8 %conv5, i8* @d
%tobool = icmp eq i32 %inc1, 0
br i1 %tobool, label %if.end, label %if.then
if.then:
%conv6 = sext i8 %inc to i32
call void @external(i32 %conv6)
br label %if.end
if.end:
ret i32 0
}
; Preserve increment flags across a call.
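; In the checks below, ZF from the increment is captured with setne into %bl,
; a callee-saved register, so the condition survives the call and is re-tested
; with testb afterwards.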
define i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
; X32-NEXT: setne %bl
; X32-NEXT: pushl $42
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
; X32-NEXT: testb %bl, %bl
; X32-NEXT: je .LBB1_1
; X32-NEXT: # %bb.2: # %else
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
; X32-NEXT: .LBB1_1: # %then
; X32-NEXT: movl $64, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: test2:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: incl (%rdi)
; X64-NEXT: setne %bl
; X64-NEXT: movl $42, %edi
; X64-NEXT: callq external
; X64-NEXT: testb %bl, %bl
; X64-NEXT: je .LBB1_1
; X64-NEXT: # %bb.2: # %else
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
; X64-NEXT: .LBB1_1: # %then
; X64-NEXT: movl $64, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
entry:
%val = load i32, i32* %ptr
%inc = add i32 %val, 1
store i32 %inc, i32* %ptr
%cmp = icmp eq i32 %inc, 0
call void @external(i32 42)
br i1 %cmp, label %then, label %else
then:
ret i32 64
else:
ret i32 0
}
declare void @external_a()
declare void @external_b()
; This lowers to a conditional tail call instead of a conditional branch. This
; is tricky because we can only do this from a leaf function, and so we have to
; use volatile stores similar to test1 to force the save and restore of
; a condition without calling another function. We then set up subsequent calls
; in tail position.
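; In the checks below, ZF from the memory increment is captured with setne %al
; before the second increment clobbers EFLAGS, and the re-tested byte picks
; which external function is tail-called.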
define void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
; X32-NEXT: setne %al
; X32-NEXT: incb a
; X32-NEXT: sete d
; X32-NEXT: testb %al, %al
; X32-NEXT: jne external_b # TAILCALL
; X32-NEXT: # %bb.1: # %then
; X32-NEXT: jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64: # %bb.0: # %entry
; X64-NEXT: incl (%rdi)
; X64-NEXT: setne %al
; X64-NEXT: incb {{.*}}(%rip)
; X64-NEXT: sete {{.*}}(%rip)
; X64-NEXT: testb %al, %al
; X64-NEXT: jne external_b # TAILCALL
; X64-NEXT: # %bb.1: # %then
; X64-NEXT: jmp external_a # TAILCALL
entry:
%val = load i32, i32* %ptr
%inc = add i32 %val, 1
store i32 %inc, i32* %ptr
%cmp = icmp eq i32 %inc, 0
%aval = load volatile i8, i8* @a
%inc2 = add i8 %aval, 1
store volatile i8 %inc2, i8* @a
%cmp2 = icmp eq i8 %inc2, 0
%conv5 = zext i1 %cmp2 to i8
store i8 %conv5, i8* @d
br i1 %cmp, label %then, label %else
then:
tail call void @external_a()
ret void
else:
tail call void @external_b()
ret void
}
; Test a function that gets special select lowering into CFG with copied EFLAGS
; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
; cross-block rewrites in at least some narrow cases.
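; In the X32 checks below, the signed-less-than condition is materialized into
; bytes with setl (%al and %dl) and re-tested with testb in later blocks, while
; the X64 checks repeat the cmpq so that jl consumes EFLAGS directly across
; blocks.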
define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
; X32-LABEL: PR37100:
; X32: # %bb.0: # %bb
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pushl %ebx
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: pushl %edi
; X32-NEXT: .cfi_def_cfa_offset 16
; X32-NEXT: pushl %esi
; X32-NEXT: .cfi_def_cfa_offset 20
; X32-NEXT: .cfi_offset %esi, -20
; X32-NEXT: .cfi_offset %edi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-NEXT: jmp .LBB3_1
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB3_5: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: idivl %ebp
; X32-NEXT: .LBB3_1: # %bb1
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movsbl %cl, %eax
; X32-NEXT: movl %eax, %edx
; X32-NEXT: sarl $31, %edx
; X32-NEXT: cmpl %eax, %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: sbbl %edx, %eax
; X32-NEXT: setl %al
; X32-NEXT: setl %dl
; X32-NEXT: movzbl %dl, %ebp
; X32-NEXT: negl %ebp
; X32-NEXT: testb %al, %al
; X32-NEXT: jne .LBB3_3
; X32-NEXT: # %bb.2: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movb %ch, %cl
; X32-NEXT: .LBB3_3: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movb %cl, (%ebx)
; X32-NEXT: movl (%edi), %edx
; X32-NEXT: testb %al, %al
; X32-NEXT: jne .LBB3_5
; X32-NEXT: # %bb.4: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64: # %bb.0: # %bb
; X64-NEXT: movq %rdx, %r10
; X64-NEXT: jmp .LBB3_1
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_5: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: idivl %esi
; X64-NEXT: .LBB3_1: # %bb1
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movsbq %dil, %rax
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: cmpq %rax, %r10
; X64-NEXT: setl %sil
; X64-NEXT: negl %esi
; X64-NEXT: cmpq %rax, %r10
; X64-NEXT: jl .LBB3_3
; X64-NEXT: # %bb.2: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movl %ecx, %edi
; X64-NEXT: .LBB3_3: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movb %dil, (%r8)
; X64-NEXT: jl .LBB3_5
; X64-NEXT: # %bb.4: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movl (%r9), %esi
; X64-NEXT: jmp .LBB3_5
bb:
br label %bb1
bb1:
%tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
%tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
%tmp3 = icmp sgt i16 %tmp2, 7
%tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
%tmp5 = sext i8 %tmp to i64
%tmp6 = icmp slt i64 %arg3, %tmp5
%tmp7 = sext i1 %tmp6 to i32
%tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
store volatile i8 %tmp8, i8* %ptr1
%tmp9 = load volatile i32, i32* %ptr2
%tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
%tmp11 = srem i32 0, %tmp10
%tmp12 = trunc i32 %tmp11 to i16
br label %bb1
}
; Use a particular instruction pattern that lowers to the post-RA pseudo used
; to turn SETB into an SBB pattern, in order to make sure that kind of use of
; a copied EFLAGS continues to work.
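; In the X32 checks below, one all-ones/all-zeros value is still produced
; directly with sbbb, while the other is built from the saved setb result via
; a zero-extend and a subtract from zero (the arithmetic-based lowering for
; the SETB_C* pseudos); the X64 checks repeat the cmpq and keep both values in
; sbb form.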
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
; X32-LABEL: PR37431:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %esi, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %eax
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: sarl $31, %ecx
; X32-NEXT: cmpl %eax, %eax
; X32-NEXT: sbbl %ecx, %eax
; X32-NEXT: setb %al
; X32-NEXT: sbbb %cl, %cl
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movb %cl, (%edx)
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: subl %eax, %ecx
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: idivl %ecx
; X32-NEXT: movb %dl, (%esi)
; X32-NEXT: popl %esi
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl
;
; X64-LABEL: PR37431:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movslq (%rdi), %rax
; X64-NEXT: cmpq %rax, %rax
; X64-NEXT: sbbb %dl, %dl
; X64-NEXT: cmpq %rax, %rax
; X64-NEXT: movb %dl, (%rsi)
; X64-NEXT: sbbl %esi, %esi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: idivl %esi
; X64-NEXT: movb %dl, (%rcx)
; X64-NEXT: retq
entry:
%tmp = load i32, i32* %arg1
%tmp1 = sext i32 %tmp to i64
%tmp2 = icmp ugt i64 %tmp1, undef
%tmp3 = zext i1 %tmp2 to i8
%tmp4 = sub i8 0, %tmp3
store i8 %tmp4, i8* %arg2
%tmp5 = sext i8 %tmp4 to i32
%tmp6 = srem i32 0, %tmp5
%tmp7 = trunc i32 %tmp6 to i8
store i8 %tmp7, i8* %arg3
ret void
}