specially handle SETB_C* pseudo instructions.

Summary:
While the logic here is somewhat similar to the arithmetic lowering, it is
different enough that it made sense to have its own function. I actually tried
a bunch of different optimizations here and none worked well, so I gave up and
just always do the arithmetic-based lowering.

Looking at code from the PR test case, we actually pessimize a bunch of code
when generating these. Because SETB_C* pseudo instructions clobber EFLAGS, we
end up creating a bunch of copies of EFLAGS to feed multiple SETB_C* pseudos
from a single set of EFLAGS. This in turn causes the lowering code to ruin all
the clever code generation that SETB_C* was hoping to achieve. None of this is
needed: whenever we're generating multiple SETB_C* instructions from a single
set of EFLAGS, we should instead generate a single maximally wide one and
extract subregs for all the different desired widths. That would result in
substantially better code generation, but this patch doesn't attempt to
address it.

The test case from the PR is included, as well as more directed testing of the
specific lowering pattern used for these pseudos.

Reviewers: craig.topper

Subscribers: sanjoy, mcrosier, llvm-commits, hiraditya

Differential Revision: https://reviews.llvm.org/D46799

llvm-svn: 332389
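As a rough illustration (register choices are arbitrary; AT&T syntax as in the
checks below), the SETB_C* pseudos aim for a single sbb of a register with
itself, while the arithmetic-based lowering falls back to materializing the
carry flag and negating it, the same shape visible in the PR37431 checks:

  # Form the SETB_C* pseudo wants: turn CF directly into an all-ones/zero mask.
  sbbl    %ecx, %ecx          # %ecx = CF ? -1 : 0

  # Arithmetic-based fallback once EFLAGS has been copied through a GPR.
  setb    %al                 # %al = CF
  movzbl  %al, %eax
  xorl    %ecx, %ecx
  subl    %eax, %ecx          # %ecx = 0 - CF = CF ? -1 : 0

The "maximally wide" suggestion above would instead emit one 64-bit form, e.g.
sbbq %rax, %rax, and take %eax/%ax/%al as subregisters for the narrower widths
rather than re-deriving each mask from a separate EFLAGS copy.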
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X64
;
; Test patterns that require preserving and restoring flags.

@b = common global i8 0, align 1
@c = common global i32 0, align 4
@a = common global i8 0, align 1
@d = common global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

declare void @external(i32)

; A test that re-uses flags in interesting ways due to volatile accesses.
; Specifically, the first increment's flags are reused for the branch despite
; being clobbered by the second increment.
define i32 @test1() nounwind {
; X32-LABEL: test1:
; X32: # %bb.0: # %entry
; X32-NEXT: movb b, %cl
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: incb %al
; X32-NEXT: movb %al, b
; X32-NEXT: incl c
; X32-NEXT: sete %dl
; X32-NEXT: movb a, %ah
; X32-NEXT: movb %ah, %ch
; X32-NEXT: incb %ch
; X32-NEXT: cmpb %cl, %ah
; X32-NEXT: sete d
; X32-NEXT: movb %ch, a
; X32-NEXT: testb %dl, %dl
; X32-NEXT: jne .LBB0_2
; X32-NEXT: # %bb.1: # %if.then
; X32-NEXT: movsbl %al, %eax
; X32-NEXT: pushl %eax
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
; X32-NEXT: .LBB0_2: # %if.end
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: test1:
; X64: # %bb.0: # %entry
; X64-NEXT: movb {{.*}}(%rip), %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: incb %al
; X64-NEXT: movb %al, {{.*}}(%rip)
; X64-NEXT: incl {{.*}}(%rip)
; X64-NEXT: sete %sil
; X64-NEXT: movb {{.*}}(%rip), %cl
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: incb %dl
; X64-NEXT: cmpb %dil, %cl
; X64-NEXT: sete {{.*}}(%rip)
; X64-NEXT: movb %dl, {{.*}}(%rip)
; X64-NEXT: testb %sil, %sil
; X64-NEXT: jne .LBB0_2
; X64-NEXT: # %bb.1: # %if.then
; X64-NEXT: pushq %rax
; X64-NEXT: movsbl %al, %edi
; X64-NEXT: callq external
; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB0_2: # %if.end
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
entry:
  %bval = load i8, i8* @b
  %inc = add i8 %bval, 1
  store volatile i8 %inc, i8* @b
  %cval = load volatile i32, i32* @c
  %inc1 = add nsw i32 %cval, 1
  store volatile i32 %inc1, i32* @c
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp = icmp eq i8 %aval, %bval
  %conv5 = zext i1 %cmp to i8
  store i8 %conv5, i8* @d
  %tobool = icmp eq i32 %inc1, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:
  %conv6 = sext i8 %inc to i32
  call void @external(i32 %conv6)
  br label %if.end

if.end:
  ret i32 0
}

; Preserve increment flags across a call.
define i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
; X32-NEXT: setne %bl
; X32-NEXT: pushl $42
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
; X32-NEXT: testb %bl, %bl
; X32-NEXT: je .LBB1_1
; X32-NEXT: # %bb.2: # %else
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
; X32-NEXT: .LBB1_1: # %then
; X32-NEXT: movl $64, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: test2:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: incl (%rdi)
; X64-NEXT: setne %bl
; X64-NEXT: movl $42, %edi
; X64-NEXT: callq external
; X64-NEXT: testb %bl, %bl
; X64-NEXT: je .LBB1_1
; X64-NEXT: # %bb.2: # %else
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
; X64-NEXT: .LBB1_1: # %then
; X64-NEXT: movl $64, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  call void @external(i32 42)
  br i1 %cmp, label %then, label %else

then:
  ret i32 64

else:
  ret i32 0
}

declare void @external_a()
declare void @external_b()

; This lowers to a conditional tail call instead of a conditional branch. This
; is tricky because we can only do this from a leaf function, and so we have to
; use volatile stores similar to test1 to force the save and restore of
; a condition without calling another function. We then set up subsequent calls
; in tail position.
define void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
; X32-NEXT: setne %al
; X32-NEXT: incb a
; X32-NEXT: sete d
; X32-NEXT: testb %al, %al
; X32-NEXT: jne external_b # TAILCALL
; X32-NEXT: # %bb.1: # %then
; X32-NEXT: jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64: # %bb.0: # %entry
; X64-NEXT: incl (%rdi)
; X64-NEXT: setne %al
; X64-NEXT: incb {{.*}}(%rip)
; X64-NEXT: sete {{.*}}(%rip)
; X64-NEXT: testb %al, %al
; X64-NEXT: jne external_b # TAILCALL
; X64-NEXT: # %bb.1: # %then
; X64-NEXT: jmp external_a # TAILCALL
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp2 = icmp eq i8 %inc2, 0
  %conv5 = zext i1 %cmp2 to i8
  store i8 %conv5, i8* @d
  br i1 %cmp, label %then, label %else

then:
  tail call void @external_a()
  ret void

else:
  tail call void @external_b()
  ret void
}

; Test a function that gets special select lowering into CFG with copied EFLAGS
; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
; cross-block rewrites in at least some narrow cases.
define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
; X32-LABEL: PR37100:
; X32: # %bb.0: # %bb
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pushl %ebx
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: pushl %edi
; X32-NEXT: .cfi_def_cfa_offset 16
; X32-NEXT: pushl %esi
; X32-NEXT: .cfi_def_cfa_offset 20
; X32-NEXT: .cfi_offset %esi, -20
; X32-NEXT: .cfi_offset %edi, -16
; X32-NEXT: .cfi_offset %ebx, -12
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-NEXT: jmp .LBB3_1
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB3_5: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: idivl %ebp
; X32-NEXT: .LBB3_1: # %bb1
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movsbl %cl, %eax
; X32-NEXT: movl %eax, %edx
; X32-NEXT: sarl $31, %edx
; X32-NEXT: cmpl %eax, %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: sbbl %edx, %eax
; X32-NEXT: setl %al
; X32-NEXT: setl %dl
; X32-NEXT: movzbl %dl, %ebp
; X32-NEXT: negl %ebp
; X32-NEXT: testb %al, %al
; X32-NEXT: jne .LBB3_3
; X32-NEXT: # %bb.2: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movb %ch, %cl
; X32-NEXT: .LBB3_3: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movb %cl, (%ebx)
; X32-NEXT: movl (%edi), %edx
; X32-NEXT: testb %al, %al
; X32-NEXT: jne .LBB3_5
; X32-NEXT: # %bb.4: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movl %edx, %ebp
; X32-NEXT: jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64: # %bb.0: # %bb
; X64-NEXT: movq %rdx, %r10
; X64-NEXT: jmp .LBB3_1
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_5: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: idivl %esi
; X64-NEXT: .LBB3_1: # %bb1
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movsbq %dil, %rax
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: cmpq %rax, %r10
; X64-NEXT: setl %sil
; X64-NEXT: negl %esi
; X64-NEXT: cmpq %rax, %r10
; X64-NEXT: jl .LBB3_3
; X64-NEXT: # %bb.2: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movl %ecx, %edi
; X64-NEXT: .LBB3_3: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movb %dil, (%r8)
; X64-NEXT: jl .LBB3_5
; X64-NEXT: # %bb.4: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movl (%r9), %esi
; X64-NEXT: jmp .LBB3_5
bb:
  br label %bb1

bb1:
  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
  %tmp3 = icmp sgt i16 %tmp2, 7
  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
  %tmp5 = sext i8 %tmp to i64
  %tmp6 = icmp slt i64 %arg3, %tmp5
  %tmp7 = sext i1 %tmp6 to i32
  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
  store volatile i8 %tmp8, i8* %ptr1
  %tmp9 = load volatile i32, i32* %ptr2
  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
  %tmp11 = srem i32 0, %tmp10
  %tmp12 = trunc i32 %tmp11 to i16
  br label %bb1
}

; Use a particular instruction pattern that lowers to the post-RA pseudo used
; to turn SETB into an SBB pattern, in order to make sure that kind of usage
; of a copied EFLAGS continues to work.
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
; X32-LABEL: PR37431:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %esi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %esi, -8
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %eax
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: sarl $31, %ecx
; X32-NEXT: cmpl %eax, %eax
; X32-NEXT: sbbl %ecx, %eax
; X32-NEXT: setb %al
; X32-NEXT: sbbb %cl, %cl
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movb %cl, (%edx)
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: subl %eax, %ecx
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: xorl %edx, %edx
; X32-NEXT: idivl %ecx
; X32-NEXT: movb %dl, (%esi)
; X32-NEXT: popl %esi
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl
;
; X64-LABEL: PR37431:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rdx, %rcx
; X64-NEXT: movslq (%rdi), %rax
; X64-NEXT: cmpq %rax, %rax
; X64-NEXT: sbbb %dl, %dl
; X64-NEXT: cmpq %rax, %rax
; X64-NEXT: movb %dl, (%rsi)
; X64-NEXT: sbbl %esi, %esi
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: idivl %esi
; X64-NEXT: movb %dl, (%rcx)
; X64-NEXT: retq
entry:
  %tmp = load i32, i32* %arg1
  %tmp1 = sext i32 %tmp to i64
  %tmp2 = icmp ugt i64 %tmp1, undef
  %tmp3 = zext i1 %tmp2 to i8
  %tmp4 = sub i8 0, %tmp3
  store i8 %tmp4, i8* %arg2
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = srem i32 0, %tmp5
  %tmp7 = trunc i32 %tmp6 to i8
  store i8 %tmp7, i8* %arg3
  ret void
}