Currently MachineCSE forbids PRE when the instruction reads a physical register. Relax this so that PRE is allowed when the value being read is the same as the value that would be read at the point the instruction would be hoisted to. This is being done in preparation for adding FPCR handling to the AArch64 backend, in order to prevent it from worsening the generated code, but for targets that already have a similar register it should improve things.

This patch affects code generation in several tests. The new code looks better except in Thumb2/LowOverheadLoops/memcall.ll, where we perform PRE but the LowOverheadLoops transformation then undoes it. Also, in AMDGPU/selectcc-opt.ll the CHECK lines make things look worse, but the function as a whole is actually better (as a MOV is PRE'd).

Differential Revision: https://reviews.llvm.org/D136675
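The heart of the relaxed rule is a reaching-definition comparison: an instruction that reads a physical register may only be hoisted if the definition of that register seen at the hoist point is the same one seen at the instruction's current position. The following is a minimal, self-contained C++ sketch of that idea, not the actual MachineCSE implementation: it is simplified to a straight-line instruction sequence rather than machine basic blocks with a dominator-based hoist point, and every type and helper name here is hypothetical.

// Toy model of the relaxed PRE legality check (hypothetical names, not LLVM API).
#include <cassert>
#include <optional>
#include <string>
#include <vector>

struct Inst {
  std::string Opcode;
  std::optional<std::string> PhysRegDef; // physical register written, if any
  std::optional<std::string> PhysRegUse; // physical register read, if any
};

// Scan backwards from Index and return the index of the nearest preceding
// definition of Reg, or -1 if there is none in this straight-line sketch.
static int reachingDef(const std::vector<Inst> &Insts, int Index,
                       const std::string &Reg) {
  for (int I = Index - 1; I >= 0; --I)
    if (Insts[I].PhysRegDef == Reg)
      return I;
  return -1;
}

// Relaxed check: if the instruction reads no physical register, PRE was
// already allowed; if it does, PRE is only allowed when hoisting would not
// change which definition of that register the instruction observes.
static bool isPRECandidate(const std::vector<Inst> &Insts, int InstIdx,
                           int HoistIdx) {
  const Inst &MI = Insts[InstIdx];
  if (!MI.PhysRegUse)
    return true;
  return reachingDef(Insts, InstIdx, *MI.PhysRegUse) ==
         reachingDef(Insts, HoistIdx, *MI.PhysRegUse);
}

int main() {
  // Stand-in sequence: hoist point at index 1, candidate instruction at index 3.
  std::vector<Inst> Insts = {
      {"def_fpcr", "FPCR", std::nullopt},    // 0: defines the control register
      {"other", std::nullopt, std::nullopt}, // 1: candidate hoist point
      {"other", std::nullopt, std::nullopt}, // 2
      {"fadd", std::nullopt, "FPCR"},        // 3: reads FPCR
  };
  assert(isPRECandidate(Insts, 3, 1));  // same reaching def: hoisting allowed

  Insts[2] = {"def_fpcr", "FPCR", std::nullopt}; // redefine FPCR in between
  assert(!isPRECandidate(Insts, 3, 1)); // hoisting would change the value read
  return 0;
}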
; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
;rdar://8003725

declare void @llvm.trap()

@G1 = external global i32
@G2 = external global i32

define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) {
entry:
; CHECK-LABEL: f1:
; CHECK: cmp
; CHECK: moveq
; CHECK-NOT: cmp
; CHECK: mov{{eq|ne}}
  %tmp1 = icmp eq i32 %cond1, 0
  %tmp2 = select i1 %tmp1, i32 %x1, i32 %x2
  %tmp3 = select i1 %tmp1, i32 %x2, i32 %x3
  %tmp4 = add i32 %tmp2, %tmp3
  ret i32 %tmp4
}

@foo = external global i32
@bar = external global [250 x i8], align 1

; CSE of cmp across BB boundary
; rdar://10660865
define void @f2() nounwind ssp {
entry:
; CHECK-LABEL: f2:
; CHECK: cmp
; CHECK: bxlt
; CHECK-NOT: cmp
; CHECK: movle
  %0 = load i32, i32* @foo, align 4
  %cmp28 = icmp sgt i32 %0, 0
  br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader

for.body.lr.ph:                                   ; preds = %entry
  %1 = icmp sgt i32 %0, 1
  %smax = select i1 %1, i32 %0, i32 1
  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8], [250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i1 false)
  call void @llvm.trap()
  unreachable

for.cond1.preheader:                              ; preds = %entry
  ret void
}

declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind

; rdar://12462006
define i8* @f3(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
entry:
; CHECK-LABEL: f3:
; CHECK-NOT: sub
; CHECK: cmp
; CHECK: blt
  %0 = load i32, i32* %offset, align 4
  %cmp = icmp slt i32 %0, %size
  %s = sub nsw i32 %0, %size
  %size2 = sub nsw i32 %size, 0
  br i1 %cmp, label %return, label %if.end

if.end:
; We are checking cse between %sub here and %s in entry block.
  %sub = sub nsw i32 %0, %size2
  %s2 = sub nsw i32 %s, %size
  %s3 = sub nsw i32 %sub, %s2
; CHECK: sub [[R1:r[0-9]+]], [[R2:r[0-9]+]], r2
; CHECK: sub [[R3:r[0-9]+]], r2, [[R1]]
; CHECK: add [[R4:r[0-9]+]], [[R1]], [[R3]]
; CHECK-NOT: sub
; CHECK: str
  store i32 %s3, i32* %offset, align 4
  %add.ptr = getelementptr inbounds i8, i8* %base, i32 %sub
  br label %return

return:
  %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
  ret i8* %retval.0
}

; The cmp of %val should not be hoisted above the preceding conditional branch
define void @f4(i32** %ptr1, i64* %ptr2, i64 %val) {
entry:
; CHECK-LABEL: f4:
; CHECK: cmp
; CHECK: movne
; CHECK: strne
; CHECK: orrs
; CHECK-NOT: subs
; CHECK-NOT: sbcs
; CHECK: beq
  %tobool.not = icmp eq i32** %ptr1, null
  br i1 %tobool.not, label %if.end, label %if.then

if.then:
  store i32* null, i32** %ptr1, align 4
  br label %if.end

if.end:
; CHECK: subs
; CHECK: sbcs
; CHECK: bxlt lr
  %tobool1 = icmp ne i64 %val, 0
  %cmp = icmp slt i64 %val, 10
  %or.cond = and i1 %tobool1, %cmp
  br i1 %or.cond, label %cleanup, label %if.end3

if.end3:
; CHECK: subs
; CHECK: sbc
  %sub = add nsw i64 %val, -10
  store i64 %sub, i64* %ptr2, align 8
  br label %cleanup

cleanup:
  ret void
}