Add support for the moveq instruction, which is both faster and smaller (1/2 to 1/3 the size) than a move with immediate to register. This change introduces the instruction, along with a set of pseudoinstructions, lowered post-RA, that handle immediate moves to a register. Pseudos are used because moveq can only write to the full register, which makes matching i8 and i16 immediate loads difficult in tablegen. Furthermore, selecting moveq before RA constrains the immediate to be moved into a data register, which may not be optimal. The bulk of this change consists of fixes to existing tests, which cover the new functionality sufficiently.
84 lines
2.5 KiB
LLVM
84 lines
2.5 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=m68k --regalloc=fast %s -o - | FileCheck %s

; foo1: a loop whose back-edge condition is the constant comparison
; `icmp eq i32 0, 4` (always false). The i1 is defined in %do.body but consumed
; by the conditional branch in %land.end, so it is live across a block
; boundary. The expected output below materializes it with `moveq #0` in the
; entry block, spills it to a 1-byte stack slot, and reloads it in the loop
; header before the cmpi.b/bne pair.
define dso_local void @foo1() {
; CHECK-LABEL: foo1:
; CHECK: .cfi_startproc
; CHECK-NEXT: ; %bb.0: ; %entry
; CHECK-NEXT: suba.l #2, %sp
; CHECK-NEXT: .cfi_def_cfa_offset -6
; CHECK-NEXT: moveq #0, %d0
; CHECK-NEXT: move.b %d0, (0,%sp) ; 1-byte Folded Spill
; CHECK-NEXT: .LBB0_1: ; %do.body
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: move.b (0,%sp), %d0 ; 1-byte Folded Reload
; CHECK-NEXT: cmpi.b #0, %d0
; CHECK-NEXT: bne .LBB0_1
; CHECK-NEXT: ; %bb.2: ; %do.end
; CHECK-NEXT: adda.l #2, %sp
; CHECK-NEXT: rts
entry:
  br label %do.body

do.body: ; preds = %land.end, %entry
  %cmp5 = icmp eq i32 0, 4
  br label %land.end

land.end: ; preds = %do.body
  br i1 %cmp5, label %do.body, label %do.end

do.end: ; preds = %land.end
  ret void
}

; foo2: loads an i8 through the pointer argument, masks it with 1, and branches
; on `icmp sle i8 %3, 1`. The %if arm adds the masked value to the loaded byte,
; the %else arm subtracts it; both results merge through an i8 phi in %cont,
; which is sign-extended to the i32 return value. The expected output below
; pins the byte-sized spills and reloads at (0,%sp) and (2,%sp) around the
; branch and the phi, followed by the ext.w/ext.l sign-extension sequence.
define i32 @foo2(ptr noundef %0) {
; CHECK-LABEL: foo2:
; CHECK: .cfi_startproc
; CHECK-NEXT: ; %bb.0: ; %entry
; CHECK-NEXT: suba.l #4, %sp
; CHECK-NEXT: .cfi_def_cfa_offset -8
; CHECK-NEXT: move.l (8,%sp), %a0
; CHECK-NEXT: move.b (%a0), %d0
; CHECK-NEXT: move.b %d0, (0,%sp) ; 1-byte Folded Spill
; CHECK-NEXT: and.b #1, %d0
; CHECK-NEXT: move.b %d0, (2,%sp) ; 1-byte Folded Spill
; CHECK-NEXT: sub.b #1, %d0
; CHECK-NEXT: bgt .LBB1_2
; CHECK-NEXT: ; %bb.1: ; %if
; CHECK-NEXT: move.b (2,%sp), %d0 ; 1-byte Folded Reload
; CHECK-NEXT: move.b (0,%sp), %d1 ; 1-byte Folded Reload
; CHECK-NEXT: add.b %d1, %d0
; CHECK-NEXT: bra .LBB1_3
; CHECK-NEXT: .LBB1_2: ; %else
; CHECK-NEXT: move.b (2,%sp), %d1 ; 1-byte Folded Reload
; CHECK-NEXT: move.b (0,%sp), %d0 ; 1-byte Folded Reload
; CHECK-NEXT: sub.b %d1, %d0
; CHECK-NEXT: move.b %d0, (0,%sp) ; 1-byte Folded Spill
; CHECK-NEXT: .LBB1_3: ; %cont
; CHECK-NEXT: move.b %d0, (2,%sp) ; 1-byte Folded Spill
; CHECK-NEXT: move.b (2,%sp), %d0 ; 1-byte Folded Reload
; CHECK-NEXT: ext.w %d0
; CHECK-NEXT: ext.l %d0
; CHECK-NEXT: adda.l #4, %sp
; CHECK-NEXT: rts
entry:
  %1 = getelementptr i8, ptr %0, i32 0
  %2 = load i8, ptr %1
  %3 = and i8 %2, 1
  %4 = icmp sle i8 %3, 1
  br i1 %4, label %if, label %else

if:
  %5 = add i8 %3, %2
  br label %cont

else:
  %6 = sub i8 %2, %3
  br label %cont

cont:
  %7 = phi i8 [%5, %if], [%6, %else]
  %8 = sext i8 %7 to i32
  ret i32 %8
}