Imagine a loop of the form:
```
preheader:
%r = def
header:
bcc latch, inner
inner1:
..
inner2:
b latch
latch:
%r = subs %r
bcc header
```
Code can spend a significant amount of its time on the header<->latch
path, only rarely entering the inner part of the loop. The greedy
register allocator can nevertheless prefer to spill _around_ %r, adding
spills around the subs in the loop, which can be very detrimental to
performance. (The case I am looking at is actually a very deeply nested
set of loops that repeat the header<->latch pattern at multiple
different levels.)
The greedy RA applies a preference to spill the IV, because it is live
through the header block. This patch adds a heuristic to prevent that
for variables that look like IVs, similar to the extra spill weight that
is already added to variables that look like IVs because they are
expensive to spill. As a result, spills are more likely to be pushed
into the inner blocks, where they are executed less often and are
cheaper than spills around the IV.
This gives an 8% speedup in the exchange benchmark from SPEC2017 when
compiled with flang-new, whilst importantly stabilising the scores so
that they are less sensitive to other changes. Running CTMark showed no
difference in compile time. I ran a range of performance benchmarks,
most of which were relatively flat and did not show many large
differences. One matrix multiply case improved by 21.3% due to the
removal of a cascading chain of spills, and some other knock-on effects
occur which usually cause small differences in the scores.
413 lines
21 KiB
YAML
413 lines
21 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple aarch64 --run-pass=greedy,virtregrewriter -verify-machineinstrs %s -o - | FileCheck %s
|
|
|
|
# We should ideally not spill around any of the SUBSWri instructions in the loop latch blocks (if.end and if.end27).
|
|
|
|
--- |
|
|
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
|
target triple = "aarch64"
|
|
|
|
@g = dso_local local_unnamed_addr global [9 x [9 x i32]] zeroinitializer, align 4
|
|
|
|
; Three-deep loop nest whose loop counters are the values of interest:
;   do.body   .. if.end27 : outer loop, counter %n0.0, index %indvars.iv49
;   do.body12 .. if.end   : middle loop, counter %n1.1, index %indvars.iv
;   for.body              : innermost loop, 100 iterations, each calling @callee
; The middle loop is only entered when the value loaded from @g in do.body is > 0,
; and the innermost loop only when the value loaded in do.body12 is > 0.
; The %m argument is not used in the body.
define void @test(ptr nocapture noundef readonly %p, i32 noundef %m, ptr noundef %q) {
|
|
entry:
|
|
%0 = load i32, ptr %p, align 4
|
|
; Initial outer-loop counter: the first i32 at %p, clamped to at least 1.
%spec.select = tail call i32 @llvm.smax.i32(i32 %0, i32 1)
|
|
%arrayidx2 = getelementptr inbounds i32, ptr %p, i64 1
|
|
%1 = load i32, ptr %arrayidx2, align 4
|
|
; Initial value of %n1.0: the second i32 at %p, clamped to at least 1.
%cond8 = tail call i32 @llvm.smax.i32(i32 %1, i32 1)
|
|
br label %do.body
|
|
|
|
; Outer loop header.
do.body: ; preds = %if.end27, %entry
|
|
%indvars.iv49 = phi i64 [ %indvars.iv.next50, %if.end27 ], [ 0, %entry ]
|
|
%n0.0 = phi i32 [ %dec30, %if.end27 ], [ %spec.select, %entry ]
|
|
%n1.0 = phi i32 [ %n1.2, %if.end27 ], [ %cond8, %entry ]
|
|
%arrayidx9 = getelementptr inbounds [9 x [9 x i32]], ptr @g, i64 0, i64 %indvars.iv49
|
|
%2 = load i32, ptr %arrayidx9, align 4
|
|
%cmp11 = icmp sgt i32 %2, 0
|
|
; When the loaded value is <= 0 the middle loop is skipped and control goes straight to the outer latch.
br i1 %cmp11, label %do.body12.preheader, label %if.end27
|
|
|
|
do.body12.preheader: ; preds = %do.body
|
|
br label %do.body12
|
|
|
|
; Middle loop header.
do.body12: ; preds = %do.body12.preheader, %if.end
|
|
%indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %do.body12.preheader ]
|
|
%n1.1 = phi i32 [ %dec, %if.end ], [ %n1.0, %do.body12.preheader ]
|
|
%arrayidx14 = getelementptr inbounds [9 x [9 x i32]], ptr @g, i64 0, i64 %indvars.iv
|
|
%3 = load i32, ptr %arrayidx14, align 4
|
|
%cmp16 = icmp sgt i32 %3, 0
|
|
; When the loaded value is <= 0 the innermost loop is skipped and control goes straight to the middle latch.
br i1 %cmp16, label %if.then17, label %if.end
|
|
|
|
if.then17: ; preds = %do.body12
|
|
%arrayidx19 = getelementptr inbounds i32, ptr %q, i64 %indvars.iv
|
|
; Volatile load that seeds the accumulator %s.046 of the innermost loop.
%4 = load volatile i32, ptr %arrayidx19, align 4
|
|
br label %for.body
|
|
|
|
for.cond.cleanup: ; preds = %for.body
|
|
; The clobber list names x0, x2-x28, fp and lr; x1 is the only one of x0-x28 that is not listed.
tail call void asm sideeffect "nop;nop", "~{x0},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr}"() #3
|
|
%sunkaddr = mul i64 %indvars.iv, 4
|
|
%sunkaddr1 = getelementptr inbounds i8, ptr %q, i64 %sunkaddr
|
|
; Writes the accumulated sum back to the i32 slot at byte offset %indvars.iv * 4 from %q.
store volatile i32 %add, ptr %sunkaddr1, align 4
|
|
br label %if.end
|
|
|
|
; Innermost loop: %lsr.iv counts down from 100 to 0, adding each @callee result into the running sum.
for.body: ; preds = %for.body, %if.then17
|
|
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 100, %if.then17 ]
|
|
%s.046 = phi i32 [ %4, %if.then17 ], [ %add, %for.body ]
|
|
%call = tail call i32 @callee() #3
|
|
%add = add nsw i32 %call, %s.046
|
|
%lsr.iv.next = add nsw i32 %lsr.iv, -1
|
|
%exitcond.not = icmp eq i32 %lsr.iv.next, 0
|
|
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
|
|
|
|
; Middle loop latch: advance the index, decrement the counter, and loop back while it is non-zero.
if.end: ; preds = %for.cond.cleanup, %do.body12
|
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
|
%dec = add nsw i32 %n1.1, -1
|
|
%cmp24.not = icmp eq i32 %dec, 0
|
|
br i1 %cmp24.not, label %do.end, label %do.body12
|
|
|
|
do.end: ; preds = %if.end
|
|
%arrayidx26 = getelementptr inbounds i32, ptr %q, i64 %indvars.iv49
|
|
store volatile i32 0, ptr %arrayidx26, align 4
|
|
br label %if.end27
|
|
|
|
; Outer loop latch: advance the index, decrement the counter, and loop back while it is non-zero.
if.end27: ; preds = %do.end, %do.body
|
|
; %n1.2 is 0 when arriving from do.end, otherwise the unchanged %n1.0.
%n1.2 = phi i32 [ 0, %do.end ], [ %n1.0, %do.body ]
|
|
%indvars.iv.next50 = add nuw nsw i64 %indvars.iv49, 1
|
|
%dec30 = add nsw i32 %n0.0, -1
|
|
%cmp31.not = icmp eq i32 %dec30, 0
|
|
br i1 %cmp31.not, label %do.end32, label %do.body
|
|
|
|
do.end32: ; preds = %if.end27
|
|
ret void
|
|
}
|
|
|
|
declare i32 @callee(...)
|
|
|
|
declare i32 @llvm.smax.i32(i32, i32) #2
|
|
...
|
|
---
|
|
name: test
|
|
alignment: 4
|
|
exposesReturnsTwice: false
|
|
legalized: false
|
|
regBankSelected: false
|
|
selected: false
|
|
failedISel: false
|
|
tracksRegLiveness: true
|
|
hasWinCFI: false
|
|
callsEHReturn: false
|
|
callsUnwindInit: false
|
|
hasEHCatchret: false
|
|
hasEHScopes: false
|
|
hasEHFunclets: false
|
|
isOutlined: false
|
|
debugInstrRef: false
|
|
failsVerification: false
|
|
tracksDebugUserValues: false
|
|
registers:
|
|
- { id: 0, class: gpr32, preferred-register: '' }
|
|
- { id: 1, class: gpr32, preferred-register: '' }
|
|
- { id: 2, class: gpr64common, preferred-register: '' }
|
|
- { id: 3, class: gpr32sp, preferred-register: '' }
|
|
- { id: 4, class: gpr32all, preferred-register: '' }
|
|
- { id: 5, class: gpr64common, preferred-register: '' }
|
|
- { id: 6, class: gpr32sp, preferred-register: '' }
|
|
- { id: 7, class: gpr32, preferred-register: '' }
|
|
- { id: 8, class: gpr32sp, preferred-register: '' }
|
|
- { id: 9, class: gpr32, preferred-register: '' }
|
|
- { id: 10, class: gpr32, preferred-register: '' }
|
|
- { id: 11, class: gpr32, preferred-register: '' }
|
|
- { id: 12, class: gpr64sp, preferred-register: '' }
|
|
- { id: 13, class: gpr32, preferred-register: '' }
|
|
- { id: 14, class: gpr32all, preferred-register: '' }
|
|
- { id: 15, class: gpr64sp, preferred-register: '' }
|
|
- { id: 16, class: gpr32, preferred-register: '' }
|
|
- { id: 17, class: gpr64common, preferred-register: '' }
|
|
- { id: 18, class: gpr32, preferred-register: '' }
|
|
- { id: 19, class: gpr64common, preferred-register: '' }
|
|
- { id: 20, class: gpr64all, preferred-register: '' }
|
|
- { id: 21, class: gpr32common, preferred-register: '' }
|
|
- { id: 22, class: gpr32, preferred-register: '' }
|
|
- { id: 23, class: gpr32, preferred-register: '' }
|
|
- { id: 24, class: gpr32common, preferred-register: '' }
|
|
- { id: 25, class: gpr32, preferred-register: '' }
|
|
- { id: 26, class: gpr32, preferred-register: '' }
|
|
- { id: 27, class: gpr64all, preferred-register: '' }
|
|
- { id: 28, class: gpr32, preferred-register: '' }
|
|
- { id: 29, class: gpr64, preferred-register: '' }
|
|
- { id: 30, class: gpr64, preferred-register: '' }
|
|
- { id: 31, class: gpr64common, preferred-register: '' }
|
|
- { id: 32, class: gpr32common, preferred-register: '' }
|
|
- { id: 33, class: gpr32, preferred-register: '' }
|
|
- { id: 34, class: gpr64all, preferred-register: '' }
|
|
- { id: 35, class: gpr64all, preferred-register: '' }
|
|
- { id: 36, class: gpr32, preferred-register: '' }
|
|
- { id: 37, class: gpr64, preferred-register: '' }
|
|
- { id: 38, class: gpr64, preferred-register: '' }
|
|
- { id: 39, class: gpr64common, preferred-register: '' }
|
|
- { id: 40, class: gpr32common, preferred-register: '' }
|
|
- { id: 41, class: gpr32, preferred-register: '' }
|
|
- { id: 42, class: gpr32, preferred-register: '' }
|
|
- { id: 43, class: gpr32, preferred-register: '' }
|
|
- { id: 44, class: gpr32, preferred-register: '' }
|
|
- { id: 45, class: gpr32, preferred-register: '' }
|
|
- { id: 46, class: gpr32, preferred-register: '' }
|
|
- { id: 47, class: gpr32, preferred-register: '' }
|
|
- { id: 48, class: gpr64sp, preferred-register: '' }
|
|
- { id: 49, class: gpr32, preferred-register: '' }
|
|
- { id: 50, class: gpr32all, preferred-register: '' }
|
|
- { id: 51, class: gpr32, preferred-register: '' }
|
|
- { id: 52, class: gpr64sp, preferred-register: '' }
|
|
- { id: 53, class: gpr32, preferred-register: '' }
|
|
- { id: 54, class: gpr64common, preferred-register: '' }
|
|
- { id: 55, class: gpr32common, preferred-register: '' }
|
|
- { id: 56, class: gpr32common, preferred-register: '' }
|
|
- { id: 57, class: gpr64common, preferred-register: '' }
|
|
- { id: 58, class: gpr32common, preferred-register: '' }
|
|
- { id: 59, class: gpr32common, preferred-register: '' }
|
|
- { id: 60, class: gpr32, preferred-register: '' }
|
|
- { id: 61, class: gpr32all, preferred-register: '' }
|
|
liveins:
|
|
- { reg: '$x0', virtual-reg: '%17' }
|
|
- { reg: '$x2', virtual-reg: '%19' }
|
|
frameInfo:
|
|
isFrameAddressTaken: false
|
|
isReturnAddressTaken: false
|
|
hasStackMap: false
|
|
hasPatchPoint: false
|
|
stackSize: 0
|
|
offsetAdjustment: 0
|
|
maxAlignment: 1
|
|
adjustsStack: true
|
|
hasCalls: true
|
|
stackProtector: ''
|
|
functionContext: ''
|
|
maxCallFrameSize: 0
|
|
cvBytesOfCalleeSavedRegisters: 0
|
|
hasOpaqueSPAdjustment: false
|
|
hasVAStart: false
|
|
hasMustTailInVarArgFunc: false
|
|
hasTailCall: false
|
|
localFrameSize: 0
|
|
savePoint: ''
|
|
restorePoint: ''
|
|
fixedStack: []
|
|
stack: []
|
|
entry_values: []
|
|
callSites: []
|
|
debugValueSubstitutions: []
|
|
constants: []
|
|
machineFunctionInfo: {}
|
|
body: |
|
|
; CHECK-LABEL: name: test
|
|
; CHECK: bb.0.entry:
|
|
; CHECK-NEXT: successors: %bb.1(0x80000000)
|
|
; CHECK-NEXT: liveins: $x0, $x2
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: renamable $x10 = COPY $xzr
|
|
; CHECK-NEXT: renamable $w8 = LDRWui renamable $x0, 0 :: (load (s32) from %ir.p)
|
|
; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x0, 1 :: (load (s32) from %ir.arrayidx2)
|
|
; CHECK-NEXT: dead $wzr = SUBSWri renamable $w8, 1, 0, implicit-def $nzcv
|
|
; CHECK-NEXT: renamable $w11 = CSINCWr killed renamable $w8, $wzr, 12, implicit $nzcv
|
|
; CHECK-NEXT: renamable $x8 = COPY killed renamable $x10
|
|
; CHECK-NEXT: dead $wzr = SUBSWri renamable $w9, 1, 0, implicit-def $nzcv
|
|
; CHECK-NEXT: renamable $w10 = CSINCWr killed renamable $w9, $wzr, 12, implicit $nzcv
|
|
; CHECK-NEXT: STRXui renamable $x2, %stack.0, 0 :: (store (s64) into %stack.0)
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.1.do.body:
|
|
; CHECK-NEXT: successors: %bb.3(0x50000000), %bb.2(0x30000000)
|
|
; CHECK-NEXT: liveins: $w10, $w11, $x2, $x8
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: STRXui renamable $x8, %stack.1, 0 :: (store (s64) into %stack.1)
|
|
; CHECK-NEXT: renamable $w9 = MOVi32imm 36, implicit-def $x9
|
|
; CHECK-NEXT: renamable $x8 = MADDXrrr killed renamable $x8, killed renamable $x9, $xzr
|
|
; CHECK-NEXT: renamable $x9 = MOVaddr target-flags(aarch64-page) @g, target-flags(aarch64-pageoff, aarch64-nc) @g
|
|
; CHECK-NEXT: renamable $w8 = LDRWroX killed renamable $x9, killed renamable $x8, 0, 0 :: (load (s32) from %ir.arrayidx9)
|
|
; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w8, 1, 0, implicit-def $nzcv
|
|
; CHECK-NEXT: Bcc 10, %bb.3, implicit $nzcv
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.2:
|
|
; CHECK-NEXT: successors: %bb.10(0x80000000)
|
|
; CHECK-NEXT: liveins: $w10, $w11, $x2
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: renamable $x8 = LDRXui %stack.1, 0 :: (load (s64) from %stack.1)
|
|
; CHECK-NEXT: B %bb.10
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.3.do.body12.preheader:
|
|
; CHECK-NEXT: successors: %bb.4(0x80000000)
|
|
; CHECK-NEXT: liveins: $w10, $w11, $x2
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: renamable $x12 = COPY $xzr
|
|
; CHECK-NEXT: STRWui renamable $w11, %stack.2, 0 :: (store (s32) into %stack.2)
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.4.do.body12:
|
|
; CHECK-NEXT: successors: %bb.5(0x50000000), %bb.8(0x30000000)
|
|
; CHECK-NEXT: liveins: $w10, $w11, $x2, $x12
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: renamable $w8 = MOVi32imm 36, implicit-def $x8
|
|
; CHECK-NEXT: renamable $x8 = MADDXrrr renamable $x12, killed renamable $x8, $xzr
|
|
; CHECK-NEXT: renamable $x9 = MOVaddr target-flags(aarch64-page) @g, target-flags(aarch64-pageoff, aarch64-nc) @g
|
|
; CHECK-NEXT: renamable $w8 = LDRWroX killed renamable $x9, killed renamable $x8, 0, 0 :: (load (s32) from %ir.arrayidx14)
|
|
; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w8, 1, 0, implicit-def $nzcv
|
|
; CHECK-NEXT: Bcc 11, %bb.8, implicit $nzcv
|
|
; CHECK-NEXT: B %bb.5
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.5.if.then17:
|
|
; CHECK-NEXT: successors: %bb.7(0x80000000)
|
|
; CHECK-NEXT: liveins: $w10, $x2, $x12
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: STRWui killed renamable $w10, %stack.3, 0 :: (store (s32) into %stack.3)
|
|
; CHECK-NEXT: STRXui renamable $x12, %stack.4, 0 :: (store (s64) into %stack.4)
|
|
; CHECK-NEXT: renamable $w20 = LDRWroX killed renamable $x2, killed renamable $x12, 0, 1 :: (volatile load (s32) from %ir.arrayidx19)
|
|
; CHECK-NEXT: renamable $w19 = MOVi32imm 100
|
|
; CHECK-NEXT: B %bb.7
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.6.for.cond.cleanup:
|
|
; CHECK-NEXT: successors: %bb.8(0x80000000)
|
|
; CHECK-NEXT: liveins: $w20
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: renamable $w1 = COPY killed renamable $w20
|
|
; CHECK-NEXT: INLINEASM &"nop;nop", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $x0, 12 /* clobber */, implicit-def dead early-clobber $x2, 12 /* clobber */, implicit-def dead early-clobber $x3, 12 /* clobber */, implicit-def dead early-clobber $x4, 12 /* clobber */, implicit-def dead early-clobber $x5, 12 /* clobber */, implicit-def dead early-clobber $x6, 12 /* clobber */, implicit-def dead early-clobber $x7, 12 /* clobber */, implicit-def dead early-clobber $x8, 12 /* clobber */, implicit-def dead early-clobber $x9, 12 /* clobber */, implicit-def dead early-clobber $x10, 12 /* clobber */, implicit-def dead early-clobber $x11, 12 /* clobber */, implicit-def dead early-clobber $x12, 12 /* clobber */, implicit-def dead early-clobber $x13, 12 /* clobber */, implicit-def dead early-clobber $x14, 12 /* clobber */, implicit-def dead early-clobber $x15, 12 /* clobber */, implicit-def dead early-clobber $x16, 12 /* clobber */, implicit-def dead early-clobber $x17, 12 /* clobber */, implicit-def dead early-clobber $x18, 12 /* clobber */, implicit-def dead early-clobber $x19, 12 /* clobber */, implicit-def dead early-clobber $x20, 12 /* clobber */, implicit-def dead early-clobber $x21, 12 /* clobber */, implicit-def dead early-clobber $x22, 12 /* clobber */, implicit-def dead early-clobber $x23, 12 /* clobber */, implicit-def dead early-clobber $x24, 12 /* clobber */, implicit-def dead early-clobber $x25, 12 /* clobber */, implicit-def dead early-clobber $x26, 12 /* clobber */, implicit-def dead early-clobber $x27, 12 /* clobber */, implicit-def dead early-clobber $x28, 12 /* clobber */, implicit-def dead early-clobber $fp, 12 /* clobber */, implicit-def dead early-clobber $lr
|
|
; CHECK-NEXT: renamable $x2 = LDRXui %stack.0, 0 :: (load (s64) from %stack.0)
|
|
; CHECK-NEXT: renamable $x12 = LDRXui %stack.4, 0 :: (load (s64) from %stack.4)
|
|
; CHECK-NEXT: STRWroX killed renamable $w1, renamable $x2, renamable $x12, 0, 1 :: (volatile store (s32) into %ir.sunkaddr1)
|
|
; CHECK-NEXT: renamable $w11 = LDRWui %stack.2, 0 :: (load (s32) from %stack.2)
|
|
; CHECK-NEXT: renamable $w10 = LDRWui %stack.3, 0 :: (load (s32) from %stack.3)
|
|
; CHECK-NEXT: B %bb.8
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.7.for.body:
|
|
; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.7(0x7c000000)
|
|
; CHECK-NEXT: liveins: $w19, $w20
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
|
|
; CHECK-NEXT: BL @callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
|
|
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
|
|
; CHECK-NEXT: renamable $w19 = nsw SUBSWri killed renamable $w19, 1, 0, implicit-def $nzcv
|
|
; CHECK-NEXT: renamable $w20 = nsw ADDWrr killed renamable $w0, killed renamable $w20
|
|
; CHECK-NEXT: Bcc 0, %bb.6, implicit $nzcv
|
|
; CHECK-NEXT: B %bb.7
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.8.if.end:
|
|
; CHECK-NEXT: successors: %bb.9(0x04000000), %bb.4(0x7c000000)
|
|
; CHECK-NEXT: liveins: $w10, $w11, $x2, $x12
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: renamable $w10 = nsw SUBSWri killed renamable $w10, 1, 0, implicit-def $nzcv
|
|
; CHECK-NEXT: renamable $x12 = nuw nsw ADDXri killed renamable $x12, 1, 0
|
|
; CHECK-NEXT: Bcc 1, %bb.4, implicit $nzcv
|
|
; CHECK-NEXT: B %bb.9
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.9.do.end:
|
|
; CHECK-NEXT: successors: %bb.10(0x80000000)
|
|
; CHECK-NEXT: liveins: $w11, $x2
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: renamable $w10 = COPY $wzr
|
|
; CHECK-NEXT: renamable $x8 = LDRXui %stack.1, 0 :: (load (s64) from %stack.1)
|
|
; CHECK-NEXT: STRWroX $wzr, renamable $x2, renamable $x8, 0, 1 :: (volatile store (s32) into %ir.arrayidx26)
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.10.if.end27:
|
|
; CHECK-NEXT: successors: %bb.11(0x04000000), %bb.1(0x7c000000)
|
|
; CHECK-NEXT: liveins: $w10, $w11, $x2, $x8
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: renamable $w11 = nsw SUBSWri killed renamable $w11, 1, 0, implicit-def $nzcv
|
|
; CHECK-NEXT: renamable $x8 = nuw nsw ADDXri killed renamable $x8, 1, 0
|
|
; CHECK-NEXT: Bcc 1, %bb.1, implicit $nzcv
|
|
; CHECK-NEXT: B %bb.11
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: bb.11.do.end32:
|
|
; CHECK-NEXT: RET_ReallyLR
|
|
; Entry block: captures the two live-in arguments and sets up the values used by the loops below.
bb.0.entry:
|
|
successors: %bb.1(0x80000000)
|
|
liveins: $x0, $x2
|
|
|
|
%19:gpr64common = COPY $x2
|
|
%17:gpr64common = COPY $x0
|
|
; %54 starts at zero; it is the index incremented in bb.9.
%54:gpr64common = COPY $xzr
|
|
%21:gpr32common = LDRWui %17, 0 :: (load (s32) from %ir.p)
|
|
%24:gpr32common = LDRWui %17, 1 :: (load (s32) from %ir.arrayidx2)
|
|
; Each SUBSWri/CSINCWr pair below corresponds to one llvm.smax.i32(x, 1) in the IR:
; condition 12 (GT) keeps the loaded value when x - 1 > 0, otherwise yields wzr + 1.
dead $wzr = SUBSWri %21, 1, 0, implicit-def $nzcv
|
|
; %55 is the counter decremented in bb.9.
%55:gpr32common = CSINCWr %21, $wzr, 12, implicit $nzcv
|
|
dead $wzr = SUBSWri %24, 1, 0, implicit-def $nzcv
|
|
; %56 is the counter decremented in bb.7.
%56:gpr32common = CSINCWr %24, $wzr, 12, implicit $nzcv
|
|
; %29 (multiplier 36) and %31 (address of @g) are defined once here and reused by the MADDXrrr/LDRWroX pairs in bb.1 and bb.3.
undef %29.sub_32:gpr64 = MOVi32imm 36
|
|
%31:gpr64common = MOVaddr target-flags(aarch64-page) @g, target-flags(aarch64-pageoff, aarch64-nc) @g
|
|
|
|
; Loop header targeted by the back edge from bb.9.
bb.1.do.body:
|
|
successors: %bb.2(0x50000000), %bb.11(0x30000000)
|
|
|
|
; Offset into @g is %54 * %29; the loaded value decides whether the nested loop is entered.
%30:gpr64 = MADDXrrr %54, %29, $xzr
|
|
%32:gpr32common = LDRWroX %31, %30, 0, 0 :: (load (s32) from %ir.arrayidx9)
|
|
; Flags from %32 - 1; condition 10 (GE) is taken when %32 > 0, otherwise control falls through to bb.11.
dead $wzr = SUBSWri %32, 1, 0, implicit-def $nzcv
|
|
Bcc 10, %bb.2, implicit $nzcv
|
|
|
|
; Empty forwarding block on the bb.1 -> bb.9 path (reached when the Bcc in bb.1 is not taken).
bb.11:
|
|
successors: %bb.9(0x80000000)
|
|
|
|
B %bb.9
|
|
|
|
; Preheader of the loop headed by bb.3: resets the index %57 to zero.
bb.2.do.body12.preheader:
|
|
successors: %bb.3(0x80000000)
|
|
|
|
%57:gpr64common = COPY $xzr
|
|
|
|
; Loop header targeted by the back edge from bb.7.
bb.3.do.body12:
|
|
successors: %bb.4(0x50000000), %bb.7(0x30000000)
|
|
|
|
; Offset into @g is %57 * %29, using the %29 and %31 values defined in bb.0.
%38:gpr64 = MADDXrrr %57, %29, $xzr
|
|
%40:gpr32common = LDRWroX %31, %38, 0, 0 :: (load (s32) from %ir.arrayidx14)
|
|
; Flags from %40 - 1; condition 11 (LT) skips directly to bb.7 when %40 <= 0.
dead $wzr = SUBSWri %40, 1, 0, implicit-def $nzcv
|
|
Bcc 11, %bb.7, implicit $nzcv
|
|
B %bb.4
|
|
|
|
; Sets up the bb.6 loop: %60 is the running sum, %59 the down-counter.
bb.4.if.then17:
|
|
successors: %bb.6(0x80000000)
|
|
|
|
%60:gpr32 = LDRWroX %19, %57, 0, 1 :: (volatile load (s32) from %ir.arrayidx19)
|
|
%59:gpr32common = MOVi32imm 100
|
|
B %bb.6
|
|
|
|
; Exit of the bb.6 loop: an inline asm with a large clobber list, then the store of the sum %60.
bb.5.for.cond.cleanup:
|
|
successors: %bb.7(0x80000000)
|
|
|
|
; Clobbers $x0, $x2-$x28, $fp and $lr; $x1 is the only one of $x0-$x28 that is not listed.
INLINEASM &"nop;nop", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $x0, 12 /* clobber */, implicit-def dead early-clobber $x2, 12 /* clobber */, implicit-def dead early-clobber $x3, 12 /* clobber */, implicit-def dead early-clobber $x4, 12 /* clobber */, implicit-def dead early-clobber $x5, 12 /* clobber */, implicit-def dead early-clobber $x6, 12 /* clobber */, implicit-def dead early-clobber $x7, 12 /* clobber */, implicit-def dead early-clobber $x8, 12 /* clobber */, implicit-def dead early-clobber $x9, 12 /* clobber */, implicit-def dead early-clobber $x10, 12 /* clobber */, implicit-def dead early-clobber $x11, 12 /* clobber */, implicit-def dead early-clobber $x12, 12 /* clobber */, implicit-def dead early-clobber $x13, 12 /* clobber */, implicit-def dead early-clobber $x14, 12 /* clobber */, implicit-def dead early-clobber $x15, 12 /* clobber */, implicit-def dead early-clobber $x16, 12 /* clobber */, implicit-def dead early-clobber $x17, 12 /* clobber */, implicit-def dead early-clobber $x18, 12 /* clobber */, implicit-def dead early-clobber $x19, 12 /* clobber */, implicit-def dead early-clobber $x20, 12 /* clobber */, implicit-def dead early-clobber $x21, 12 /* clobber */, implicit-def dead early-clobber $x22, 12 /* clobber */, implicit-def dead early-clobber $x23, 12 /* clobber */, implicit-def dead early-clobber $x24, 12 /* clobber */, implicit-def dead early-clobber $x25, 12 /* clobber */, implicit-def dead early-clobber $x26, 12 /* clobber */, implicit-def dead early-clobber $x27, 12 /* clobber */, implicit-def dead early-clobber $x28, 12 /* clobber */, implicit-def dead early-clobber $fp, 12 /* clobber */, implicit-def dead early-clobber $lr
|
|
; %60, %19 and %57 are all used after the asm, so they are live across its clobbers.
STRWroX %60, %19, %57, 0, 1 :: (volatile store (s32) into %ir.sunkaddr1)
|
|
B %bb.7
|
|
|
|
; Self-looping block: calls @callee, adds its result into %60, and counts %59 down to zero.
bb.6.for.body:
|
|
successors: %bb.5(0x04000000), %bb.6(0x7c000000)
|
|
|
|
ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp
|
|
BL @callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0
|
|
ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
|
|
%45:gpr32 = COPY $w0
|
|
; The decrement sets NZCV, which the Bcc below consumes; the ADDWrr in between does not define NZCV.
%59:gpr32common = nsw SUBSWri %59, 1, 0, implicit-def $nzcv
|
|
%60:gpr32 = nsw ADDWrr %45, %60
|
|
; Condition 0 (EQ): leave the loop once %59 reaches zero.
Bcc 0, %bb.5, implicit $nzcv
|
|
B %bb.6
|
|
|
|
; Latch of the loop headed by bb.3. This is the if.end block named in the note at the top of
; this file: ideally no spill or reload is placed around the SUBSWri here.
bb.7.if.end:
|
|
successors: %bb.8(0x04000000), %bb.3(0x7c000000)
|
|
|
|
; Counter decrement; it also produces the flags for the back-edge branch.
%56:gpr32common = nsw SUBSWri %56, 1, 0, implicit-def $nzcv
|
|
%57:gpr64common = nuw nsw ADDXri %57, 1, 0
|
|
; Condition 1 (NE): branch back to bb.3 while %56 is non-zero.
Bcc 1, %bb.3, implicit $nzcv
|
|
B %bb.8
|
|
|
|
; Runs after the bb.3 loop finishes: zeroes %56 and stores 0 using %19 as base and %54 as index.
bb.8.do.end:
|
|
successors: %bb.9(0x80000000)
|
|
|
|
%56:gpr32common = COPY $wzr
|
|
STRWroX $wzr, %19, %54, 0, 1 :: (volatile store (s32) into %ir.arrayidx26)
|
|
|
|
; Latch of the loop headed by bb.1. This is the if.end27 block named in the note at the top of
; this file: ideally no spill or reload is placed around the SUBSWri here.
bb.9.if.end27:
|
|
successors: %bb.10(0x04000000), %bb.1(0x7c000000)
|
|
|
|
; Counter decrement; it also produces the flags for the back-edge branch.
%55:gpr32common = nsw SUBSWri %55, 1, 0, implicit-def $nzcv
|
|
%54:gpr64common = nuw nsw ADDXri %54, 1, 0
|
|
; Condition 1 (NE): branch back to bb.1 while %55 is non-zero.
Bcc 1, %bb.1, implicit $nzcv
|
|
B %bb.10
|
|
|
|
; Function exit, reached from bb.9 once the loop headed by bb.1 finishes.
bb.10.do.end32:
|
|
RET_ReallyLR
|
|
|
|
...
|