Files
clang-p2996/llvm/test/CodeGen/Thumb2/phi_prevent_copy.mir
David Green 9cb8f4d1ad [ARM] Add a tail-predication loop predicate register
The semantics of tail predication loops means that the value of LR as an
instruction is executed determines the predicate. In other words:

mov r3, #3
DLSTP lr, r3        // Start tail predication, lr==3
VADD.s32 q0, q1, q2 // Lanes 0,1 and 2 are updated in q0.
mov lr, #1
VADD.s32 q0, q1, q2 // Only first lane is updated.

This means that the value of lr cannot be spilled and re-used in tail
predication regions without potentially altering the behaviour of the
program. More lanes than required could be stored, for example, and in
the case of a gather those lanes might not have been set up, leading to
alignment exceptions.

This patch adds a new lr predicate operand to MVE instructions in order
to keep a reference to the lr that they use as a tail predicate. It will
usually hold the zeroreg meaning not predicated, being set to the LR phi
value in the MVETPAndVPTOptimisationsPass. This will prevent it from
being spilled anywhere that it needs to be used.

A lot of tests needed updating.

Differential Revision: https://reviews.llvm.org/D107638
2021-09-02 13:42:58 +01:00

101 lines
4.7 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -simplify-mir -run-pass=phi-node-elimination %s -o - | FileCheck %s
--- |
  ; LLVM IR module accompanying the MIR function later in this file. The MIR
  ; blocks bb.0.entry, bb.1.for.body.preheader and bb.3.for.cond.cleanup share
  ; the names of the IR basic blocks defined here.
  ; ModuleID = '<stdin>'
  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
  target triple = "arm-arm-none-eabi"

  ; @test: when %n is greater than zero (signed compare), calls llvm.memcpy
  ; with %X as the first pointer argument, %Y as the second and %n as the
  ; length; otherwise branches straight to the return.
  ; Function Attrs: nofree norecurse nounwind
  define void @test(i8* noalias nocapture %X, i8* noalias nocapture readonly %Y, i32 %n) {
  entry:
    %cmp6 = icmp sgt i32 %n, 0
    br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup

  for.body.preheader: ; preds = %entry
    call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %X, i8* align 4 %Y, i32 %n, i1 false)
    br label %for.cond.cleanup

  for.cond.cleanup: ; preds = %for.body.preheader, %entry
    ret void
  }

  ; Function Attrs: argmemonly nofree nosync nounwind willreturn
  declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg)

...
---
# Input for the phi-node-elimination run named on the RUN line of this file.
# bb.2 is a self-looping block holding four PHIs: three rgpr values
# (%7, %9, %13) and one gprlr value (%11) that is fed by t2WhileLoopStartLR
# and consumed by t2LoopEndDec.
#
# In the expected output listed below, each rgpr PHI turns into a COPY at the
# end of bb.1, a COPY at the top of bb.2 and a COPY ahead of t2LoopEndDec.
# For the gprlr PHI only the COPY at the top of bb.2 is listed; presumably
# that absence of extra lr copies is what the file name (phi_prevent_copy)
# refers to - confirm against the pass implementation.
#
# The last three rgpr copies in bb.2 all capture under the same pattern name
# (COPY10). Each capture rebinds the name and nothing later refers to it, so
# the lines are kept exactly as generated.
name: test
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: test
  ; CHECK: bb.0.entry:
  ; CHECK: successors: %bb.1(0x50000000), %bb.3(0x30000000)
  ; CHECK: liveins: $r0, $r1, $r2
  ; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY killed $r2
  ; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY killed $r1
  ; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY killed $r0
  ; CHECK: t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
  ; CHECK: t2Bcc %bb.3, 11 /* CC::lt */, killed $cpsr
  ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
  ; CHECK: bb.1.for.body.preheader:
  ; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
  ; CHECK: [[t2BICri:%[0-9]+]]:rgpr = t2BICri killed [[t2ADDri]], 16, 14 /* CC::al */, $noreg, $noreg
  ; CHECK: [[t2LSRri:%[0-9]+]]:gprlr = t2LSRri killed [[t2BICri]], 4, 14 /* CC::al */, $noreg, $noreg
  ; CHECK: [[COPY3:%[0-9]+]]:rgpr = COPY [[COPY1]]
  ; CHECK: [[COPY4:%[0-9]+]]:rgpr = COPY [[COPY2]]
  ; CHECK: [[COPY5:%[0-9]+]]:rgpr = COPY [[COPY]]
  ; CHECK: [[t2WhileLoopStartLR:%[0-9]+]]:gprlr = t2WhileLoopStartLR killed [[t2LSRri]], %bb.3, implicit-def dead $cpsr
  ; CHECK: bb.2:
  ; CHECK: [[COPY6:%[0-9]+]]:rgpr = COPY [[COPY5]]
  ; CHECK: [[COPY7:%[0-9]+]]:gprlr = COPY [[t2WhileLoopStartLR]]
  ; CHECK: [[COPY8:%[0-9]+]]:rgpr = COPY [[COPY4]]
  ; CHECK: [[COPY9:%[0-9]+]]:rgpr = COPY [[COPY3]]
  ; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[COPY6]], 0, $noreg, $noreg
  ; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri killed [[COPY6]], 16, 14 /* CC::al */, $noreg, $noreg
  ; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post killed [[COPY9]], 16, 1, [[MVE_VCTP8_]], $noreg
  ; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post killed [[MVE_VLDRBU8_post1]], killed [[COPY8]], 16, 1, killed [[MVE_VCTP8_]], $noreg
  ; CHECK: [[COPY10:%[0-9]+]]:rgpr = COPY [[MVE_VLDRBU8_post]]
  ; CHECK: [[COPY10:%[0-9]+]]:rgpr = COPY [[MVE_VSTRBU8_post]]
  ; CHECK: [[COPY10:%[0-9]+]]:rgpr = COPY [[t2SUBri]]
  ; CHECK: [[t2LoopEndDec:%[0-9]+]]:gprlr = t2LoopEndDec killed [[COPY7]], %bb.2, implicit-def dead $cpsr
  ; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg
  ; CHECK: bb.3.for.cond.cleanup:
  ; CHECK: tBX_RET 14 /* CC::al */, $noreg
  bb.0.entry:
    successors: %bb.1(0x50000000), %bb.3(0x30000000)
    liveins: $r0, $r1, $r2

    %2:rgpr = COPY killed $r2
    %1:rgpr = COPY killed $r1
    %0:rgpr = COPY killed $r0
    t2CMPri %2, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
    t2Bcc %bb.3, 11 /* CC::lt */, killed $cpsr
    t2B %bb.1, 14 /* CC::al */, $noreg

  bb.1.for.body.preheader:
    successors: %bb.3(0x40000000), %bb.2(0x40000000)

    %3:rgpr = t2ADDri %2, 15, 14 /* CC::al */, $noreg, $noreg
    %4:rgpr = t2BICri killed %3, 16, 14 /* CC::al */, $noreg, $noreg
    %5:gprlr = t2LSRri killed %4, 4, 14 /* CC::al */, $noreg, $noreg
    %6:gprlr = t2WhileLoopStartLR killed %5, %bb.3, implicit-def dead $cpsr

  bb.2:
    successors: %bb.2(0x40000000), %bb.3(0x40000000)

    ; The four PHIs the pass under test has to lower; %11 is the gprlr one.
    %7:rgpr = PHI %1, %bb.1, %8, %bb.2
    %9:rgpr = PHI %0, %bb.1, %10, %bb.2
    %11:gprlr = PHI %6, %bb.1, %12, %bb.2
    %13:rgpr = PHI %2, %bb.1, %14, %bb.2
    %15:vccr = MVE_VCTP8 %13, 0, $noreg, $noreg
    %14:rgpr = t2SUBri killed %13, 16, 14 /* CC::al */, $noreg, $noreg
    %8:rgpr, %16:mqpr = MVE_VLDRBU8_post killed %7, 16, 1, %15, $noreg
    %10:rgpr = MVE_VSTRBU8_post killed %16, killed %9, 16, 1, killed %15, $noreg
    %12:gprlr = t2LoopEndDec killed %11, %bb.2, implicit-def dead $cpsr
    t2B %bb.3, 14 /* CC::al */, $noreg

  bb.3.for.cond.cleanup:
    tBX_RET 14 /* CC::al */, $noreg

...