The semantics of tail predication loops mean that the value of LR at the time an instruction is executed determines the predicate. In other words:

    mov r3, #3
    DLSTP lr, r3           // Start tail predication, lr==3
    VADD.s32 q0, q1, q2    // Lanes 0, 1 and 2 are updated in q0.
    mov lr, #1
    VADD.s32 q0, q1, q2    // Only first lane is updated.

This means that the value of lr cannot be spilled and re-used in tail predication regions without potentially altering the behaviour of the program. More lanes than required could be stored, for example, and in the case of a gather those lanes might not have been set up, leading to alignment exceptions. This patch adds a new lr predicate operand to MVE instructions in order to keep a reference to the lr that they use as a tail predicate. It will usually hold the zeroreg, meaning not predicated, and is set to the LR phi value in the MVETPAndVPTOptimisationsPass. This will prevent it from being spilled anywhere that it needs to be used. A lot of tests needed updating. Differential Revision: https://reviews.llvm.org/D107638
240 lines
10 KiB
YAML
240 lines
10 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve -simplify-mir --verify-machineinstrs -run-pass=finalize-isel %s -o - | FileCheck %s
|
|
--- |
|
|
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
|
|
target triple = "arm-arm-none-eabi"
|
|
|
|
; Function Attrs: argmemonly nofree nosync nounwind willreturn
|
|
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i32, i1 immarg)
|
|
; Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
|
|
declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1 immarg)
|
|
|
|
define void @test1(i32* noalias %X, i32* noalias readonly %Y, i32 %n) {
|
|
entry:
|
|
%0 = bitcast i32* %X to i8*
|
|
%1 = bitcast i32* %Y to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 %n, i1 false)
|
|
ret void
|
|
}
|
|
|
|
define void @test2(i32* noalias %X, i32* noalias readonly %Y, i32 %n) {
|
|
entry:
|
|
%cmp6 = icmp sgt i32 %n, 0
|
|
br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup
|
|
|
|
for.body.preheader: ; preds = %entry
|
|
%X.bits = bitcast i32* %X to i8*
|
|
%Y.bits = bitcast i32* %Y to i8*
|
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %X.bits, i8* align 4 %Y.bits, i32 %n, i1 false)
|
|
br label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.body.preheader, %entry
|
|
ret void
|
|
}
|
|
|
|
define void @test3(i32* nocapture %X, i8 zeroext %c, i32 %n) {
|
|
entry:
|
|
%0 = bitcast i32* %X to i8*
|
|
tail call void @llvm.memset.p0i8.i32(i8* align 4 %0, i8 %c, i32 %n, i1 false)
|
|
ret void
|
|
}
|
|
|
|
|
|
define void @test4(i8* nocapture %X, i8 zeroext %c, i32 %n) {
|
|
entry:
|
|
%cmp4 = icmp sgt i32 %n, 0
|
|
br i1 %cmp4, label %for.body.preheader, label %for.cond.cleanup
|
|
|
|
for.body.preheader: ; preds = %entry
|
|
call void @llvm.memset.p0i8.i32(i8* align 1 %X, i8 %c, i32 %n, i1 false)
|
|
br label %for.cond.cleanup
|
|
|
|
for.cond.cleanup: ; preds = %for.body.preheader, %entry
|
|
ret void
|
|
}
|
|
|
|
...
|
|
---
|
|
name: test1
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0.entry:
|
|
liveins: $r0, $r1, $r2
|
|
|
|
; CHECK-LABEL: name: test1
|
|
; CHECK: liveins: $r0, $r1, $r2
|
|
; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r2
|
|
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
|
|
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
|
|
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
|
|
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.2, implicit-def $cpsr
|
|
; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
|
|
; CHECK: .1:
|
|
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY1]], %bb.0, %7, %bb.1
|
|
; CHECK: [[PHI1:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.0, %9, %bb.1
|
|
; CHECK: [[PHI2:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.0, %11, %bb.1
|
|
; CHECK: [[PHI3:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.0, %13, %bb.1
|
|
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI3]], 0, $noreg, $noreg
|
|
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI3]], 16, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[PHI]], 16, 1, [[MVE_VCTP8_]], $noreg
|
|
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[MVE_VLDRBU8_post1]], [[PHI1]], 16, 1, [[MVE_VCTP8_]], $noreg
|
|
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI2]], 1
|
|
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.1, implicit-def $cpsr
|
|
; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg
|
|
; CHECK: .2.entry:
|
|
; CHECK: tBX_RET 14 /* CC::al */, $noreg
|
|
%2:rgpr = COPY $r2
|
|
%1:rgpr = COPY $r1
|
|
%0:rgpr = COPY $r0
|
|
MVE_MEMCPYLOOPINST %0, %1, %2, implicit-def $cpsr
|
|
tBX_RET 14 /* CC::al */, $noreg
|
|
|
|
...
|
|
---
|
|
name: test2
|
|
tracksRegLiveness: true
|
|
body: |
|
|
; CHECK-LABEL: name: test2
|
|
; CHECK: bb.0.entry:
|
|
; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
|
|
; CHECK: liveins: $r0, $r1, $r2
|
|
; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r2
|
|
; CHECK: [[COPY1:%[0-9]+]]:rgpr = COPY $r1
|
|
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
|
|
; CHECK: t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
|
; CHECK: t2Bcc %bb.2, 11 /* CC::lt */, $cpsr
|
|
; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.1.for.body.preheader:
|
|
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
|
|
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.4, implicit-def $cpsr
|
|
; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.3:
|
|
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY1]], %bb.1, %7, %bb.3
|
|
; CHECK: [[PHI1:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.1, %9, %bb.3
|
|
; CHECK: [[PHI2:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.1, %11, %bb.3
|
|
; CHECK: [[PHI3:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.1, %13, %bb.3
|
|
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI3]], 0, $noreg, $noreg
|
|
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI3]], 16, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[MVE_VLDRBU8_post:%[0-9]+]]:rgpr, [[MVE_VLDRBU8_post1:%[0-9]+]]:mqpr = MVE_VLDRBU8_post [[PHI]], 16, 1, [[MVE_VCTP8_]], $noreg
|
|
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[MVE_VLDRBU8_post1]], [[PHI1]], 16, 1, [[MVE_VCTP8_]], $noreg
|
|
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI2]], 1
|
|
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.3, implicit-def $cpsr
|
|
; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.4.for.body.preheader:
|
|
; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.2.for.cond.cleanup:
|
|
; CHECK: tBX_RET 14 /* CC::al */, $noreg
|
|
bb.0.entry:
|
|
successors: %bb.1(0x50000000), %bb.2(0x30000000)
|
|
liveins: $r0, $r1, $r2
|
|
|
|
%2:rgpr = COPY $r2
|
|
%1:rgpr = COPY $r1
|
|
%0:rgpr = COPY $r0
|
|
t2CMPri %2, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
|
t2Bcc %bb.2, 11 /* CC::lt */, $cpsr
|
|
t2B %bb.1, 14 /* CC::al */, $noreg
|
|
|
|
bb.1.for.body.preheader:
|
|
successors: %bb.2(0x80000000)
|
|
|
|
MVE_MEMCPYLOOPINST %0, %1, %2, implicit-def $cpsr
|
|
|
|
bb.2.for.cond.cleanup:
|
|
tBX_RET 14 /* CC::al */, $noreg
|
|
|
|
...
|
|
---
|
|
name: test3
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0.entry:
|
|
liveins: $r0, $r1, $r2
|
|
|
|
; CHECK-LABEL: name: test3
|
|
; CHECK: liveins: $r0, $r1, $r2
|
|
; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r2
|
|
; CHECK: [[COPY1:%[0-9]+]]:mqpr = COPY $r1
|
|
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
|
|
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
|
|
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.2, implicit-def $cpsr
|
|
; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
|
|
; CHECK: .1:
|
|
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.0, %7, %bb.1
|
|
; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.0, %9, %bb.1
|
|
; CHECK: [[PHI2:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.0, %11, %bb.1
|
|
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI2]], 0, $noreg, $noreg
|
|
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 16, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY1]], [[PHI]], 16, 1, [[MVE_VCTP8_]], $noreg
|
|
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI1]], 1
|
|
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.1, implicit-def $cpsr
|
|
; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg
|
|
; CHECK: .2.entry:
|
|
; CHECK: tBX_RET 14 /* CC::al */, $noreg
|
|
%2:rgpr = COPY $r2
|
|
%1:mqpr = COPY $r1
|
|
%0:rgpr = COPY $r0
|
|
MVE_MEMSETLOOPINST %0, %1, %2, implicit-def $cpsr
|
|
tBX_RET 14 /* CC::al */, $noreg
|
|
|
|
...
|
|
---
|
|
name: test4
|
|
alignment: 2
|
|
tracksRegLiveness: true
|
|
body: |
|
|
; CHECK-LABEL: name: test4
|
|
; CHECK: bb.0.entry:
|
|
; CHECK: successors: %bb.1(0x50000000), %bb.2(0x30000000)
|
|
; CHECK: liveins: $r0, $r1, $r2
|
|
; CHECK: [[COPY:%[0-9]+]]:rgpr = COPY $r2
|
|
; CHECK: [[COPY1:%[0-9]+]]:mqpr = COPY $r1
|
|
; CHECK: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
|
|
; CHECK: t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
|
; CHECK: t2Bcc %bb.2, 11 /* CC::lt */, $cpsr
|
|
; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.1.for.body.preheader:
|
|
; CHECK: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY]], 15, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2LSRri:%[0-9]+]]:rgpr = t2LSRri killed [[t2ADDri]], 4, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[t2WhileLoopSetup:%[0-9]+]]:gprlr = t2WhileLoopSetup killed [[t2LSRri]]
|
|
; CHECK: t2WhileLoopStart [[t2WhileLoopSetup]], %bb.4, implicit-def $cpsr
|
|
; CHECK: t2B %bb.3, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.3:
|
|
; CHECK: [[PHI:%[0-9]+]]:rgpr = PHI [[COPY2]], %bb.1, %7, %bb.3
|
|
; CHECK: [[PHI1:%[0-9]+]]:gprlr = PHI [[t2WhileLoopSetup]], %bb.1, %9, %bb.3
|
|
; CHECK: [[PHI2:%[0-9]+]]:rgpr = PHI [[COPY]], %bb.1, %11, %bb.3
|
|
; CHECK: [[MVE_VCTP8_:%[0-9]+]]:vccr = MVE_VCTP8 [[PHI2]], 0, $noreg, $noreg
|
|
; CHECK: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 16, 14 /* CC::al */, $noreg, $noreg
|
|
; CHECK: [[MVE_VSTRBU8_post:%[0-9]+]]:rgpr = MVE_VSTRBU8_post [[COPY1]], [[PHI]], 16, 1, [[MVE_VCTP8_]], $noreg
|
|
; CHECK: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[PHI1]], 1
|
|
; CHECK: t2LoopEnd [[t2LoopDec]], %bb.3, implicit-def $cpsr
|
|
; CHECK: t2B %bb.4, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.4.for.body.preheader:
|
|
; CHECK: t2B %bb.2, 14 /* CC::al */, $noreg
|
|
; CHECK: bb.2.for.cond.cleanup:
|
|
; CHECK: tBX_RET 14 /* CC::al */, $noreg
|
|
bb.0.entry:
|
|
successors: %bb.1(0x50000000), %bb.2(0x30000000)
|
|
liveins: $r0, $r1, $r2
|
|
|
|
%2:rgpr = COPY $r2
|
|
%1:mqpr = COPY $r1
|
|
%0:rgpr = COPY $r0
|
|
t2CMPri %2, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
|
|
t2Bcc %bb.2, 11 /* CC::lt */, $cpsr
|
|
t2B %bb.1, 14 /* CC::al */, $noreg
|
|
|
|
bb.1.for.body.preheader:
|
|
MVE_MEMSETLOOPINST %0, %1, %2, implicit-def $cpsr
|
|
|
|
bb.2.for.cond.cleanup:
|
|
tBX_RET 14 /* CC::al */, $noreg
|
|
|
|
...
|