This is the groundwork required to implement strictfp. For now, this should be NFC (no functional change) for regular instructions (many instructions just gain an extra use of a reserved register). Regalloc won't rematerialize instructions with reads of physical registers, but we were suffering from that anyway with the exec reads. Should add it for all the related FP uses (possibly with some extras). I did not add it to either the GPR index mode instructions (or every single VALU instruction) since it's a ridiculous feature already modeled as an arbitrary side effect. Also work towards marking instructions with FP exceptions. This doesn't actually set the bit yet since this would start to change codegen. It seems nofpexcept is currently not implied from the regular IR FP operations. Add it to some MIR tests where I think it might matter.
45 lines
2.2 KiB
YAML
45 lines
2.2 KiB
YAML
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=post-RA-sched %s -o - | FileCheck -check-prefix=GCN %s
|
|
|
|
# Check that we move consumer further from producer, even if one of them is in a bundle.
|
|
|
|
---
# Producer-in-bundle case: a BUNDLE of two GLOBAL_LOAD_DWORDs defines $vgpr0 and
# $vgpr1, and two V_ADD_F32_e32 instructions consume them. The input lists the
# $vgpr1 consumer first; the GCN check lines expect the $vgpr0 consumer first.
|
|
name: src_bundle_latency
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: src_bundle_latency
|
|
; GCN: $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
|
|
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
|
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
|
|
; GCN: }
|
|
; GCN: $vgpr6 = V_ADD_F32_e32 killed $vgpr0, $vgpr0, implicit $mode, implicit $exec
|
|
; GCN: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
$vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
|
|
$vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
|
$vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
|
|
}
|
|
$vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
|
|
$vgpr6 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $mode, implicit $exec
|
|
...
|
|
|
|
---
# Consumer-in-bundle case: two V_ADD_F32_e32 instructions define $vgpr0 and
# $vgpr1, which a BUNDLE of two GLOBAL_STORE_DWORDs then stores ($vgpr1 first).
# The input defines $vgpr0 first; the GCN check lines expect $vgpr1 defined first.
|
|
name: dst_bundle_latency
|
|
tracksRegLiveness: true
|
|
body: |
|
|
bb.0:
|
|
; GCN-LABEL: name: dst_bundle_latency
|
|
; GCN: $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $mode, implicit $exec
|
|
; GCN: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $mode, implicit $exec
|
|
; GCN: BUNDLE killed $vgpr0, killed $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
|
|
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, 0, 0, implicit $exec
|
|
; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, 0, 0, implicit $exec
|
|
; GCN: }
|
|
$vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $mode, implicit $exec
|
|
$vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $mode, implicit $exec
|
|
BUNDLE $vgpr0, $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
|
|
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
|
|
GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
|
|
}
|
|
...
|