Files
clang-p2996/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
Jay Foad 28233b11ac [AMDGPU] New AMDGPUInsertSingleUseVDST pass (#72388)
Add support for emitting GFX11.5 s_singleuse_vdst instructions. This is
a power saving feature whereby the compiler can annotate VALU
instructions whose results are known to have only a single use, so the
hardware can in some cases avoid writing the result back to VGPR RAM.

To begin with, the pass is disabled by default because one feature is
missing: an exclusion list of opcodes that never qualify as single-use
producers and/or consumers. A future patch will implement this and
enable the pass by default.

---------

Co-authored-by: Scott Egerton <scott.egerton@amd.com>
2023-11-24 10:23:06 +00:00

628 lines
20 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -march=amdgcn -mcpu=gfx1150 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=gfx1150 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s
# One single-use producer.
# v1 is written by the first add and read exactly once, by the second add;
# it is not live into bb.1. The expected output places S_SINGLEUSE_VDST 1
# directly before the instruction that defines v1. The definition of v2,
# which is live into bb.1, gets no marker.
---
name: one_producer
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: one_producer
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr0, $vgpr2
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr0
$vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
$vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
bb.1:
liveins: $vgpr0, $vgpr2
...
# One single-use producer of a 64-bit value.
# Same shape as the 32-bit case but using register pairs: v[2:3] is written
# by the first shift and read once by the second, so the expected output
# marks its producer. v[4:5] is live into bb.1 and its producer is unmarked.
---
name: one_producer_64bit
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: one_producer_64bit
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec
; CHECK-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr4_vgpr5
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr0_vgpr1
$vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec
$vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec
bb.1:
liveins: $vgpr4_vgpr5
...
# Two consecutive single-use producers.
# v1 and v2 are each read exactly once by the following add. The expected
# output emits a separate S_SINGLEUSE_VDST 1 before each of the two
# producers. v3 is live into bb.1 and its producer is unmarked.
---
name: two_producers
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: two_producers
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr0, $vgpr3
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr0
$vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
$vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
$vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec
bb.1:
liveins: $vgpr0, $vgpr3
...
# Redefinitions of v0.
# Four back-to-back moves each read v0 and overwrite it, so every value of
# v0 is read at most once before being replaced. The expected output marks
# all four moves, including the last one, whose result is not read in bb.0
# and is not listed as live into bb.1 (bb.1 has no live-ins).
---
name: redefinitions
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: redefinitions
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
bb.0:
liveins: $vgpr0
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
bb.1:
...
# One producer with no consumers.
# v1 is written but never read, and bb.1 has no live-ins. The expected
# output still places S_SINGLEUSE_VDST 1 before the producer, i.e. a result
# with zero visible uses is annotated the same way as one with a single use.
---
name: no_consumer
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: no_consumer
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
bb.0:
liveins: $vgpr0
$vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
bb.1:
...
# One consumer with two uses of the same value.
# v1 is read by a single instruction, but as both of its source operands.
# The expected output contains no S_SINGLEUSE_VDST at all: two reads from
# one consumer are not treated as a single use.
---
name: one_consumer_two_uses
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: one_consumer_two_uses
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr0, $vgpr2
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr0
$vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
bb.1:
liveins: $vgpr0, $vgpr2
...
# A longer example.
# A block mixing vector (V_*) and scalar (S_*) instructions, with v14 and
# v15 each defined more than once.
# Marked in the expected output (each value is read exactly once before
# being overwritten):
#   - the first v14 (V_MUL), read by the FMA that defines v17
#   - the first v15 (V_MUL), read by the FMA that redefines v15
#   - the second v15 (FMA), read by the FMA that redefines v14
#   - the second v14 (FMA), read by the FMA that defines the final v15
# Not marked:
#   - v17, which is read by two later FMAs
#   - the final v15, which is read by both V_LOG and V_EXP
#   - v16 and v18, which are live into bb.1
# The interleaved S_MUL_F16 / S_ADD_F16 write SGPRs and get no marker.
---
name: longer_example
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: longer_example
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr3, $vgpr5, $sgpr0, $sgpr2, $sgpr4, $sgpr5, $sgpr16, $sgpr17, $sgpr18, $sgpr19
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr14 = V_MUL_F32_e32 $sgpr4, $vgpr3, implicit $exec, implicit $mode
; CHECK-NEXT: $sgpr3 = S_MUL_F16 $sgpr0, $sgpr2, implicit $mode
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr15 = V_MUL_F32_e32 $sgpr5, $vgpr3, implicit $exec, implicit $mode
; CHECK-NEXT: $vgpr17 = V_FMA_F32_e64 0, $sgpr16, 0, $vgpr5, 0, $vgpr14, 0, 0, implicit $exec, implicit $mode
; CHECK-NEXT: $sgpr1 = S_ADD_F16 $sgpr0, 15360, implicit $mode
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr15 = V_FMA_F32_e64 0, $sgpr17, 0, $vgpr5, 0, $vgpr15, 0, 0, implicit $exec, implicit $mode
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr14 = V_FMA_F32_e64 0, $sgpr18, 0, $vgpr15, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
; CHECK-NEXT: $vgpr15 = V_FMA_F32_e64 0, $sgpr19, 0, $vgpr14, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
; CHECK-NEXT: $vgpr16 = V_LOG_F32_e32 $vgpr15, implicit $exec, implicit $mode
; CHECK-NEXT: $vgpr18 = V_EXP_F32_e32 $vgpr15, implicit $exec, implicit $mode
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr16, $vgpr18
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr3, $vgpr5, $sgpr0, $sgpr2, $sgpr4, $sgpr5, $sgpr16, $sgpr17, $sgpr18, $sgpr19
$vgpr14 = V_MUL_F32_e32 $sgpr4, $vgpr3, implicit $exec, implicit $mode
$sgpr3 = S_MUL_F16 $sgpr0, $sgpr2, implicit $mode
$vgpr15 = V_MUL_F32_e32 $sgpr5, $vgpr3, implicit $exec, implicit $mode
$vgpr17 = V_FMA_F32_e64 0, $sgpr16, 0, $vgpr5, 0, $vgpr14, 0, 0, implicit $exec, implicit $mode
$sgpr1 = S_ADD_F16 $sgpr0, 15360, implicit $mode
$vgpr15 = V_FMA_F32_e64 0, $sgpr17, 0, $vgpr5, 0, $vgpr15, 0, 0, implicit $exec, implicit $mode
$vgpr14 = V_FMA_F32_e64 0, $sgpr18, 0, $vgpr15, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
$vgpr15 = V_FMA_F32_e64 0, $sgpr19, 0, $vgpr14, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode
$vgpr16 = V_LOG_F32_e32 $vgpr15, implicit $exec, implicit $mode
$vgpr18 = V_EXP_F32_e32 $vgpr15, implicit $exec, implicit $mode
bb.1:
liveins: $vgpr16, $vgpr18
...
# Multiple uses of v0.
# The redefined v0 is read by two different moves (into v1 and v2), and
# v1 and v2 are both live into bb.1. The expected output contains no
# S_SINGLEUSE_VDST: none of the three results qualifies.
---
name: multiple_uses_1
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: multiple_uses_1
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr0
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
bb.1:
liveins: $vgpr1, $vgpr2
...
# Multiple uses of v0 and redefinitions of v1 and v2.
# The three-move pattern (v0, v1, v2) is executed twice in one block.
#   - Both definitions of v0 are read more than once and are unmarked.
#   - The first definitions of v1 and v2 are overwritten by the second
#     round without ever being read; the expected output marks both.
#   - The second definitions of v1 and v2 are live into bb.1 and unmarked.
---
name: multiple_uses_2
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: multiple_uses_2
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr0
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
bb.1:
liveins: $vgpr1, $vgpr2
...
# Multiple uses of all but v1.
# v0 is read by two moves; v2 and v3 are live into bb.1. Only v1 has a
# single read (by the move into v3) and is not live into bb.1, so it is the
# only producer the expected output marks with S_SINGLEUSE_VDST 1.
---
name: multiple_uses_3
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: multiple_uses_3
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr2, $vgpr3
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr0
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec
bb.1:
liveins: $vgpr2, $vgpr3
...
# Results are live-in to another basic block.
# Three-block function. Every result produced in bb.0 (v0, v1, v2) is
# listed as live into bb.1, and in bb.1 the redefined v0 is read twice
# while v1 and v2 are live into bb.2. The expected output contains no
# S_SINGLEUSE_VDST in either block.
---
name: basic_block_1
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: basic_block_1
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr0
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
bb.2:
liveins: $vgpr1, $vgpr2
...
# Result v2 has multiple uses in another basic block.
# v2 is produced in bb.0, is live into bb.1, and is read by two moves
# there; its producer is unmarked in the expected output. Inside bb.1 the
# first write to v3 is overwritten by the second without being read, and
# that first write is marked. The second write to v3 is live into bb.2 and
# is unmarked.
---
name: basic_block_2
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: basic_block_2
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: liveins: $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $vgpr3
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr0, $vgpr1
$vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec
bb.1:
liveins: $vgpr2
$vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
$vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec
bb.2:
liveins: $vgpr3
...
# Results are redefined in another basic block.
# In bb.0, v0 and v1 are live into bb.1 and are unmarked. v2 is written in
# bb.0 but is not in bb.1's live-in list (bb.1 overwrites it before any
# read), and it is the only producer the expected output marks.
# In bb.1, all three results are live into bb.2 and nothing is marked.
---
name: basic_block_3
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: basic_block_3
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr0
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
bb.1:
liveins: $vgpr0, $vgpr1
$vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
$vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
$vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
bb.2:
liveins: $vgpr0, $vgpr1, $vgpr2
...
# Exec modified between producer and consumer.
# The full $exec register is overwritten by a COPY between the write of v0
# and its only read. The expected output still marks the producer, so this
# test records that an intervening write to $exec does not suppress the
# annotation. The second write to v0 is live into bb.1 and is unmarked.
---
name: exec_mask
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: exec_mask
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: $exec = COPY $sgpr0_sgpr1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $sgpr0_sgpr1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$exec = COPY $sgpr0_sgpr1
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
bb.1:
liveins: $vgpr0
...
# Exec_lo modified between producer and consumer.
# Variant of the exec-write case in which only $exec_lo is overwritten (by
# a COPY from $sgpr0) between the write of v0 and its only read. The
# expected output still marks the producer of v0.
---
name: exec_mask_lo
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: exec_mask_lo
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: $exec_lo = COPY $sgpr0
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $sgpr0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$exec_lo = COPY $sgpr0
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
bb.1:
liveins: $vgpr0
...
# Exec_hi modified between producer and consumer.
# Variant of the exec-write case in which only $exec_hi is overwritten (by
# a COPY from $sgpr0) between the write of v0 and its only read. The
# expected output still marks the producer of v0.
---
name: exec_mask_hi
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: exec_mask_hi
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: $exec_hi = COPY $sgpr0
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $sgpr0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$exec_hi = COPY $sgpr0
$vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
bb.1:
liveins: $vgpr0
...
# Write 32-bit vgpr and then read from low 16 bits.
# v0 is written as a full 32-bit register and the only later read is of
# its low half ($vgpr0_lo16). The expected output marks the 32-bit
# producer. The 16-bit result $vgpr1_lo16 is live into bb.1 and unmarked.
---
name: write_full_read_lo
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: write_full_read_lo
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr1_lo16
; CHECK-NEXT: {{ $}}
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec
bb.1:
liveins: $vgpr1_lo16
...
# Write 32-bit vgpr and then read from high 16 bits.
# v0 is written as a full 32-bit register and the only later read is of
# its high half ($vgpr0_hi16). The expected output marks the 32-bit
# producer. The 16-bit result $vgpr1_hi16 is live into bb.1 and unmarked.
---
name: write_full_read_hi
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: write_full_read_hi
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr1_hi16
; CHECK-NEXT: {{ $}}
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec
bb.1:
liveins: $vgpr1_hi16
...
# Write 32-bit vgpr and then read from both halves.
# v0 is written as a full 32-bit register; one later instruction reads
# $vgpr0_lo16 and a different one reads $vgpr0_hi16. The two consumers
# write the two halves of v1, and bb.1 lists the full $vgpr1 as live-in.
# The expected output marks all three instructions in bb.0.
# NOTE(review): the v0 producer is marked although two separate
# instructions read parts of it, and both v1 half-writes are marked
# although $vgpr1 is live into bb.1. These expectations are autogenerated
# and record what the pass emits; confirm this is the intended behaviour
# rather than an artifact of per-half tracking.
---
name: write_full_read_both
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: write_full_read_both
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr1
; CHECK-NEXT: {{ $}}
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec
$vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec
bb.1:
liveins: $vgpr1
...
# Write 32-bit vgpr and then read from both halves in the same instruction.
# v0 is written as a full 32-bit register and a single V_ADD_F16 reads
# $vgpr0_lo16 and $vgpr0_hi16 as its two source operands. The expected
# output marks the 32-bit producer. The result $vgpr1_lo16 is live into
# bb.1 and unmarked.
---
name: write_full_read_both_same_instruction
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: write_full_read_both_same_instruction
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: $vgpr1_lo16 = V_ADD_F16_t16_e32 $vgpr0_lo16, $vgpr0_hi16, implicit $mode, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr1_lo16
; CHECK-NEXT: {{ $}}
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr1_lo16 = V_ADD_F16_t16_e32 $vgpr0_lo16, $vgpr0_hi16, implicit $mode, implicit $exec
bb.1:
liveins: $vgpr1_lo16
...
# Write low 16-bits and then read 32-bit vgpr.
# v0 is live into bb.0; only its low half ($vgpr0_lo16) is rewritten, and
# the full 32-bit v0 is then read once. The expected output marks the
# 16-bit producer. v1 is live into bb.1 and its producer is unmarked.
---
name: write_lo_read_full
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: write_lo_read_full
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr1
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr0
$vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec
$vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
bb.1:
liveins: $vgpr1
...
# Write high 16-bits and then read 32-bit vgpr.
# v0 is live into bb.0; only its high half ($vgpr0_hi16) is rewritten, and
# the full 32-bit v0 is then read once. The expected output marks the
# 16-bit producer. v1 is live into bb.1 and its producer is unmarked.
---
name: write_hi_read_full
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: write_hi_read_full
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr1
; CHECK-NEXT: {{ $}}
bb.0:
liveins: $vgpr0
$vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec
$vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
bb.1:
liveins: $vgpr1
...