Files
clang-p2996/llvm/test/CodeGen/AMDGPU/memory_clause.mir
Nicolai Hähnle 10cef708a7 AMDGPU: Clean up LDS-related occupancy calculations
Occupancy is expressed as waves per SIMD. This means that we need to
take into account the number of SIMDs per "CU" or, to be more precise,
the number of SIMDs over which a workgroup may be distributed.

getOccupancyWithLocalMemSize was wrong because it didn't take SIMDs
into account at all.

At the same time, we need to take into account that WGP mode offers
access to a larger total amount of LDS, since this can affect how
non-power-of-two LDS allocations are rounded. To make this work
consistently, we distinguish between (available) local memory size and
addressable local memory size (which is always limited by 64kB on
gfx10+, even with WGP mode).

This change results in a massive amount of test churn. A lot of it is
caused by the fact that the default work group size is 1024, which means
that (due to rounding effects) the default occupancy on older hardware
is 8 instead of 10, which affects scheduling via register pressure
estimates. I've adjusted most tests by just running the UTC tools, but
in some cases I manually changed the work group size to 32 or 64 to make
sure that work group size chunkiness has no effect.

Differential Revision: https://reviews.llvm.org/D139468
2023-01-23 21:43:06 +01:00

553 lines
23 KiB
YAML

# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass=si-form-memory-clauses %s -o - | FileCheck -check-prefix=GCN %s
# GCN-LABEL: {{^}}name: vector_clause{{$}}
# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
# GCN-NEXT: %5:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %5, %1, 0, 0, implicit $exec
---
name: vector_clause
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
%5:vreg_64 = IMPLICIT_DEF
GLOBAL_STORE_DWORDX4 %5, %1, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %5, %2, 16, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %5, %3, 32, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %5, %4, 48, 0, implicit $exec
...
# This would be a valid soft clause, but there's no need for a KILL
# since the pointer uses are live beyond the end the clause.
# GCN-LABEL: {{^}}name: vector_clause_no_kill{{$}}
# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, implicit $exec
---
name: vector_clause_no_kill
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %3, 32, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %4, 48, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: subreg_full{{$}}
# GCN: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, implicit $exec
# GCN-NEXT: %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, implicit $exec
# GCN-NEXT: %1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
# GCN-NEXT: %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
# GCN-NEXT: KILL %0.sub2_sub3{{$}}
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, implicit $exec
---
name: subreg_full
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_128 }
- { id: 1, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, implicit $exec
%1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, implicit $exec
%1.sub2:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
%1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: subreg_part{{$}}
# GCN: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, implicit $exec
# GCN-NEXT: %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, implicit $exec
# GCN-NEXT: %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
# GCN-NEXT: KILL %0.sub2_sub3{{$}}
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, implicit $exec
---
name: subreg_part
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_128 }
- { id: 1, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, implicit $exec
%1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, implicit $exec
%1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0.sub0_sub1, %1, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: dead{{$}}
# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
---
name: dead
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_128 }
- { id: 2, class: vreg_128 }
- { id: 3, class: vreg_128 }
- { id: 4, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: subreg_dead{{$}}
# GCN: undef %2.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, implicit $exec
# GCN-NEXT: dead %2.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
# GCN-NEXT: GLOBAL_STORE_DWORD %1, %2.sub0, 0, 0, implicit $exec
---
name: subreg_dead
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_64 = IMPLICIT_DEF
undef %2.sub0:vreg_64 = GLOBAL_LOAD_DWORD %0, 16, 0, implicit $exec
dead %2.sub1:vreg_64 = GLOBAL_LOAD_DWORD %0, 32, 0, implicit $exec
GLOBAL_STORE_DWORD %1, %2.sub0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: kill{{$}}
# GCN: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
# GCN-NEXT: KILL %0{{$}}
---
name: kill
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_64 = IMPLICIT_DEF
%2:vreg_64 = IMPLICIT_DEF
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 killed %1, 16, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %2, %3, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %2, %4, 16, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: indirect{{$}}
# GCN: %1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, implicit $exec
# GCN-NEXT: %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
---
name: indirect
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_64 }
- { id: 2, class: vreg_128 }
- { id: 3, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1:vreg_64 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %3, 16, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: stack{{$}}
# GCN: %0:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, implicit $exec
# GCN-NEXT: GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, implicit $exec
---
name: stack
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vreg_128 }
- { id: 2, class: vreg_128 }
stack:
- { id: 0, type: default, offset: 0, size: 64, alignment: 8 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %stack.0, 16, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %1, 0, 0, implicit $exec
GLOBAL_STORE_DWORDX4 %0, %2, 16, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: overflow_counter{{$}}
# GCN: dead %2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec
# GCN-NEXT: dead %4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, implicit $exec
# GCN-NEXT: dead %5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, implicit $exec
# GCN-NEXT: dead %6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, implicit $exec
# GCN-NEXT: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, implicit $exec
# GCN-NEXT: dead %8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, implicit $exec
# GCN-NEXT: dead %9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, implicit $exec
# GCN-NEXT: dead %10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, implicit $exec
# GCN-NEXT: dead %11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, implicit $exec
# GCN-NEXT: dead %12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, implicit $exec
# GCN-NEXT: dead %13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, implicit $exec
# GCN-NEXT: dead %14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, implicit $exec
# GCN-NEXT: dead %15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, implicit $exec
# GCN-NEXT: dead %16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
# GCN-NEXT: dead %17:vgpr_32 = GLOBAL_LOAD_DWORD %1, 60, 0, implicit $exec
# GCN-NEXT: dead %18:vgpr_32 = GLOBAL_LOAD_DWORD %1, 64, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
---
name: overflow_counter
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_64 = IMPLICIT_DEF
%2:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, implicit $exec
%3:vgpr_32 = GLOBAL_LOAD_DWORD %0, 4, 0, implicit $exec
%4:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, implicit $exec
%5:vgpr_32 = GLOBAL_LOAD_DWORD %0, 12, 0, implicit $exec
%6:vgpr_32 = GLOBAL_LOAD_DWORD %0, 16, 0, implicit $exec
%7:vgpr_32 = GLOBAL_LOAD_DWORD %0, 20, 0, implicit $exec
%8:vgpr_32 = GLOBAL_LOAD_DWORD %0, 24, 0, implicit $exec
%9:vgpr_32 = GLOBAL_LOAD_DWORD %0, 28, 0, implicit $exec
%10:vgpr_32 = GLOBAL_LOAD_DWORD %0, 32, 0, implicit $exec
%11:vgpr_32 = GLOBAL_LOAD_DWORD %0, 36, 0, implicit $exec
%12:vgpr_32 = GLOBAL_LOAD_DWORD %0, 40, 0, implicit $exec
%13:vgpr_32 = GLOBAL_LOAD_DWORD %0, 44, 0, implicit $exec
%14:vgpr_32 = GLOBAL_LOAD_DWORD %0, 48, 0, implicit $exec
%15:vgpr_32 = GLOBAL_LOAD_DWORD %0, 52, 0, implicit $exec
%16:vgpr_32 = GLOBAL_LOAD_DWORD %0, 56, 0, implicit $exec
%17:vgpr_32 = GLOBAL_LOAD_DWORD %1, 60, 0, implicit $exec
%18:vgpr_32 = GLOBAL_LOAD_DWORD %1, 64, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: reg_pressure{{$}}
# GCN: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
# GCN-NEXT: dead %6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, implicit $exec
# GCN-NEXT: dead %7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, implicit $exec
# GCN-NEXT: dead %8:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, implicit $exec
# GCN-NEXT: KILL %0{{$}}
# GCN-NEXT: dead %9:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 128, 0, implicit $exec
# GCN-NEXT: dead %10:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 144, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
---
name: reg_pressure
tracksRegLiveness: true
body: |
bb.0:
%0:vreg_64 = IMPLICIT_DEF
%1:vreg_64 = IMPLICIT_DEF
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
%5:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 48, 0, implicit $exec
%6:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 64, 0, implicit $exec
%7:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 80, 0, implicit $exec
%8:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 96, 0, implicit $exec
%9:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 128, 0, implicit $exec
%10:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 144, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: image_clause{{$}}
# GCN: %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: %5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL undef %2:sgpr_128{{$}}
# GCN-NEXT: IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec
---
name: image_clause
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: sgpr_256 }
- { id: 2, class: sgpr_128 }
- { id: 3, class: vreg_128 }
- { id: 4, class: vreg_128 }
- { id: 5, class: vreg_128 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
%4:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
%5:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, undef %2:sgpr_128, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
IMAGE_STORE_V4_V2 %3, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128))
IMAGE_STORE_V4_V2 %4, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128))
IMAGE_STORE_V4_V2 %5, %0, %1, 15, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (store (s128))
...
# GCN-LABEL: {{^}}name: mixed_clause{{$}}
# GCN: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
# GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
# GCN-NEXT: KILL %2{{$}}
# GCN-NEXT: KILL %0{{$}}
---
name: mixed_clause
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: sgpr_256 }
- { id: 2, class: sgpr_128 }
- { id: 3, class: vreg_128 }
- { id: 4, class: vreg_128 }
- { id: 5, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = IMPLICIT_DEF
%3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
%5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %2, 0, 0, 0, 0, implicit $exec
...
# GCN-LABEL: {{^}}name: atomic{{$}}
# GCN: %1:vgpr_32 = IMPLICIT_DEF
# GCN-NEXT: dead %2:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 2, implicit $exec, implicit $flat_scr
# GCN-NEXT: dead %3:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 2, implicit $exec, implicit $flat_scr
# GCN-NEXT: FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr
# GCN-NEXT: S_ENDPGM 0
---
name: atomic
tracksRegLiveness: true
registers:
- { id: 0, class: vreg_64 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 2, implicit $exec, implicit $flat_scr
%3:vgpr_32 = FLAT_ATOMIC_ADD_RTN %0, %1, 0, 2, implicit $exec, implicit $flat_scr
FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr
FLAT_ATOMIC_ADD %0, %1, 0, 0, implicit $exec, implicit $flat_scr
S_ENDPGM 0
...
# One of the pointers has an additional use after the clause, but one
# doesn't. Only the final use should be killed.
# GCN-LABEL: {{^}}name: ptr_use_after_clause{{$}}
# GCN: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
# GCN-NEXT: dead %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 48, 0, implicit $exec
# GCN-NEXT: KILL %1{{$}}
# GCN-NEXT: S_NOP 0, implicit %0
---
name: ptr_use_after_clause
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_64 = COPY $vgpr2_vgpr3
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 16, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 32, 0, implicit $exec
%5:vreg_128 = GLOBAL_LOAD_DWORDX4 %1, 48, 0, implicit $exec
S_NOP 0, implicit %0
...
# Only part of the register is really live past the clause.
# GCN-LABEL: {{^}}name: ptr_use_after_clause_subreg{{$}}
# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, implicit $exec
# GCN-NEXT: KILL %0.sub2_sub3{{$}}
# GCN-NEXT: S_NOP 0, implicit %0.sub0_sub1{{$}}
---
name: ptr_use_after_clause_subreg
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, implicit $exec
S_NOP 0, implicit %0.sub0_sub1
...
# More complex situation where only some of the use subregisters live
# beyond the clause.
# GCN-LABEL: {{^}}name: ptr_use_after_clause_subreg_multi{{$}}
# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub3_sub4, 32, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub6_sub7, 48, 0, implicit $exec
# GCN-NEXT: KILL %0.sub2_sub3_sub4, %0.sub7{{$}}
# GCN-NEXT: S_NOP 0, implicit %0.sub0_sub1, implicit %0.sub5_sub6
---
name: ptr_use_after_clause_subreg_multi
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%0:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub3_sub4, 32, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub6_sub7, 48, 0, implicit $exec
S_NOP 0, implicit %0.sub0_sub1, implicit %0.sub5_sub6
...
# Have subranges, but none of them are killed
# GCN-LABEL: {{^}}name: no_killed_subranges{{$}}
# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, implicit $exec
# GCN-NEXT: dead %3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, implicit $exec
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, implicit $exec
# GCN-NEXT: S_NOP 0, implicit %0.sub0_sub1{{$}}
# GCN-NEXT: S_NOP 0, implicit %0.sub2_sub3{{$}}
---
name: no_killed_subranges
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 16, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, implicit $exec
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub2_sub3, 48, 0, implicit $exec
S_NOP 0, implicit %0.sub0_sub1
S_NOP 0, implicit %0.sub2_sub3
...
# sub2 is undef at entry to the soft clause. It should not be have its
# live range extended.
# GCN-LABEL: name: no_killed_undef_subrange_use
# GCN: dead %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
# GCN-NEXT: dead %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, implicit $exec
# GCN-NEXT: KILL %0.sub0_sub1{{$}}
# GCN-NEXT: %0.sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
# GCN-NEXT: S_NOP 0, implicit %0.sub2
---
name: no_killed_undef_subrange_use
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
undef %0.sub0_sub1:vreg_128 = COPY $vgpr0_vgpr1
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 0, 0, implicit $exec
%3:vreg_128 = GLOBAL_LOAD_DWORDX4 %0.sub0_sub1, 32, 0, implicit $exec
%0.sub2:vreg_128 = V_MOV_B32_e32 0, implicit $exec
S_NOP 0, implicit %0.sub2
...
# Make sure intervening implicit_defs are not treated as breaking the
# clause
#
# GCN-LABEL: {{^}}name: implicit_def_no_break{{$}}
# GCN: %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
# GCN-NEXT: %2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
# GCN-NEXT: %3:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 32, 0, implicit $exec
# GCN-NEXT: %5:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 48, 0, implicit $exec
# GCN-NEXT: KILL %3{{$}}
# GCN-NEXT: KILL %0{{$}}
---
name: implicit_def_no_break
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
%0:vreg_64 = COPY $vgpr0_vgpr1
%1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
%2:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 16, 0, implicit $exec
%3:vreg_64 = IMPLICIT_DEF
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 32, 0, implicit $exec
%5:vreg_128 = GLOBAL_LOAD_DWORDX4 %3, 48, 0, implicit $exec
S_NOP 0, implicit %1, implicit %2, implicit %4, implicit %5
...
# GCN-LABEL: {{^}}name: kill_part_subreg{{$}}
# GCN: undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, implicit $exec
# GCN-NEXT: %1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, implicit $exec
# GCN-NEXT: %1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
# GCN-NEXT: KILL %0.sub0_sub1_sub2, %0.sub3
---
name: kill_part_subreg
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
%0:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
undef %1.sub0:vreg_128 = GLOBAL_LOAD_DWORD %0.sub0_sub1, 0, 0, implicit $exec
%1.sub1:vreg_128 = GLOBAL_LOAD_DWORD %0.sub1_sub2, 16, 0, implicit $exec
%1.sub3:vreg_128 = GLOBAL_LOAD_DWORD %0.sub2_sub3, 32, 0, implicit $exec
S_NOP 0, implicit %1
...
# GCN-LABEL: {{^}}name: mem_clause_sreg256_used_stack{{$}}
# GCN: undef %0.sub7:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 8, 0
# GCN-NEXT: dead %0.sub3:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 24, 0
# GCN-NEXT: KILL undef %1
---
name: mem_clause_sreg256_used_stack
stack:
- { id: 0, type: default, offset: 0, size: 40, alignment: 8 }
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
undef %0.sub7:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 8, 0
%0.sub3:sgpr_256 = S_LOAD_DWORD_IMM undef %1:sgpr_64(p4), 24, 0
S_ENDPGM 0
...