Files
clang-p2996/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
Matt Arsenault 81b2c23b77 AMDGPU: Use kill instruction to hint soft clause live ranges
Previously we would use a bundle to hint the register allocator to not
overwrite the pointers in a sequence of loads to avoid breaking soft
clauses. This bundling was based on a fuzzy register pressure
heuristic, so we could not guarantee using more registers than are
really available. This would result in register allocator failing on
unsatisfiable bundles. Use a kill to artificially extend the live
ranges, so we can always succeed at register allocation even if it
means extra spills in the worst case.

This seems to capture most of the benefit of the bundle while avoiding
most of the risk presented by the bundle. However the lit tests do
show a handful of regressions. In some cases with sequences of
volatile loads, unused load components end up getting reallocated to
the next load which forces a wait between. There are also a few small
scheduling regressions where a hazard used to be avoided, and one
spill torture test which for some reason nearly doubles the stack
usage. There is also a bit of noise from leftover kills (it may make
sense for post-RA pseudos to strip all of these out).
2021-02-26 18:26:40 -05:00

819 lines
68 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx1010 -stop-after=greedy < %s | FileCheck %s
%llpc.array.element = type <{ i32, [12 x i8] }>
%llpc.array.element.2 = type <{ i32, [12 x i8] }>
%llpc.array.element.5 = type <{ i32, [12 x i8] }>
define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x i32> inreg %userData) {
; CHECK-LABEL: name: _amdgpu_gs_main
; CHECK: bb.0..expVert:
; CHECK: liveins: $sgpr3, $sgpr4, $sgpr5, $sgpr8, $sgpr9, $sgpr10, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr25, $sgpr27, $sgpr31
; CHECK: undef %56.sub0:sgpr_64 = COPY $sgpr31
; CHECK: SI_SPILL_S32_SAVE $sgpr27, %stack.2, implicit $exec, implicit $sgpr32 :: (store 4 into %stack.2, addrspace 5)
; CHECK: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr25
; CHECK: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr5
; CHECK: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
; CHECK: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr18
; CHECK: undef %50.sub0:sgpr_64 = COPY $sgpr19
; CHECK: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr20
; CHECK: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr21
; CHECK: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr22
; CHECK: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr23
; CHECK: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr9
; CHECK: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr10
; CHECK: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr8
; CHECK: undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0, 0 :: (load 8 from %ir.40, addrspace 4)
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
; CHECK: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc
; CHECK: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 4, implicit-def dead $scc
; CHECK: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY1]], 4, implicit-def dead $scc
; CHECK: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc
; CHECK: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc
; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc
; CHECK: SI_SPILL_S32_SAVE [[S_AND_B32_]], %stack.0, implicit $exec, implicit $sgpr32 :: (store 4 into %stack.0, addrspace 5)
; CHECK: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc
; CHECK: undef %130.sub0:sreg_64 = S_ADD_U32 [[COPY4]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK: %130.sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0, 0 :: (load 16 from %ir.84, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0, 0 :: (load 16 from `<4 x i32> addrspace(4)* undef`, addrspace 4)
; CHECK: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: KILL undef %74:sreg_64
; CHECK: KILL undef %132:sgpr_128
; CHECK: KILL %130.sub0, %130.sub1
; CHECK: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: %71.sub3:sgpr_128 = S_MOV_B32 553734060
; CHECK: %71.sub2:sgpr_128 = S_MOV_B32 -1
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: KILL undef %89:sgpr_128
; CHECK: KILL undef %118:sgpr_128
; CHECK: SI_SPILL_S128_SAVE %71, %stack.1, implicit $exec, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5)
; CHECK: %71.sub1:sgpr_128 = S_MOV_B32 0
; CHECK: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc
; CHECK: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc
; CHECK: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc
; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY4]], 64, implicit-def $scc
; CHECK: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %54:sreg_32, 0, implicit-def dead $scc, implicit $scc
; CHECK: undef %149.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK: %149.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK: undef %156.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK: %156.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
; CHECK: undef %163.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %149, 0, 0, 0 :: (load 16 from %ir.91, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %156, 0, 0, 0 :: (load 16 from %ir.97, addrspace 4)
; CHECK: KILL %156.sub0, %156.sub1
; CHECK: KILL %149.sub0, %149.sub1
; CHECK: %163.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %171:sreg_32, 31, implicit-def dead $scc
; CHECK: undef %176.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], undef %171:sreg_32, implicit-def $scc
; CHECK: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %163, 0, 0, 0 :: (load 16 from %ir.103, addrspace 4)
; CHECK: %176.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
; CHECK: undef %183.sub0:sreg_64 = S_ADD_U32 %50.sub0, [[S_LSHL_B32_]], implicit-def $scc
; CHECK: %183.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK: undef %190.sub0:sreg_64 = S_ADD_U32 %50.sub0, [[S_LSHL_B32_1]], implicit-def $scc
; CHECK: %190.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
; CHECK: undef %200.sub0:sreg_64 = S_ADD_U32 %50.sub0, undef %171:sreg_32, implicit-def $scc
; CHECK: %200.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 %50.sub0, 224, implicit-def $scc
; CHECK: [[S_ADDC_U32_1:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc
; CHECK: undef %210.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK: %210.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK: undef %217.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK: %217.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
; CHECK: undef %224.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK: %224.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_ADD_U32_2:%[0-9]+]]:sreg_32 = S_ADD_U32 %50.sub0, 576, implicit-def $scc
; CHECK: [[S_ADDC_U32_2:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc
; CHECK: undef %241.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK: %241.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK: undef %253.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK: %253.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK: undef %261.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], undef %171:sreg_32, implicit-def $scc
; CHECK: %261.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
; CHECK: undef %273.sub0:sreg_64 = S_ADD_U32 [[COPY5]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK: %273.sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK: undef %286.sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK: %286.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
; CHECK: undef %293.sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK: %293.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc
; CHECK: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc
; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, undef %314:sreg_32, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %71, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 16, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %369:sgpr_128, undef %370:sreg_32, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %380:sgpr_128, 16, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %176, 0, 0, 0 :: (load 16 from %ir.111, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %183, 0, 0, 0 :: (load 16 from %ir.117, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %190, 0, 0, 0 :: (load 16 from %ir.123, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0, 0 :: (load 16 from %ir.131, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %210, 0, 0, 0 :: (load 16 from %ir.138, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %364:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %375:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR]], -98, implicit-def dead $scc
; CHECK: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR1]], -114, implicit-def dead $scc
; CHECK: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR2]], -130, implicit-def dead $scc
; CHECK: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc
; CHECK: undef %327.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK: %327.sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK: undef %335.sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK: %335.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK: undef %343.sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK: %343.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
; CHECK: undef %351.sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK: %351.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY9]], 4, implicit-def dead $scc
; CHECK: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %396:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %50, 224, 0, 0 :: (load 16 from %ir.155, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %217, 0, 0, 0 :: (load 16 from %ir.144, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %224, 0, 0, 0 :: (load 16 from %ir.150, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %241, 0, 0, 0 :: (load 16 from %ir.162, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %253, 0, 0, 0 :: (load 16 from %ir.170, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR4]], -217, implicit-def dead $scc
; CHECK: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -233, implicit-def dead $scc
; CHECK: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR5]], -249, implicit-def dead $scc
; CHECK: [[S_ADD_I32_10:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM3]], -297, implicit-def dead $scc
; CHECK: [[S_ADD_I32_11:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -313, implicit-def dead $scc
; CHECK: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -329, implicit-def dead $scc
; CHECK: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -345, implicit-def dead $scc
; CHECK: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR6]], -441, implicit-def dead $scc
; CHECK: [[S_ADD_U32_3:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], 160, implicit-def $scc
; CHECK: [[S_ADDC_U32_3:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %36:sreg_32, 0, implicit-def dead $scc, implicit $scc
; CHECK: undef %411.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK: %411.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY10]], 4, implicit-def dead $scc
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc
; CHECK: undef %425.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_4]], implicit-def $scc
; CHECK: %425.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_ADD_U32_4:%[0-9]+]]:sreg_32 = S_ADD_U32 %56.sub0, 168, implicit-def $scc
; CHECK: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc
; CHECK: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc
; CHECK: undef %441.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_5]], implicit-def $scc
; CHECK: %441.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: %71.sub0:sgpr_128 = S_LOAD_DWORD_IMM %441, 0, 0, 0 :: (load 4 from %ir..i085.i, align 8, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %261, 0, 0, 0 :: (load 16 from %ir.176, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0, 0 :: (load 16 from %ir.185, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0, 0 :: (load 16 from %ir.194, addrspace 4)
; CHECK: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0, 0 :: (load 16 from %ir.200, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 3, implicit-def dead $scc
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc
; CHECK: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc
; CHECK: undef %453.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_6]], implicit-def $scc
; CHECK: %453.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %453, 0, 0, 0 :: (load 8 from %ir.304, addrspace 4)
; CHECK: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0, 0 :: (load 16 from %ir.223, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0, 0 :: (load 16 from %ir.230, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0, 0 :: (load 16 from %ir.236, addrspace 4)
; CHECK: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0, 0 :: (load 16 from %ir.242, addrspace 4)
; CHECK: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc
; CHECK: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY1]], 3, implicit-def dead $scc
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc
; CHECK: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc
; CHECK: undef %468.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_7]], implicit-def $scc
; CHECK: %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
; CHECK: %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %468, 0, 0, 0 :: (load 8 from %ir.316, addrspace 4)
; CHECK: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, [[S_MOV_B32_]], implicit-def dead $scc
; CHECK: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0, 0 :: (load 16 from %ir.278, addrspace 4)
; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0, 0 :: (load 4 from `i32 addrspace(4)* undef`, addrspace 4)
; CHECK: KILL %411.sub0, %411.sub1
; CHECK: KILL undef %488:sreg_64
; CHECK: KILL %71.sub0_sub1
; CHECK: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY11]], 3, implicit-def dead $scc
; CHECK: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0, 0 :: (load 16 from %ir.287, addrspace 4)
; CHECK: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
; CHECK: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc
; CHECK: undef %485.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_8]], implicit-def $scc
; CHECK: %485.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc
; CHECK: %71.sub0:sgpr_128 = S_LOAD_DWORD_IMM %485, 0, 0, 0 :: (load 4 from %ir..i0100.i, align 8, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: KILL [[S_LOAD_DWORDX4_IMM24]]
; CHECK: KILL [[S_LOAD_DWORDX4_IMM23]]
; CHECK: %71.sub1:sgpr_128 = S_AND_B32 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_]], implicit-def dead $scc
; CHECK: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc
; CHECK: [[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -475, implicit-def dead $scc
; CHECK: [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -491, implicit-def dead $scc
; CHECK: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -507, implicit-def dead $scc
; CHECK: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -539, implicit-def dead $scc
; CHECK: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc
; CHECK: [[SI_SPILL_S32_RESTORE:%[0-9]+]]:sgpr_32 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load 4 from %stack.2, addrspace 5)
; CHECK: [[S_ADD_U32_5:%[0-9]+]]:sreg_32 = S_ADD_U32 [[SI_SPILL_S32_RESTORE]], 96, implicit-def $scc
; CHECK: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc
; CHECK: undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK: %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0, 0 :: (load 16 from %ir.347, addrspace 4)
; CHECK: undef %522.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK: %522.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0, 0 :: (load 16 from %ir.353, addrspace 4)
; CHECK: undef %530.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK: %530.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0, 0 :: (load 16 from %ir.359, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from custom "BufferResource", align 1, addrspace 4)
; CHECK: KILL [[S_LOAD_DWORDX4_IMM27]]
; CHECK: KILL [[S_LOAD_DWORDX4_IMM25]]
; CHECK: KILL [[V_MOV_B32_e32_]]
; CHECK: KILL [[S_LOAD_DWORDX4_IMM26]]
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], implicit $exec
; CHECK: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]], implicit $exec
; CHECK: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -4, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_]], [[V_ADD_U32_e32_2]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 27, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_1]], [[V_ADD_U32_e32_3]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 28, [[BUFFER_LOAD_DWORD_OFFSET]], implicit $exec
; CHECK: [[V_OR_B32_e32_3:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_2]], [[V_SUBREV_U32_e32_]], implicit $exec
; CHECK: [[V_OR_B32_e32_4:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_3]], [[V_SUBREV_U32_e32_1]], implicit $exec
; CHECK: [[V_OR_B32_e32_5:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_SUB_I32_]], [[V_OR_B32_e32_4]], implicit $exec
; CHECK: [[V_OR_B32_e32_6:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_SUB_I32_1]], [[V_OR_B32_e32_5]], implicit $exec
; CHECK: [[V_OR_B32_e32_7:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_SUB_I32_2]], [[V_OR_B32_e32_6]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 32, [[BUFFER_LOAD_FORMAT_X_IDXEN2]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_3:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 33, [[BUFFER_LOAD_FORMAT_X_IDXEN3]], implicit $exec
; CHECK: [[V_OR_B32_e32_8:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_7]], [[V_SUBREV_U32_e32_2]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_4:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 34, [[BUFFER_LOAD_FORMAT_X_IDXEN4]], implicit $exec
; CHECK: [[V_OR_B32_e32_9:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_8]], [[V_SUBREV_U32_e32_3]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_5:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 36, [[BUFFER_LOAD_FORMAT_X_IDXEN5]], implicit $exec
; CHECK: [[V_OR_B32_e32_10:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_9]], [[V_SUBREV_U32_e32_4]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_6:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 37, [[BUFFER_LOAD_FORMAT_X_IDXEN6]], implicit $exec
; CHECK: [[V_OR_B32_e32_11:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_10]], [[V_SUBREV_U32_e32_5]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_7:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 38, [[BUFFER_LOAD_FORMAT_X_IDXEN7]], implicit $exec
; CHECK: [[V_OR_B32_e32_12:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_11]], [[V_SUBREV_U32_e32_6]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_8:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 39, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_13:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_12]], [[V_SUBREV_U32_e32_7]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_9:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 50, [[BUFFER_LOAD_FORMAT_X_IDXEN8]], implicit $exec
; CHECK: [[V_OR_B32_e32_14:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_13]], [[V_SUBREV_U32_e32_8]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_10:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 51, [[BUFFER_LOAD_FORMAT_X_IDXEN9]], implicit $exec
; CHECK: [[V_OR_B32_e32_15:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_14]], [[V_SUBREV_U32_e32_9]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_11:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 52, [[BUFFER_LOAD_FORMAT_X_IDXEN10]], implicit $exec
; CHECK: [[V_OR_B32_e32_16:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_15]], [[V_SUBREV_U32_e32_10]], implicit $exec
; CHECK: [[V_SUBREV_U32_e32_12:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 53, [[BUFFER_LOAD_FORMAT_X_IDXEN11]], implicit $exec
; CHECK: [[V_OR_B32_e32_17:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_16]], [[V_SUBREV_U32_e32_11]], implicit $exec
; CHECK: [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -72, [[BUFFER_LOAD_FORMAT_X_IDXEN12]], implicit $exec
; CHECK: [[V_OR_B32_e32_18:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_17]], [[V_SUBREV_U32_e32_12]], implicit $exec
; CHECK: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -73, [[BUFFER_LOAD_FORMAT_X_IDXEN13]], implicit $exec
; CHECK: [[V_OR_B32_e32_19:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_18]], [[V_ADD_U32_e32_4]], implicit $exec
; CHECK: [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -74, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_20:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_19]], [[V_ADD_U32_e32_5]], implicit $exec
; CHECK: [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -75, [[BUFFER_LOAD_FORMAT_X_IDXEN14]], implicit $exec
; CHECK: [[V_OR_B32_e32_21:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_20]], [[V_ADD_U32_e32_6]], implicit $exec
; CHECK: [[V_ADD_U32_e32_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -77, [[BUFFER_LOAD_FORMAT_X_IDXEN15]], implicit $exec
; CHECK: [[V_OR_B32_e32_22:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_21]], [[V_ADD_U32_e32_7]], implicit $exec
; CHECK: [[V_ADD_U32_e32_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -93, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_23:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_22]], [[V_ADD_U32_e32_8]], implicit $exec
; CHECK: [[V_ADD_U32_e32_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -94, [[BUFFER_LOAD_FORMAT_X_IDXEN16]], implicit $exec
; CHECK: [[V_OR_B32_e32_24:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_23]], [[V_ADD_U32_e32_9]], implicit $exec
; CHECK: [[V_ADD_U32_e32_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -95, [[BUFFER_LOAD_DWORD_OFFSET1]], implicit $exec
; CHECK: [[V_OR_B32_e32_25:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_24]], [[V_ADD_U32_e32_10]], implicit $exec
; CHECK: [[V_ADD_U32_e32_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -96, [[BUFFER_LOAD_DWORD_OFFSET2]], implicit $exec
; CHECK: [[V_OR_B32_e32_26:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_25]], [[V_ADD_U32_e32_11]], implicit $exec
; CHECK: [[V_ADD_U32_e32_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -97, [[BUFFER_LOAD_DWORD_OFFSET3]], implicit $exec
; CHECK: [[V_OR_B32_e32_27:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_26]], [[V_ADD_U32_e32_12]], implicit $exec
; CHECK: [[V_OR_B32_e32_28:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_27]], [[V_ADD_U32_e32_13]], implicit $exec
; CHECK: [[V_OR_B32_e32_29:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_2]], [[V_OR_B32_e32_28]], implicit $exec
; CHECK: [[V_OR_B32_e32_30:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_3]], [[V_OR_B32_e32_29]], implicit $exec
; CHECK: [[V_OR_B32_e32_31:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_4]], [[V_OR_B32_e32_30]], implicit $exec
; CHECK: [[V_ADD_U32_e32_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -194, [[BUFFER_LOAD_FORMAT_X_IDXEN17]], implicit $exec
; CHECK: [[V_OR_B32_e32_32:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_5]], [[V_OR_B32_e32_31]], implicit $exec
; CHECK: [[V_ADD_U32_e32_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -195, [[BUFFER_LOAD_FORMAT_X_IDXEN18]], implicit $exec
; CHECK: [[V_OR_B32_e32_33:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_32]], [[V_ADD_U32_e32_14]], implicit $exec
; CHECK: [[V_ADD_U32_e32_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -196, [[BUFFER_LOAD_FORMAT_X_IDXEN19]], implicit $exec
; CHECK: [[V_OR_B32_e32_34:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_33]], [[V_ADD_U32_e32_15]], implicit $exec
; CHECK: [[V_ADD_U32_e32_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -197, [[BUFFER_LOAD_FORMAT_X_IDXEN20]], implicit $exec
; CHECK: [[V_OR_B32_e32_35:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_34]], [[V_ADD_U32_e32_16]], implicit $exec
; CHECK: [[V_ADD_U32_e32_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -216, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_36:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_35]], [[V_ADD_U32_e32_17]], implicit $exec
; CHECK: [[V_OR_B32_e32_37:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_36]], [[V_ADD_U32_e32_18]], implicit $exec
; CHECK: [[V_OR_B32_e32_38:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_7]], [[V_OR_B32_e32_37]], implicit $exec
; CHECK: [[V_OR_B32_e32_39:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_8]], [[V_OR_B32_e32_38]], implicit $exec
; CHECK: [[V_OR_B32_e32_40:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_9]], [[V_OR_B32_e32_39]], implicit $exec
; CHECK: [[V_OR_B32_e32_41:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_10]], [[V_OR_B32_e32_40]], implicit $exec
; CHECK: [[V_OR_B32_e32_42:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_11]], [[V_OR_B32_e32_41]], implicit $exec
; CHECK: [[V_OR_B32_e32_43:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_12]], [[V_OR_B32_e32_42]], implicit $exec
; CHECK: [[V_OR_B32_e32_44:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_13]], [[V_OR_B32_e32_43]], implicit $exec
; CHECK: [[V_ADD_U32_e32_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -457, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_45:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_14]], [[V_OR_B32_e32_44]], implicit $exec
; CHECK: [[V_ADD_U32_e32_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -458, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_46:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_45]], [[V_ADD_U32_e32_19]], implicit $exec
; CHECK: [[V_ADD_U32_e32_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -459, [[BUFFER_LOAD_FORMAT_X_IDXEN21]], implicit $exec
; CHECK: [[V_OR_B32_e32_47:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_46]], [[V_ADD_U32_e32_20]], implicit $exec
; CHECK: [[V_ADD_U32_e32_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -466, [[BUFFER_LOAD_FORMAT_X_IDXEN22]], implicit $exec
; CHECK: [[V_OR_B32_e32_48:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_47]], [[V_ADD_U32_e32_21]], implicit $exec
; CHECK: [[V_OR_B32_e32_49:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_48]], [[V_ADD_U32_e32_22]], implicit $exec
; CHECK: [[V_OR_B32_e32_50:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_15]], [[V_OR_B32_e32_49]], implicit $exec
; CHECK: [[V_OR_B32_e32_51:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_16]], [[V_OR_B32_e32_50]], implicit $exec
; CHECK: [[V_OR_B32_e32_52:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_17]], [[V_OR_B32_e32_51]], implicit $exec
; CHECK: [[V_OR_B32_e32_53:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_23]], [[V_OR_B32_e32_52]], implicit $exec
; CHECK: [[V_OR_B32_e32_54:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_18]], [[V_OR_B32_e32_53]], implicit $exec
; CHECK: [[V_OR_B32_e32_55:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_19]], [[V_OR_B32_e32_54]], implicit $exec
; CHECK: [[V_OR_B32_e32_56:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_20]], [[V_OR_B32_e32_55]], implicit $exec
; CHECK: [[V_OR_B32_e32_57:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_21]], [[V_OR_B32_e32_56]], implicit $exec
; CHECK: [[V_OR_B32_e32_58:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_22]], [[V_OR_B32_e32_57]], implicit $exec
; CHECK: [[V_ADD_U32_e32_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -555, [[BUFFER_LOAD_FORMAT_X_IDXEN23]], implicit $exec
; CHECK: [[V_ADD_U32_e32_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -556, [[BUFFER_LOAD_FORMAT_X_IDXEN24]], implicit $exec
; CHECK: [[V_OR_B32_e32_59:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_58]], [[V_ADD_U32_e32_23]], implicit $exec
; CHECK: [[V_ADD_U32_e32_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -557, [[BUFFER_LOAD_FORMAT_X_IDXEN25]], implicit $exec
; CHECK: [[V_OR_B32_e32_60:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_59]], [[V_ADD_U32_e32_24]], implicit $exec
; CHECK: [[V_ADD_U32_e32_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -574, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_61:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_60]], [[V_ADD_U32_e32_25]], implicit $exec
; CHECK: [[V_ADD_U32_e32_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -575, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_62:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_61]], [[V_ADD_U32_e32_26]], implicit $exec
; CHECK: [[SI_SPILL_S32_RESTORE1:%[0-9]+]]:sreg_32_xm0_xexec = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 4 from %stack.0, addrspace 5)
; CHECK: [[SI_SPILL_S128_RESTORE:%[0-9]+]]:sgpr_128 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load 16 from %stack.1, align 4, addrspace 5)
; CHECK: undef %914.sub2_sub3:sgpr_128 = COPY [[SI_SPILL_S128_RESTORE]].sub2_sub3 {
; CHECK: internal %914.sub0:sgpr_128 = COPY [[SI_SPILL_S128_RESTORE]].sub0
; CHECK: }
; CHECK: %914.sub1:sgpr_128 = COPY [[SI_SPILL_S32_RESTORE1]]
; CHECK: [[S_BUFFER_LOAD_DWORD_IMM8:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %914, 0, 0, 0 :: (dereferenceable invariant load 4)
; CHECK: [[V_ADD_U32_e32_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -576, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_63:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_62]], [[V_ADD_U32_e32_27]], implicit $exec
; CHECK: [[V_ADD_U32_e32_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -577, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_63]], [[V_ADD_U32_e32_28]], implicit $exec
; CHECK: [[V_ADD_U32_e32_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
; CHECK: [[V_OR_B32_e32_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_64]], [[V_ADD_U32_e32_29]], implicit $exec
; CHECK: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %564:sreg_64, 0, 0, 0 :: (load 32 from `<8 x i32> addrspace(4)* undef`, addrspace 4)
; CHECK: [[V_OR_B32_e32_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_65]], [[V_ADD_U32_e32_30]], implicit $exec
; CHECK: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc
; CHECK: [[V_OR_B32_e32_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_24]], [[V_OR_B32_e32_66]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e32_67]], implicit $exec
; CHECK: undef %691.sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
; CHECK: IMAGE_STORE_V4_V2_gfx10 %691, undef %578:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom "ImageResource")
; CHECK: S_ENDPGM 0
.expVert:
%0 = extractelement <31 x i32> %userData, i64 2
%1 = extractelement <31 x i32> %userData, i64 3
%2 = extractelement <31 x i32> %userData, i64 4
%3 = extractelement <31 x i32> %userData, i64 7
%4 = extractelement <31 x i32> %userData, i64 8
%5 = extractelement <31 x i32> %userData, i64 9
%6 = extractelement <31 x i32> %userData, i64 17
%7 = extractelement <31 x i32> %userData, i64 18
%8 = extractelement <31 x i32> %userData, i64 19
%9 = extractelement <31 x i32> %userData, i64 20
%10 = extractelement <31 x i32> %userData, i64 21
%11 = extractelement <31 x i32> %userData, i64 22
%12 = extractelement <31 x i32> %userData, i64 24
%13 = extractelement <31 x i32> %userData, i64 26
%14 = extractelement <31 x i32> %userData, i64 30
%15 = insertelement <2 x i32> undef, i32 %13, i32 0
%16 = bitcast <2 x i32> %15 to i64
%17 = inttoptr i64 %16 to i8 addrspace(4)*
%18 = insertelement <2 x i32> undef, i32 %12, i32 0
%19 = bitcast <2 x i32> %18 to i64
%20 = inttoptr i64 %19 to i8 addrspace(4)*
%21 = insertelement <2 x i32> undef, i32 %11, i32 0
%22 = bitcast <2 x i32> %21 to i64
%23 = insertelement <2 x i32> undef, i32 %10, i32 0
%24 = bitcast <2 x i32> %23 to i64
%25 = insertelement <2 x i32> undef, i32 %9, i32 0
%26 = bitcast <2 x i32> %25 to i64
%27 = inttoptr i64 %26 to i8 addrspace(4)*
%28 = insertelement <2 x i32> undef, i32 %8, i32 0
%29 = bitcast <2 x i32> %28 to i64
%30 = insertelement <2 x i32> undef, i32 %7, i32 0
%31 = bitcast <2 x i32> %30 to i64
%32 = inttoptr i64 %31 to i8 addrspace(4)*
%33 = insertelement <2 x i32> undef, i32 %6, i32 0
%34 = bitcast <2 x i32> %33 to i64
%35 = inttoptr i64 %34 to i8 addrspace(4)*
%36 = insertelement <2 x i32> undef, i32 %14, i32 0
%37 = bitcast <2 x i32> %36 to i64
%38 = inttoptr i64 %37 to i8 addrspace(4)*
%39 = getelementptr i8, i8 addrspace(4)* %38, i64 232
%.i0.i = bitcast i8 addrspace(4)* %39 to i32 addrspace(4)*
%rootDesc58.ii0.i = load i32, i32 addrspace(4)* %.i0.i, align 8
%.i184.i = getelementptr i8, i8 addrspace(4)* %38, i64 236
%40 = bitcast i8 addrspace(4)* %.i184.i to i32 addrspace(4)*
%rootDesc58.ii1.i = load i32, i32 addrspace(4)* %40, align 4
%41 = and i32 %rootDesc58.ii1.i, 65535
%42 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %rootDesc58.ii0.i, i32 0
%43 = insertelement <4 x i32> %42, i32 %41, i32 1
%44 = and i32 undef, 65535
%45 = insertelement <4 x i32> undef, i32 %44, i32 1
%46 = load <4 x i32>, <4 x i32> addrspace(4)* undef, align 16
%47 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %46, i32 0, i32 0, i32 0, i32 0)
%48 = add i32 %47, -1
%49 = shl i32 %0, 4
%50 = call i32 @llvm.amdgcn.readfirstlane(i32 %49)
%51 = sext i32 %50 to i64
%52 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%53 = add i32 %52, -2
%54 = or i32 %53, %48
%55 = shl i32 %1, 4
%56 = call i32 @llvm.amdgcn.readfirstlane(i32 %55)
%57 = sext i32 %56 to i64
%58 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%59 = add i32 %58, -3
%60 = or i32 %54, %59
%61 = shl i32 %2, 4
%62 = call i32 @llvm.amdgcn.readfirstlane(i32 %61)
%63 = sext i32 %62 to i64
%64 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%65 = add i32 %64, -4
%66 = or i32 %60, %65
%67 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%68 = add i32 %67, -27
%69 = or i32 %66, %68
%70 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 0, i32 0, i32 0)
%71 = add i32 %70, -28
%72 = or i32 %69, %71
%73 = call i32 @llvm.amdgcn.readfirstlane(i32 %0)
%74 = getelementptr i8, i8 addrspace(4)* %35, i64 16
%75 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
%76 = add i32 %75, -29
%77 = or i32 %72, %76
%78 = call i32 @llvm.amdgcn.readfirstlane(i32 %1)
%79 = shl i32 %78, 4
%80 = sext i32 %79 to i64
%81 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
%82 = add i32 %81, -30
%83 = or i32 %77, %82
%84 = call i32 @llvm.amdgcn.readfirstlane(i32 %2)
%85 = shl i32 %84, 4
%86 = sext i32 %85 to i64
%87 = getelementptr i8, i8 addrspace(4)* %74, i64 %86
%88 = bitcast i8 addrspace(4)* %87 to <4 x i32> addrspace(4)*
%89 = load <4 x i32>, <4 x i32> addrspace(4)* %88, align 16
%90 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %89, i32 0, i32 0)
%91 = add i32 %90, -31
%92 = or i32 %83, %91
%93 = getelementptr i8, i8 addrspace(4)* %35, i64 64
%94 = getelementptr i8, i8 addrspace(4)* %93, i64 %51
%95 = bitcast i8 addrspace(4)* %94 to <4 x i32> addrspace(4)*
%96 = load <4 x i32>, <4 x i32> addrspace(4)* %95, align 16
%97 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %96, i32 0, i32 0, i32 0, i32 0)
%98 = add i32 %97, -32
%99 = or i32 %92, %98
%100 = getelementptr i8, i8 addrspace(4)* %93, i64 %57
%101 = bitcast i8 addrspace(4)* %100 to <4 x i32> addrspace(4)*
%102 = load <4 x i32>, <4 x i32> addrspace(4)* %101, align 16
%103 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %102, i32 0, i32 0, i32 0, i32 0)
%104 = add i32 %103, -33
%105 = or i32 %99, %104
%106 = getelementptr i8, i8 addrspace(4)* %93, i64 %63
%107 = bitcast i8 addrspace(4)* %106 to <4 x i32> addrspace(4)*
%108 = load <4 x i32>, <4 x i32> addrspace(4)* %107, align 16
%109 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %108, i32 0, i32 0, i32 0, i32 0)
%110 = add i32 %109, -34
%111 = or i32 %105, %110
%112 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
%113 = sext i32 %112 to i64
%114 = getelementptr i8, i8 addrspace(4)* %93, i64 %113
%115 = bitcast i8 addrspace(4)* %114 to <4 x i32> addrspace(4)*
%116 = load <4 x i32>, <4 x i32> addrspace(4)* %115, align 16
%117 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %116, i32 0, i32 0, i32 0, i32 0)
%118 = add i32 %117, -36
%119 = or i32 %111, %118
%120 = getelementptr i8, i8 addrspace(4)* %32, i64 %51
%121 = bitcast i8 addrspace(4)* %120 to <4 x i32> addrspace(4)*
%122 = load <4 x i32>, <4 x i32> addrspace(4)* %121, align 16
%123 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %122, i32 0, i32 0, i32 0, i32 0)
%124 = add i32 %123, -37
%125 = or i32 %119, %124
%126 = getelementptr i8, i8 addrspace(4)* %32, i64 %57
%127 = bitcast i8 addrspace(4)* %126 to <4 x i32> addrspace(4)*
%128 = load <4 x i32>, <4 x i32> addrspace(4)* %127, align 16
%129 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %128, i32 0, i32 0, i32 0, i32 0)
%130 = add i32 %129, -38
%131 = or i32 %125, %130
%132 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%133 = add i32 %132, -39
%134 = or i32 %131, %133
%135 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
%136 = sext i32 %135 to i64
%137 = getelementptr i8, i8 addrspace(4)* %32, i64 %136
%138 = bitcast i8 addrspace(4)* %137 to <4 x i32> addrspace(4)*
%139 = load <4 x i32>, <4 x i32> addrspace(4)* %138, align 16
%140 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %139, i32 0, i32 0, i32 0, i32 0)
%141 = add i32 %140, -50
%142 = or i32 %134, %141
%143 = getelementptr i8, i8 addrspace(4)* %32, i64 224
%144 = getelementptr i8, i8 addrspace(4)* %143, i64 %51
%145 = bitcast i8 addrspace(4)* %144 to <4 x i32> addrspace(4)*
%146 = load <4 x i32>, <4 x i32> addrspace(4)* %145, align 16
%147 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %146, i32 0, i32 0, i32 0, i32 0)
%148 = add i32 %147, -51
%149 = or i32 %142, %148
%150 = getelementptr i8, i8 addrspace(4)* %143, i64 %57
%151 = bitcast i8 addrspace(4)* %150 to <4 x i32> addrspace(4)*
%152 = load <4 x i32>, <4 x i32> addrspace(4)* %151, align 16
%153 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %152, i32 0, i32 0, i32 0, i32 0)
%154 = add i32 %153, -52
%155 = or i32 %149, %154
%156 = getelementptr i8, i8 addrspace(4)* %143, i64 %63
%157 = bitcast i8 addrspace(4)* %156 to <4 x i32> addrspace(4)*
%158 = load <4 x i32>, <4 x i32> addrspace(4)* %157, align 16
%159 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %158, i32 0, i32 0, i32 0, i32 0)
%160 = add i32 %159, -53
%161 = or i32 %155, %160
%162 = sext i32 undef to i64
%163 = getelementptr i8, i8 addrspace(4)* %143, i64 %162
%164 = bitcast i8 addrspace(4)* %163 to <4 x i32> addrspace(4)*
%165 = load <4 x i32>, <4 x i32> addrspace(4)* %164, align 16
%166 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %165, i32 0, i32 0, i32 0, i32 0)
%167 = add i32 %166, -72
%168 = or i32 %161, %167
%169 = getelementptr i8, i8 addrspace(4)* %32, i64 576
%170 = getelementptr i8, i8 addrspace(4)* %169, i64 %51
%171 = bitcast i8 addrspace(4)* %170 to <4 x i32> addrspace(4)*
%172 = load <4 x i32>, <4 x i32> addrspace(4)* %171, align 16
%173 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %172, i32 0, i32 0, i32 0, i32 0)
%174 = add i32 %173, -73
%175 = or i32 %168, %174
%176 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%177 = add i32 %176, -74
%178 = or i32 %175, %177
%179 = getelementptr i8, i8 addrspace(4)* %169, i64 %63
%180 = bitcast i8 addrspace(4)* %179 to <4 x i32> addrspace(4)*
%181 = load <4 x i32>, <4 x i32> addrspace(4)* %180, align 16
%182 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %181, i32 0, i32 0, i32 0, i32 0)
%183 = add i32 %182, -75
%184 = or i32 %178, %183
%185 = getelementptr i8, i8 addrspace(4)* %169, i64 %113
%186 = bitcast i8 addrspace(4)* %185 to <4 x i32> addrspace(4)*
%187 = load <4 x i32>, <4 x i32> addrspace(4)* %186, align 16
%188 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %187, i32 0, i32 0, i32 0, i32 0)
%189 = add i32 %188, -77
%190 = or i32 %184, %189
%191 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%192 = add i32 %191, -93
%193 = or i32 %190, %192
%194 = inttoptr i64 %29 to i8 addrspace(4)*
%195 = getelementptr i8, i8 addrspace(4)* %194, i64 %51
%196 = bitcast i8 addrspace(4)* %195 to <4 x i32> addrspace(4)*
%197 = load <4 x i32>, <4 x i32> addrspace(4)* %196, align 16
%198 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %197, i32 0, i32 0, i32 0, i32 0)
%199 = add i32 %198, -94
%200 = or i32 %193, %199
%201 = load <4 x i32>, <4 x i32> addrspace(4)* undef, align 16
%202 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %201, i32 0, i32 0, i32 0)
%203 = add i32 %202, -95
%204 = or i32 %200, %203
%205 = getelementptr i8, i8 addrspace(4)* %27, i64 %80
%206 = bitcast i8 addrspace(4)* %205 to <4 x i32> addrspace(4)*
%207 = load <4 x i32>, <4 x i32> addrspace(4)* %206, align 16
%208 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %207, i32 0, i32 0, i32 0)
%209 = add i32 %208, -96
%210 = or i32 %204, %209
%211 = getelementptr i8, i8 addrspace(4)* %27, i64 %86
%212 = bitcast i8 addrspace(4)* %211 to <4 x i32> addrspace(4)*
%213 = load <4 x i32>, <4 x i32> addrspace(4)* %212, align 16
%214 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %213, i32 0, i32 0, i32 0)
%215 = add i32 %214, -97
%216 = or i32 %210, %215
%217 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, <{ [4 x i32], [6 x %llpc.array.element] }> addrspace(6)* null, i32 0, i32 1, i32 %0, i32 0
%218 = ptrtoint i32 addrspace(6)* %217 to i32
%219 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %45, i32 %218, i32 0)
%220 = add i32 %219, -98
%221 = or i32 %216, %220
%222 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %45, i32 undef, i32 0)
%223 = add i32 %222, -114
%224 = or i32 %221, %223
%225 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, <{ [4 x i32], [6 x %llpc.array.element] }> addrspace(6)* null, i32 0, i32 1, i32 %2, i32 0
%226 = ptrtoint i32 addrspace(6)* %225 to i32
%227 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %45, i32 %226, i32 0)
%228 = add i32 %227, -130
%229 = or i32 %224, %228
%230 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, <{ [4 x i32], [6 x %llpc.array.element] }> addrspace(6)* null, i32 0, i32 1, i32 undef, i32 0
%231 = ptrtoint i32 addrspace(6)* %230 to i32
%232 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %45, i32 %231, i32 0)
%233 = add i32 %232, -178
%234 = or i32 %229, %233
%235 = inttoptr i64 %24 to i8 addrspace(4)*
%236 = getelementptr i8, i8 addrspace(4)* %235, i64 %51
%237 = bitcast i8 addrspace(4)* %236 to <4 x i32> addrspace(4)*
%238 = load <4 x i32>, <4 x i32> addrspace(4)* %237, align 16
%239 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %238, i32 0, i32 0, i32 0, i32 0)
%240 = add i32 %239, -194
%241 = or i32 %234, %240
%242 = inttoptr i64 %22 to i8 addrspace(4)*
%243 = getelementptr i8, i8 addrspace(4)* %242, i64 %51
%244 = bitcast i8 addrspace(4)* %243 to <4 x i32> addrspace(4)*
%245 = load <4 x i32>, <4 x i32> addrspace(4)* %244, align 16
%246 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %245, i32 0, i32 0, i32 0, i32 0)
%247 = add i32 %246, -195
%248 = or i32 %241, %247
%249 = getelementptr i8, i8 addrspace(4)* %242, i64 %57
%250 = bitcast i8 addrspace(4)* %249 to <4 x i32> addrspace(4)*
%251 = load <4 x i32>, <4 x i32> addrspace(4)* %250, align 16
%252 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %251, i32 0, i32 0, i32 0, i32 0)
%253 = add i32 %252, -196
%254 = or i32 %248, %253
%255 = getelementptr i8, i8 addrspace(4)* %242, i64 %63
%256 = bitcast i8 addrspace(4)* %255 to <4 x i32> addrspace(4)*
%257 = load <4 x i32>, <4 x i32> addrspace(4)* %256, align 16
%258 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %257, i32 0, i32 0, i32 0, i32 0)
%259 = add i32 %258, -197
%260 = or i32 %254, %259
%261 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%262 = add i32 %261, -216
%263 = or i32 %260, %262
%264 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, <{ [4 x i32], [6 x %llpc.array.element.2] }> addrspace(6)* null, i32 0, i32 1, i32 %0, i32 0
%265 = ptrtoint i32 addrspace(6)* %264 to i32
%266 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %265, i32 0)
%267 = add i32 %266, -217
%268 = or i32 %263, %267
%269 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%270 = add i32 %269, -233
%271 = or i32 %268, %270
%272 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, <{ [4 x i32], [6 x %llpc.array.element.2] }> addrspace(6)* null, i32 0, i32 1, i32 %2, i32 0
%273 = ptrtoint i32 addrspace(6)* %272 to i32
%274 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %273, i32 0)
%275 = add i32 %274, -249
%276 = or i32 %271, %275
%277 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, <{ [4 x i32], [6 x %llpc.array.element.2] }> addrspace(6)* null, i32 0, i32 1, i32 undef, i32 0
%278 = ptrtoint i32 addrspace(6)* %277 to i32
%279 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %278, i32 0)
%280 = add i32 %279, -297
%281 = or i32 %276, %280
%282 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%283 = add i32 %282, -313
%284 = or i32 %281, %283
%285 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%286 = add i32 %285, -329
%287 = or i32 %284, %286
%288 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%289 = add i32 %288, -345
%290 = or i32 %287, %289
%291 = getelementptr <{ [4 x i32], [9 x %llpc.array.element.5] }>, <{ [4 x i32], [9 x %llpc.array.element.5] }> addrspace(6)* null, i32 0, i32 1, i32 %4, i32 0
%292 = ptrtoint i32 addrspace(6)* %291 to i32
%293 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %292, i32 0)
%294 = add i32 %293, -441
%295 = or i32 %290, %294
%296 = getelementptr i8, i8 addrspace(4)* %20, i64 160
%297 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%298 = add i32 %297, -457
%299 = or i32 %295, %298
%300 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%301 = add i32 %300, -458
%302 = or i32 %299, %301
%303 = getelementptr i8, i8 addrspace(4)* %296, i64 %63
%304 = bitcast i8 addrspace(4)* %303 to <4 x i32> addrspace(4)*
%305 = load <4 x i32>, <4 x i32> addrspace(4)* %304, align 16
%306 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %305, i32 0, i32 0, i32 0, i32 0)
%307 = add i32 %306, -459
%308 = or i32 %302, %307
%309 = shl i32 %5, 4
%310 = call i32 @llvm.amdgcn.readfirstlane(i32 %309)
%311 = sext i32 %310 to i64
%312 = getelementptr i8, i8 addrspace(4)* %296, i64 %311
%313 = bitcast i8 addrspace(4)* %312 to <4 x i32> addrspace(4)*
%314 = load <4 x i32>, <4 x i32> addrspace(4)* %313, align 16
%315 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %314, i32 0, i32 0, i32 0, i32 0)
%316 = add i32 %315, -466
%317 = or i32 %308, %316
%318 = getelementptr i8, i8 addrspace(4)* %38, i64 168
%319 = shl i32 %73, 3
%320 = sext i32 %319 to i64
%321 = getelementptr i8, i8 addrspace(4)* %318, i64 %320
%.i085.i = bitcast i8 addrspace(4)* %321 to i32 addrspace(4)*
%.ii0.i = load i32, i32 addrspace(4)* %.i085.i, align 8
%322 = and i32 undef, 65535
%323 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii0.i, i32 0
%324 = insertelement <4 x i32> %323, i32 %322, i32 1
%325 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %324, i32 0, i32 0)
%326 = add i32 %325, -467
%327 = or i32 %317, %326
%328 = shl i32 %78, 3
%329 = sext i32 %328 to i64
%330 = getelementptr i8, i8 addrspace(4)* %318, i64 %329
%.i088.i = bitcast i8 addrspace(4)* %330 to i32 addrspace(4)*
%.ii090.i = load i32, i32 addrspace(4)* %.i088.i, align 8
%.i191.i = getelementptr i8, i8 addrspace(4)* %330, i64 4
%331 = bitcast i8 addrspace(4)* %.i191.i to i32 addrspace(4)*
%.ii192.i = load i32, i32 addrspace(4)* %331, align 4
%332 = and i32 %.ii192.i, 65535
%333 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii090.i, i32 0
%334 = insertelement <4 x i32> %333, i32 %332, i32 1
%335 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %334, i32 0, i32 0)
%336 = add i32 %335, -468
%337 = or i32 %327, %336
%338 = shl i32 %84, 3
%339 = sext i32 %338 to i64
%340 = getelementptr i8, i8 addrspace(4)* %318, i64 %339
%.i094.i = bitcast i8 addrspace(4)* %340 to i32 addrspace(4)*
%.ii096.i = load i32, i32 addrspace(4)* %.i094.i, align 8
%.i197.i = getelementptr i8, i8 addrspace(4)* %340, i64 4
%341 = bitcast i8 addrspace(4)* %.i197.i to i32 addrspace(4)*
%.ii198.i = load i32, i32 addrspace(4)* %341, align 4
%342 = and i32 %.ii198.i, 65535
%343 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii096.i, i32 0
%344 = insertelement <4 x i32> %343, i32 %342, i32 1
%345 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %344, i32 0, i32 0)
%346 = add i32 %345, -469
%347 = or i32 %337, %346
%348 = call i32 @llvm.amdgcn.readfirstlane(i32 %3)
%349 = shl i32 %348, 3
%350 = sext i32 %349 to i64
%351 = getelementptr i8, i8 addrspace(4)* %318, i64 %350
%.i0100.i = bitcast i8 addrspace(4)* %351 to i32 addrspace(4)*
%.ii0102.i = load i32, i32 addrspace(4)* %.i0100.i, align 8
%.ii1104.i = load i32, i32 addrspace(4)* undef, align 4
%352 = and i32 %.ii1104.i, 65535
%353 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii0102.i, i32 0
%354 = insertelement <4 x i32> %353, i32 %352, i32 1
%355 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %354, i32 0, i32 0)
%356 = add i32 %355, -473
%357 = or i32 %347, %356
%358 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
%359 = add i32 %358, -474
%360 = or i32 %357, %359
%361 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%362 = add i32 %361, -475
%363 = or i32 %360, %362
%364 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%365 = add i32 %364, -491
%366 = or i32 %363, %365
%367 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%368 = add i32 %367, -507
%369 = or i32 %366, %368
%370 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%371 = add i32 %370, -539
%372 = or i32 %369, %371
%373 = getelementptr i8, i8 addrspace(4)* %17, i64 96
%374 = getelementptr i8, i8 addrspace(4)* %373, i64 %51
%375 = bitcast i8 addrspace(4)* %374 to <4 x i32> addrspace(4)*
%376 = load <4 x i32>, <4 x i32> addrspace(4)* %375, align 16
%377 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %376, i32 0, i32 0, i32 0, i32 0)
%378 = add i32 %377, -555
%379 = or i32 %372, %378
%380 = getelementptr i8, i8 addrspace(4)* %373, i64 %57
%381 = bitcast i8 addrspace(4)* %380 to <4 x i32> addrspace(4)*
%382 = load <4 x i32>, <4 x i32> addrspace(4)* %381, align 16
%383 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %382, i32 0, i32 0, i32 0, i32 0)
%384 = add i32 %383, -556
%385 = or i32 %379, %384
%386 = getelementptr i8, i8 addrspace(4)* %373, i64 %63
%387 = bitcast i8 addrspace(4)* %386 to <4 x i32> addrspace(4)*
%388 = load <4 x i32>, <4 x i32> addrspace(4)* %387, align 16
%389 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %388, i32 0, i32 0, i32 0, i32 0)
%390 = add i32 %389, -557
%391 = or i32 %385, %390
%392 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%393 = add i32 %392, -574
%394 = or i32 %391, %393
%395 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%396 = add i32 %395, -575
%397 = or i32 %394, %396
%398 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%399 = add i32 %398, -576
%400 = or i32 %397, %399
%401 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%402 = add i32 %401, -577
%403 = or i32 %400, %402
%404 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%405 = add i32 %404, -593
%406 = or i32 %403, %405
%407 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %43, i32 0, i32 0)
%408 = add i32 %407, -594
%409 = or i32 %406, %408
%.not.i = icmp eq i32 %409, 0
%410 = load <8 x i32>, <8 x i32> addrspace(4)* undef, align 32
%.i010.i = select i1 %.not.i, float 0x36A0000000000000, float 0.000000e+00
%411 = insertelement <4 x float> undef, float %.i010.i, i32 3
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %411, i32 15, i32 undef, i32 undef, <8 x i32> %410, i32 0, i32 0)
ret void
}
declare i32 @llvm.amdgcn.readfirstlane(i32)
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)
declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)