In graphics shaders it is better overall to use the NSA encoding for IMAGE instructions, because the benefit of less constrained register allocation outweighs the cost of the larger encoding. In particular, the NSA form often avoids the need for extra V_MOV_B32 instructions between IMAGE instructions, which in turn can allow the IMAGE instructions to be claused. Note that GFX12 no longer has a bit in the encoding to choose between the NSA and non-NSA forms, so this choice only affects GFX10 and GFX11.
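As a rough sketch of the difference (the instruction and register numbers here are hypothetical, not taken from the test below): the non-NSA encoding requires the address to occupy a contiguous VGPR range, while the NSA encoding lists each address VGPR individually, so no packing copies are needed.

  ; non-NSA: the 2D address must live in a contiguous pair such as v[4:5],
  ; so a stray coordinate in v7 is first copied into place
  v_mov_b32 v5, v7
  image_load v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D
  ; NSA: the two address registers are listed individually, no copy needed
  image_load v[0:3], [v4, v7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D

The IMAGE_STORE_V4_V2_nsa_gfx10 checked at the end of this test is such an NSA form, with two independent virtual registers as its address operands.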
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -stop-after=greedy < %s | FileCheck %s
; TODO: This was introduced in D88020 to catch a case where an unreachable assert
; was hit during live range splitting. But after D104509, there is an IR change
; after the register coalescer which makes the case no longer work as it did
; before. We need to find some other way to reproduce the bad case fixed by D88020.
%llpc.array.element = type <{ i32, [12 x i8] }>
%llpc.array.element.2 = type <{ i32, [12 x i8] }>
%llpc.array.element.5 = type <{ i32, [12 x i8] }>
define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x i32> inreg %userData) {
|
|
; CHECK-LABEL: name: _amdgpu_gs_main
|
|
; CHECK: bb.0..expVert:
|
|
; CHECK-NEXT: liveins: $sgpr3, $sgpr4, $sgpr5, $sgpr8, $sgpr9, $sgpr10, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr25, $sgpr27, $sgpr31
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:sgpr_64 = COPY $sgpr31
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr27
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr25
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr5
|
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr4
|
|
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr3
|
|
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr18
|
|
; CHECK-NEXT: undef [[COPY7:%[0-9]+]].sub0:sgpr_64 = COPY $sgpr19
|
|
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr20
|
|
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr21
|
|
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr22
|
|
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr23
|
|
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr9
|
|
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr10
|
|
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr8
|
|
; CHECK-NEXT: undef [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4)
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %125:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: KILL undef %125:sgpr_128
|
|
; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub1:sgpr_128 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `ptr addrspace(4) undef`, addrspace 4)
|
|
; CHECK-NEXT: KILL undef %74:sreg_64
|
|
; CHECK-NEXT: KILL [[S_ADD_U32_]].sub0, [[S_ADD_U32_]].sub1
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: KILL undef %89:sgpr_128
|
|
; CHECK-NEXT: KILL undef %118:sgpr_128
|
|
; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_1:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.87, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.93, addrspace 4)
|
|
; CHECK-NEXT: KILL [[S_ADD_U32_1]].sub0, [[S_ADD_U32_1]].sub1
|
|
; CHECK-NEXT: KILL [[S_ADD_U32_2]].sub0, [[S_ADD_U32_2]].sub1
|
|
; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %169:sreg_32, 31, implicit-def dead $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %169:sreg_32, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %169:sreg_32, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_9:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %301:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %356:sgpr_128, undef %357:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %367:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 64, 0 :: (invariant load (s128) from %ir.107, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.124, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %351:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %362:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM2]], -130, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4)
|
|
; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_13:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %383:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.129, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.145, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 576, 0 :: (invariant load (s128) from %ir.150, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 224, 0 :: (invariant load (s128) from %ir.134, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.162, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM4]], -217, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -233, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM5]], -249, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_10:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM3]], -297, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_11:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -313, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 224, 0 :: (invariant load (s128) from %ir.140, addrspace 4)
|
|
; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -329, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -345, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM6]], -441, implicit-def dead $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY13]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_4]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 3, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s32) from %ir.273, align 8, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 576, 0 :: (invariant load (s128) from %ir.157, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub3:sgpr_128 = S_MOV_B32 553734060
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 -1
|
|
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.170, addrspace 4)
|
|
; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub1
|
|
; CHECK-NEXT: [[COPY15:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]]
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.178, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.183, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.282, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.205, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.211, addrspace 4)
|
|
; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.216, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.221, addrspace 4)
|
|
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY16:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0
|
|
; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_]]
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY16]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s64) from %ir.293, addrspace 4)
|
|
; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
|
|
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM2]].sub1, 65535, implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY17:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM2]].sub0
|
|
; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]]
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.256, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %469:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4)
|
|
; CHECK-NEXT: KILL [[S_ADD_U32_16]].sub0, [[S_ADD_U32_16]].sub1
|
|
; CHECK-NEXT: KILL undef %469:sreg_64
|
|
; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3
|
|
; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.265, addrspace 4)
|
|
; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_8]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.305, align 8, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]]
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]]
|
|
; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
|
|
; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]]
|
|
; CHECK-NEXT: [[COPY18:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM2]]
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY18]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -475, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -491, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -507, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -539, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc
|
|
; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.323, addrspace 4)
|
|
; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load (s128) from %ir.329, addrspace 4)
|
|
; CHECK-NEXT: undef [[S_ADD_U32_24:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_24]], 96, 0 :: (invariant load (s128) from %ir.335, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]]
|
|
; CHECK-NEXT: KILL [[V_MOV_B32_e32_]]
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]]
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]]
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_ADD_U32_e64_]], [[V_ADD_U32_e64_1]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -4, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[V_ADD_U32_e64_2]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 27, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_2:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_1]], [[V_ADD_U32_e64_3]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 28, [[BUFFER_LOAD_DWORD_OFFSET]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_3:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_2]], [[V_SUBREV_U32_e64_]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_4:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_3]], [[V_SUBREV_U32_e64_1]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_5:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_SUB_I32_]], [[V_OR_B32_e64_4]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_6:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_SUB_I32_1]], [[V_OR_B32_e64_5]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_7:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_SUB_I32_2]], [[V_OR_B32_e64_6]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 32, [[BUFFER_LOAD_FORMAT_X_IDXEN2]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_3:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 33, [[BUFFER_LOAD_FORMAT_X_IDXEN3]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_8:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_7]], [[V_SUBREV_U32_e64_2]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_4:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 34, [[BUFFER_LOAD_FORMAT_X_IDXEN4]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_9:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_8]], [[V_SUBREV_U32_e64_3]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_5:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 36, [[BUFFER_LOAD_FORMAT_X_IDXEN5]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_10:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_9]], [[V_SUBREV_U32_e64_4]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_6:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 37, [[BUFFER_LOAD_FORMAT_X_IDXEN6]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_11:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_10]], [[V_SUBREV_U32_e64_5]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_7:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 38, [[BUFFER_LOAD_FORMAT_X_IDXEN7]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_12:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_11]], [[V_SUBREV_U32_e64_6]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_8:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 39, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_13:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_12]], [[V_SUBREV_U32_e64_7]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_9:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 50, [[BUFFER_LOAD_FORMAT_X_IDXEN8]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_14:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_13]], [[V_SUBREV_U32_e64_8]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_10:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 51, [[BUFFER_LOAD_FORMAT_X_IDXEN9]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_15:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_14]], [[V_SUBREV_U32_e64_9]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_11:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 52, [[BUFFER_LOAD_FORMAT_X_IDXEN10]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_16:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_15]], [[V_SUBREV_U32_e64_10]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_12:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 53, [[BUFFER_LOAD_FORMAT_X_IDXEN11]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_17:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_16]], [[V_SUBREV_U32_e64_11]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -72, [[BUFFER_LOAD_FORMAT_X_IDXEN12]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_18:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_17]], [[V_SUBREV_U32_e64_12]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -73, [[BUFFER_LOAD_FORMAT_X_IDXEN13]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_19:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_18]], [[V_ADD_U32_e64_4]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -74, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_20:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_19]], [[V_ADD_U32_e64_5]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -75, [[BUFFER_LOAD_FORMAT_X_IDXEN14]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_21:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_20]], [[V_ADD_U32_e64_6]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -77, [[BUFFER_LOAD_FORMAT_X_IDXEN15]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_22:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_21]], [[V_ADD_U32_e64_7]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -93, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_23:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_22]], [[V_ADD_U32_e64_8]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -94, [[BUFFER_LOAD_FORMAT_X_IDXEN16]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_24:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_23]], [[V_ADD_U32_e64_9]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -95, [[BUFFER_LOAD_DWORD_OFFSET1]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_25:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_24]], [[V_ADD_U32_e64_10]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -96, [[BUFFER_LOAD_DWORD_OFFSET2]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_26:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_25]], [[V_ADD_U32_e64_11]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -97, [[BUFFER_LOAD_DWORD_OFFSET3]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_27:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_26]], [[V_ADD_U32_e64_12]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_28:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_27]], [[V_ADD_U32_e64_13]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_29:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_2]], [[V_OR_B32_e64_28]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_30:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_3]], [[V_OR_B32_e64_29]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_31:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_4]], [[V_OR_B32_e64_30]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -194, [[BUFFER_LOAD_FORMAT_X_IDXEN17]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_32:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_5]], [[V_OR_B32_e64_31]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -195, [[BUFFER_LOAD_FORMAT_X_IDXEN18]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_33:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_32]], [[V_ADD_U32_e64_14]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -196, [[BUFFER_LOAD_FORMAT_X_IDXEN19]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_34:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_33]], [[V_ADD_U32_e64_15]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -197, [[BUFFER_LOAD_FORMAT_X_IDXEN20]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_35:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_34]], [[V_ADD_U32_e64_16]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -216, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_36:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_35]], [[V_ADD_U32_e64_17]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_37:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_36]], [[V_ADD_U32_e64_18]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_38:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_7]], [[V_OR_B32_e64_37]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_39:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_8]], [[V_OR_B32_e64_38]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_40:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_9]], [[V_OR_B32_e64_39]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_41:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_10]], [[V_OR_B32_e64_40]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_42:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_11]], [[V_OR_B32_e64_41]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_43:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_12]], [[V_OR_B32_e64_42]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_44:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_13]], [[V_OR_B32_e64_43]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -457, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_45:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_14]], [[V_OR_B32_e64_44]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -458, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_46:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_45]], [[V_ADD_U32_e64_19]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -459, [[BUFFER_LOAD_FORMAT_X_IDXEN21]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_47:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_46]], [[V_ADD_U32_e64_20]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -466, [[BUFFER_LOAD_FORMAT_X_IDXEN22]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_48:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_47]], [[V_ADD_U32_e64_21]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_49:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_48]], [[V_ADD_U32_e64_22]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_50:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_15]], [[V_OR_B32_e64_49]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_51:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_16]], [[V_OR_B32_e64_50]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_52:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_17]], [[V_OR_B32_e64_51]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_53:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_23]], [[V_OR_B32_e64_52]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_54:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_18]], [[V_OR_B32_e64_53]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_55:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_19]], [[V_OR_B32_e64_54]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_56:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_20]], [[V_OR_B32_e64_55]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_57:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_21]], [[V_OR_B32_e64_56]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_58:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_22]], [[V_OR_B32_e64_57]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -555, [[BUFFER_LOAD_FORMAT_X_IDXEN23]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -556, [[BUFFER_LOAD_FORMAT_X_IDXEN24]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_59:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_58]], [[V_ADD_U32_e64_23]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -557, [[BUFFER_LOAD_FORMAT_X_IDXEN25]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_60:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_59]], [[V_ADD_U32_e64_24]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -574, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_61:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_60]], [[V_ADD_U32_e64_25]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -575, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_62:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_61]], [[V_ADD_U32_e64_26]], implicit $exec
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM8:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -576, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_63:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_62]], [[V_ADD_U32_e64_27]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -577, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %542:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) undef`, addrspace 4)
|
|
; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec
|
|
; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc
|
|
; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec
|
|
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec
|
|
; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
|
|
; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %556:vgpr_32, undef %558:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
.expVert:
|
|
%0 = extractelement <31 x i32> %userData, i64 2
|
|
%1 = extractelement <31 x i32> %userData, i64 3
|
|
%2 = extractelement <31 x i32> %userData, i64 4
|
|
%3 = extractelement <31 x i32> %userData, i64 7
|
|
%4 = extractelement <31 x i32> %userData, i64 8
|
|
%5 = extractelement <31 x i32> %userData, i64 9
|
|
%6 = extractelement <31 x i32> %userData, i64 17
|
|
%7 = extractelement <31 x i32> %userData, i64 18
|
|
%8 = extractelement <31 x i32> %userData, i64 19
|
|
%9 = extractelement <31 x i32> %userData, i64 20
|
|
%10 = extractelement <31 x i32> %userData, i64 21
|
|
%11 = extractelement <31 x i32> %userData, i64 22
|
|
%12 = extractelement <31 x i32> %userData, i64 24
|
|
%13 = extractelement <31 x i32> %userData, i64 26
|
|
%14 = extractelement <31 x i32> %userData, i64 30
|
|
%15 = insertelement <2 x i32> undef, i32 %13, i32 0
|
|
%16 = bitcast <2 x i32> %15 to i64
|
|
%17 = inttoptr i64 %16 to ptr addrspace(4)
|
|
%18 = insertelement <2 x i32> undef, i32 %12, i32 0
|
|
%19 = bitcast <2 x i32> %18 to i64
|
|
%20 = inttoptr i64 %19 to ptr addrspace(4)
|
|
%21 = insertelement <2 x i32> undef, i32 %11, i32 0
|
|
%22 = bitcast <2 x i32> %21 to i64
|
|
%23 = insertelement <2 x i32> undef, i32 %10, i32 0
|
|
%24 = bitcast <2 x i32> %23 to i64
|
|
%25 = insertelement <2 x i32> undef, i32 %9, i32 0
|
|
%26 = bitcast <2 x i32> %25 to i64
|
|
%27 = inttoptr i64 %26 to ptr addrspace(4)
|
|
%28 = insertelement <2 x i32> undef, i32 %8, i32 0
|
|
%29 = bitcast <2 x i32> %28 to i64
|
|
%30 = insertelement <2 x i32> undef, i32 %7, i32 0
|
|
%31 = bitcast <2 x i32> %30 to i64
|
|
%32 = inttoptr i64 %31 to ptr addrspace(4)
|
|
%33 = insertelement <2 x i32> undef, i32 %6, i32 0
|
|
%34 = bitcast <2 x i32> %33 to i64
|
|
%35 = inttoptr i64 %34 to ptr addrspace(4)
|
|
%36 = insertelement <2 x i32> undef, i32 %14, i32 0
|
|
%37 = bitcast <2 x i32> %36 to i64
|
|
%38 = inttoptr i64 %37 to ptr addrspace(4)
|
|
%39 = getelementptr i8, ptr addrspace(4) %38, i64 232
|
|
%rootDesc58.ii0.i = load i32, ptr addrspace(4) %39, align 8
|
|
%.i184.i = getelementptr i8, ptr addrspace(4) %38, i64 236
|
|
%rootDesc58.ii1.i = load i32, ptr addrspace(4) %.i184.i, align 4
|
|
%40 = and i32 %rootDesc58.ii1.i, 65535
|
|
%41 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %rootDesc58.ii0.i, i32 0
|
|
%42 = insertelement <4 x i32> %41, i32 %40, i32 1
|
|
%43 = and i32 undef, 65535
|
|
%44 = insertelement <4 x i32> undef, i32 %43, i32 1
|
|
%45 = load <4 x i32>, ptr addrspace(4) undef, align 16
|
|
%46 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %45, i32 0, i32 0, i32 0, i32 0)
|
|
%47 = add i32 %46, -1
|
|
%48 = shl i32 %0, 4
|
|
%49 = call i32 @llvm.amdgcn.readfirstlane(i32 %48)
|
|
%50 = sext i32 %49 to i64
|
|
%51 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%52 = add i32 %51, -2
|
|
%53 = or i32 %52, %47
|
|
%54 = shl i32 %1, 4
|
|
%55 = call i32 @llvm.amdgcn.readfirstlane(i32 %54)
|
|
%56 = sext i32 %55 to i64
|
|
%57 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%58 = add i32 %57, -3
|
|
%59 = or i32 %53, %58
|
|
%60 = shl i32 %2, 4
|
|
%61 = call i32 @llvm.amdgcn.readfirstlane(i32 %60)
|
|
%62 = sext i32 %61 to i64
|
|
%63 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%64 = add i32 %63, -4
|
|
%65 = or i32 %59, %64
|
|
%66 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%67 = add i32 %66, -27
|
|
%68 = or i32 %65, %67
|
|
%69 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 0, i32 0, i32 0)
|
|
%70 = add i32 %69, -28
|
|
%71 = or i32 %68, %70
|
|
%72 = call i32 @llvm.amdgcn.readfirstlane(i32 %0)
|
|
%73 = getelementptr i8, ptr addrspace(4) %35, i64 16
|
|
%74 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
|
|
%75 = add i32 %74, -29
|
|
%76 = or i32 %71, %75
|
|
%77 = call i32 @llvm.amdgcn.readfirstlane(i32 %1)
|
|
%78 = shl i32 %77, 4
|
|
%79 = sext i32 %78 to i64
|
|
%80 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
|
|
%81 = add i32 %80, -30
|
|
%82 = or i32 %76, %81
|
|
%83 = call i32 @llvm.amdgcn.readfirstlane(i32 %2)
|
|
%84 = shl i32 %83, 4
|
|
%85 = sext i32 %84 to i64
|
|
%86 = getelementptr i8, ptr addrspace(4) %73, i64 %85
|
|
%87 = load <4 x i32>, ptr addrspace(4) %86, align 16
|
|
%88 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %87, i32 0, i32 0)
|
|
%89 = add i32 %88, -31
|
|
%90 = or i32 %82, %89
|
|
%91 = getelementptr i8, ptr addrspace(4) %35, i64 64
|
|
%92 = getelementptr i8, ptr addrspace(4) %91, i64 %50
|
|
%93 = load <4 x i32>, ptr addrspace(4) %92, align 16
|
|
%94 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %93, i32 0, i32 0, i32 0, i32 0)
|
|
%95 = add i32 %94, -32
|
|
%96 = or i32 %90, %95
|
|
%97 = getelementptr i8, ptr addrspace(4) %91, i64 %56
|
|
%98 = load <4 x i32>, ptr addrspace(4) %97, align 16
|
|
%99 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %98, i32 0, i32 0, i32 0, i32 0)
|
|
%100 = add i32 %99, -33
|
|
%101 = or i32 %96, %100
|
|
%102 = getelementptr i8, ptr addrspace(4) %91, i64 %62
|
|
%103 = load <4 x i32>, ptr addrspace(4) %102, align 16
|
|
%104 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %103, i32 0, i32 0, i32 0, i32 0)
|
|
%105 = add i32 %104, -34
|
|
%106 = or i32 %101, %105
|
|
%107 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
|
|
%108 = sext i32 %107 to i64
|
|
%109 = getelementptr i8, ptr addrspace(4) %91, i64 %108
|
|
%110 = load <4 x i32>, ptr addrspace(4) %109, align 16
|
|
%111 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %110, i32 0, i32 0, i32 0, i32 0)
|
|
%112 = add i32 %111, -36
|
|
%113 = or i32 %106, %112
|
|
%114 = getelementptr i8, ptr addrspace(4) %32, i64 %50
|
|
%115 = load <4 x i32>, ptr addrspace(4) %114, align 16
|
|
%116 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %115, i32 0, i32 0, i32 0, i32 0)
|
|
%117 = add i32 %116, -37
|
|
%118 = or i32 %113, %117
|
|
%119 = getelementptr i8, ptr addrspace(4) %32, i64 %56
|
|
%120 = load <4 x i32>, ptr addrspace(4) %119, align 16
|
|
%121 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %120, i32 0, i32 0, i32 0, i32 0)
|
|
%122 = add i32 %121, -38
|
|
%123 = or i32 %118, %122
|
|
%124 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%125 = add i32 %124, -39
|
|
%126 = or i32 %123, %125
|
|
%127 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
|
|
%128 = sext i32 %127 to i64
|
|
%129 = getelementptr i8, ptr addrspace(4) %32, i64 %128
|
|
%130 = load <4 x i32>, ptr addrspace(4) %129, align 16
|
|
%131 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %130, i32 0, i32 0, i32 0, i32 0)
|
|
%132 = add i32 %131, -50
|
|
%133 = or i32 %126, %132
|
|
%134 = getelementptr i8, ptr addrspace(4) %32, i64 224
|
|
%135 = getelementptr i8, ptr addrspace(4) %134, i64 %50
|
|
%136 = load <4 x i32>, ptr addrspace(4) %135, align 16
|
|
%137 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %136, i32 0, i32 0, i32 0, i32 0)
|
|
%138 = add i32 %137, -51
|
|
%139 = or i32 %133, %138
|
|
%140 = getelementptr i8, ptr addrspace(4) %134, i64 %56
|
|
%141 = load <4 x i32>, ptr addrspace(4) %140, align 16
|
|
%142 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %141, i32 0, i32 0, i32 0, i32 0)
|
|
%143 = add i32 %142, -52
|
|
%144 = or i32 %139, %143
|
|
%145 = getelementptr i8, ptr addrspace(4) %134, i64 %62
|
|
%146 = load <4 x i32>, ptr addrspace(4) %145, align 16
|
|
%147 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %146, i32 0, i32 0, i32 0, i32 0)
|
|
%148 = add i32 %147, -53
|
|
%149 = or i32 %144, %148
|
|
%150 = sext i32 undef to i64
|
|
%151 = getelementptr i8, ptr addrspace(4) %134, i64 %150
|
|
%152 = load <4 x i32>, ptr addrspace(4) %151, align 16
|
|
%153 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %152, i32 0, i32 0, i32 0, i32 0)
|
|
%154 = add i32 %153, -72
|
|
%155 = or i32 %149, %154
|
|
%156 = getelementptr i8, ptr addrspace(4) %32, i64 576
|
|
%157 = getelementptr i8, ptr addrspace(4) %156, i64 %50
|
|
%158 = load <4 x i32>, ptr addrspace(4) %157, align 16
|
|
%159 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %158, i32 0, i32 0, i32 0, i32 0)
|
|
%160 = add i32 %159, -73
|
|
%161 = or i32 %155, %160
|
|
%162 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%163 = add i32 %162, -74
|
|
%164 = or i32 %161, %163
|
|
%165 = getelementptr i8, ptr addrspace(4) %156, i64 %62
|
|
%166 = load <4 x i32>, ptr addrspace(4) %165, align 16
|
|
%167 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %166, i32 0, i32 0, i32 0, i32 0)
|
|
%168 = add i32 %167, -75
|
|
%169 = or i32 %164, %168
|
|
%170 = getelementptr i8, ptr addrspace(4) %156, i64 %108
%171 = load <4 x i32>, ptr addrspace(4) %170, align 16
%172 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %171, i32 0, i32 0, i32 0, i32 0)
%173 = add i32 %172, -77
%174 = or i32 %169, %173
%175 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%176 = add i32 %175, -93
%177 = or i32 %174, %176
%178 = inttoptr i64 %29 to ptr addrspace(4)
%179 = getelementptr i8, ptr addrspace(4) %178, i64 %50
%180 = load <4 x i32>, ptr addrspace(4) %179, align 16
%181 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %180, i32 0, i32 0, i32 0, i32 0)
%182 = add i32 %181, -94
%183 = or i32 %177, %182
%184 = load <4 x i32>, ptr addrspace(4) undef, align 16
%185 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %184, i32 0, i32 0, i32 0)
%186 = add i32 %185, -95
%187 = or i32 %183, %186
%188 = getelementptr i8, ptr addrspace(4) %27, i64 %79
%189 = load <4 x i32>, ptr addrspace(4) %188, align 16
%190 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %189, i32 0, i32 0, i32 0)
%191 = add i32 %190, -96
%192 = or i32 %187, %191
%193 = getelementptr i8, ptr addrspace(4) %27, i64 %85
%194 = load <4 x i32>, ptr addrspace(4) %193, align 16
%195 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %194, i32 0, i32 0, i32 0)
%196 = add i32 %195, -97
%197 = or i32 %192, %196
%198 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %0, i32 0
%199 = ptrtoint ptr addrspace(6) %198 to i32
%200 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %199, i32 0)
%201 = add i32 %200, -98
%202 = or i32 %197, %201
%203 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 undef, i32 0)
%204 = add i32 %203, -114
%205 = or i32 %202, %204
%206 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %2, i32 0
%207 = ptrtoint ptr addrspace(6) %206 to i32
%208 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %207, i32 0)
%209 = add i32 %208, -130
%210 = or i32 %205, %209
%211 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 undef, i32 0
%212 = ptrtoint ptr addrspace(6) %211 to i32
%213 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %212, i32 0)
%214 = add i32 %213, -178
%215 = or i32 %210, %214
%216 = inttoptr i64 %24 to ptr addrspace(4)
%217 = getelementptr i8, ptr addrspace(4) %216, i64 %50
%218 = load <4 x i32>, ptr addrspace(4) %217, align 16
%219 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %218, i32 0, i32 0, i32 0, i32 0)
%220 = add i32 %219, -194
%221 = or i32 %215, %220
%222 = inttoptr i64 %22 to ptr addrspace(4)
%223 = getelementptr i8, ptr addrspace(4) %222, i64 %50
%224 = load <4 x i32>, ptr addrspace(4) %223, align 16
%225 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %224, i32 0, i32 0, i32 0, i32 0)
%226 = add i32 %225, -195
%227 = or i32 %221, %226
%228 = getelementptr i8, ptr addrspace(4) %222, i64 %56
%229 = load <4 x i32>, ptr addrspace(4) %228, align 16
%230 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %229, i32 0, i32 0, i32 0, i32 0)
%231 = add i32 %230, -196
%232 = or i32 %227, %231
%233 = getelementptr i8, ptr addrspace(4) %222, i64 %62
%234 = load <4 x i32>, ptr addrspace(4) %233, align 16
%235 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %234, i32 0, i32 0, i32 0, i32 0)
%236 = add i32 %235, -197
%237 = or i32 %232, %236
%238 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%239 = add i32 %238, -216
%240 = or i32 %237, %239
%241 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %0, i32 0
%242 = ptrtoint ptr addrspace(6) %241 to i32
%243 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %242, i32 0)
%244 = add i32 %243, -217
%245 = or i32 %240, %244
%246 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%247 = add i32 %246, -233
%248 = or i32 %245, %247
%249 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %2, i32 0
%250 = ptrtoint ptr addrspace(6) %249 to i32
%251 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %250, i32 0)
%252 = add i32 %251, -249
%253 = or i32 %248, %252
%254 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 undef, i32 0
%255 = ptrtoint ptr addrspace(6) %254 to i32
%256 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %255, i32 0)
%257 = add i32 %256, -297
%258 = or i32 %253, %257
%259 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%260 = add i32 %259, -313
%261 = or i32 %258, %260
%262 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%263 = add i32 %262, -329
%264 = or i32 %261, %263
%265 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%266 = add i32 %265, -345
%267 = or i32 %264, %266
%268 = getelementptr <{ [4 x i32], [9 x %llpc.array.element.5] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %4, i32 0
%269 = ptrtoint ptr addrspace(6) %268 to i32
%270 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %269, i32 0)
%271 = add i32 %270, -441
%272 = or i32 %267, %271
%273 = getelementptr i8, ptr addrspace(4) %20, i64 160
%274 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%275 = add i32 %274, -457
%276 = or i32 %272, %275
%277 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%278 = add i32 %277, -458
%279 = or i32 %276, %278
%280 = getelementptr i8, ptr addrspace(4) %273, i64 %62
%281 = load <4 x i32>, ptr addrspace(4) %280, align 16
%282 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %281, i32 0, i32 0, i32 0, i32 0)
%283 = add i32 %282, -459
%284 = or i32 %279, %283
%285 = shl i32 %5, 4
%286 = call i32 @llvm.amdgcn.readfirstlane(i32 %285)
%287 = sext i32 %286 to i64
%288 = getelementptr i8, ptr addrspace(4) %273, i64 %287
%289 = load <4 x i32>, ptr addrspace(4) %288, align 16
%290 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %289, i32 0, i32 0, i32 0, i32 0)
%291 = add i32 %290, -466
%292 = or i32 %284, %291
%293 = getelementptr i8, ptr addrspace(4) %38, i64 168
%294 = shl i32 %72, 3
%295 = sext i32 %294 to i64
%296 = getelementptr i8, ptr addrspace(4) %293, i64 %295
%.ii0.i = load i32, ptr addrspace(4) %296, align 8
%297 = and i32 undef, 65535
%298 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii0.i, i32 0
%299 = insertelement <4 x i32> %298, i32 %297, i32 1
%300 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %299, i32 0, i32 0)
%301 = add i32 %300, -467
%302 = or i32 %292, %301
%303 = shl i32 %77, 3
%304 = sext i32 %303 to i64
%305 = getelementptr i8, ptr addrspace(4) %293, i64 %304
%.ii090.i = load i32, ptr addrspace(4) %305, align 8
%.i191.i = getelementptr i8, ptr addrspace(4) %305, i64 4
%.ii192.i = load i32, ptr addrspace(4) %.i191.i, align 4
%306 = and i32 %.ii192.i, 65535
%307 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii090.i, i32 0
%308 = insertelement <4 x i32> %307, i32 %306, i32 1
%309 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %308, i32 0, i32 0)
%310 = add i32 %309, -468
%311 = or i32 %302, %310
%312 = shl i32 %83, 3
%313 = sext i32 %312 to i64
%314 = getelementptr i8, ptr addrspace(4) %293, i64 %313
%.ii096.i = load i32, ptr addrspace(4) %314, align 8
%.i197.i = getelementptr i8, ptr addrspace(4) %314, i64 4
%.ii198.i = load i32, ptr addrspace(4) %.i197.i, align 4
%315 = and i32 %.ii198.i, 65535
%316 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii096.i, i32 0
%317 = insertelement <4 x i32> %316, i32 %315, i32 1
%318 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %317, i32 0, i32 0)
%319 = add i32 %318, -469
%320 = or i32 %311, %319
%321 = call i32 @llvm.amdgcn.readfirstlane(i32 %3)
%322 = shl i32 %321, 3
%323 = sext i32 %322 to i64
%324 = getelementptr i8, ptr addrspace(4) %293, i64 %323
%.ii0102.i = load i32, ptr addrspace(4) %324, align 8
%.ii1104.i = load i32, ptr addrspace(4) undef, align 4
%325 = and i32 %.ii1104.i, 65535
%326 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii0102.i, i32 0
%327 = insertelement <4 x i32> %326, i32 %325, i32 1
%328 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %327, i32 0, i32 0)
%329 = add i32 %328, -473
%330 = or i32 %320, %329
%331 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
%332 = add i32 %331, -474
%333 = or i32 %330, %332
%334 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%335 = add i32 %334, -475
%336 = or i32 %333, %335
%337 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%338 = add i32 %337, -491
%339 = or i32 %336, %338
%340 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%341 = add i32 %340, -507
%342 = or i32 %339, %341
%343 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%344 = add i32 %343, -539
%345 = or i32 %342, %344
%346 = getelementptr i8, ptr addrspace(4) %17, i64 96
%347 = getelementptr i8, ptr addrspace(4) %346, i64 %50
%348 = load <4 x i32>, ptr addrspace(4) %347, align 16
%349 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %348, i32 0, i32 0, i32 0, i32 0)
%350 = add i32 %349, -555
%351 = or i32 %345, %350
%352 = getelementptr i8, ptr addrspace(4) %346, i64 %56
%353 = load <4 x i32>, ptr addrspace(4) %352, align 16
%354 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %353, i32 0, i32 0, i32 0, i32 0)
%355 = add i32 %354, -556
%356 = or i32 %351, %355
%357 = getelementptr i8, ptr addrspace(4) %346, i64 %62
%358 = load <4 x i32>, ptr addrspace(4) %357, align 16
%359 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %358, i32 0, i32 0, i32 0, i32 0)
%360 = add i32 %359, -557
%361 = or i32 %356, %360
%362 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%363 = add i32 %362, -574
%364 = or i32 %361, %363
%365 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%366 = add i32 %365, -575
%367 = or i32 %364, %366
%368 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%369 = add i32 %368, -576
%370 = or i32 %367, %369
%371 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%372 = add i32 %371, -577
%373 = or i32 %370, %372
%374 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%375 = add i32 %374, -593
%376 = or i32 %373, %375
%377 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %42, i32 0, i32 0)
%378 = add i32 %377, -594
%379 = or i32 %376, %378
%.not.i = icmp eq i32 %379, 0
%380 = load <8 x i32>, ptr addrspace(4) undef, align 32
%.i010.i = select i1 %.not.i, float 0x36A0000000000000, float 0.000000e+00
%381 = insertelement <4 x float> undef, float %.i010.i, i32 3
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %381, i32 15, i32 undef, i32 undef, <8 x i32> %380, i32 0, i32 0)
ret void
}

declare i32 @llvm.amdgcn.readfirstlane(i32)
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)
declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)