andorbitset.ll is interesting since it directly depends on the difference between poison and undef. Not sure it's useful to keep the version using poison; I assume none of this code makes it to codegen. si-spill-cf.ll was also a nasty case, which I doubt has been reproducing its original issue for a very long time. I had to reclaim an older version, replace some of the poison uses, and run simplify-cfg. There's a very slight change in the final CFG with this, but the final output is approximately the same as it used to be.
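For context, here is a minimal sketch of the poison/undef distinction the andorbitset.ll change hinges on (hypothetical functions, not taken from that test): or'ing a constant bit into undef still guarantees the bit is set in the result, because the guarantee holds for every value undef may take, while or'ing it into poison guarantees nothing, since the result is itself poison.

; Bit 3 of %r is known to be set: or with 8 sets it no matter which
; value the undef operand is refined to.
define i32 @bitset_undef() {
  %r = or i32 undef, 8
  ret i32 %r
}

; Here %r is poison: binary ops propagate poison, so no bit of the
; result is guaranteed and the whole computation may be folded away.
define i32 @bitset_poison() {
  %r = or i32 poison, 8
  ret i32 %r
}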
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -stop-after=greedy < %s | FileCheck %s

; TODO: This was introduced in D88020 to catch a case where some unreachable
; assert was hit during liverange split. But after D104509, there is some IR
; change after register coalescer which makes the case not work as before. We
; need to find some other way to reproduce the bad case fixed by D88020.

%llpc.array.element = type <{ i32, [12 x i8] }>
%llpc.array.element.2 = type <{ i32, [12 x i8] }>
%llpc.array.element.5 = type <{ i32, [12 x i8] }>

define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x i32> inreg %userData) {
; CHECK-LABEL: name: _amdgpu_gs_main
; CHECK: bb.0..expVert:
; CHECK-NEXT: liveins: $sgpr3, $sgpr4, $sgpr5, $sgpr8, $sgpr9, $sgpr10, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr25, $sgpr27, $sgpr31
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: undef [[COPY:%[0-9]+]].sub0:sgpr_64 = COPY $sgpr31
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr27
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr25
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr5
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr4
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr3
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr18
; CHECK-NEXT: undef [[COPY7:%[0-9]+]].sub0:sgpr_64 = COPY $sgpr19
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr20
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr21
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr22
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr23
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr9
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_32 = COPY $sgpr10
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_32 = COPY $sgpr8
; CHECK-NEXT: undef [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM [[COPY]], 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4)
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %125:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: KILL undef %125:sgpr_128
; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 4, implicit-def dead $scc
; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc
; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc
; CHECK-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc
; CHECK-NEXT: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub1:sgpr_128 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc
; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc
; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc
; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_]], 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `ptr addrspace(4) poison`, addrspace 4)
; CHECK-NEXT: KILL undef %74:sreg_64
; CHECK-NEXT: KILL [[S_ADD_U32_]].sub0, [[S_ADD_U32_]].sub1
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = S_MOV_B32 0
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: KILL undef %89:sgpr_128
; CHECK-NEXT: KILL undef %118:sgpr_128
; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_1:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_2:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_3:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_1]], 64, 0 :: (invariant load (s128) from %ir.87, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_2]], 64, 0 :: (invariant load (s128) from %ir.93, addrspace 4)
; CHECK-NEXT: KILL [[S_ADD_U32_1]].sub0, [[S_ADD_U32_1]].sub1
; CHECK-NEXT: KILL [[S_ADD_U32_2]].sub0, [[S_ADD_U32_2]].sub1
; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %169:sreg_32, 31, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_4:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY6]], undef %169:sreg_32, implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_5:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_6:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_1]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_6:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_7:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, undef %169:sreg_32, implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_7:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_8:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY7]].sub0, [[S_LSHL_B32_2]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_8:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_9:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_9:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_10:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_10:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_11:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_11:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], undef %302:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM [[S_MOV_B32_]], [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_MOV_B32_]], 16, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %357:sgpr_128, undef %358:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %368:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_3]], 64, 0 :: (invariant load (s128) from %ir.99, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_4]], 64, 0 :: (invariant load (s128) from %ir.107, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 0, 0 :: (invariant load (s128) from %ir.117, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 0, 0 :: (invariant load (s128) from %ir.124, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %352:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %363:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM2]], -130, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 0, 0 :: (invariant load (s128) from %ir.112, addrspace 4)
; CHECK-NEXT: undef [[S_ADD_U32_12:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY10]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_12:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_13:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_13:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_14:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_14:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: undef [[S_ADD_U32_15:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY11]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_15:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 4, implicit-def dead $scc
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %384:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 224, 0 :: (invariant load (s128) from %ir.129, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY7]], 224, 0 :: (invariant load (s128) from %ir.145, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_5]], 576, 0 :: (invariant load (s128) from %ir.150, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_6]], 224, 0 :: (invariant load (s128) from %ir.134, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_7]], 576, 0 :: (invariant load (s128) from %ir.162, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM4]], -217, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -233, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM5]], -249, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_10:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM3]], -297, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_11:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -313, implicit-def dead $scc
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 224, 0 :: (invariant load (s128) from %ir.140, addrspace 4)
; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -329, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -345, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM6]], -441, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_16:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_16:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY13]], 4, implicit-def dead $scc
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_17:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY2]], [[S_LSHL_B32_4]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_17:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %36:sreg_32, [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY5]], 3, implicit-def dead $scc
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_18:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_5]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_18:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_18]], 168, 0 :: (invariant load (s32) from %ir.273, align 8, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_8]], 576, 0 :: (invariant load (s128) from %ir.157, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub3:sgpr_128 = S_MOV_B32 553734060
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 -1
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_9]], 0, 0 :: (invariant load (s128) from %ir.170, addrspace 4)
; CHECK-NEXT: [[COPY15:%[0-9]+]].sub1:sgpr_128 = COPY [[S_MOV_B32_]].sub1
; CHECK-NEXT: [[COPY15:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]]
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_10]], 0, 0 :: (invariant load (s128) from %ir.178, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_11]], 0, 0 :: (invariant load (s128) from %ir.183, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_19:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_6]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_19:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_19]], 168, 0 :: (invariant load (s64) from %ir.282, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_12]], 0, 0 :: (invariant load (s128) from %ir.205, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_13]], 0, 0 :: (invariant load (s128) from %ir.211, addrspace 4)
; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_14]], 0, 0 :: (invariant load (s128) from %ir.216, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_15]], 0, 0 :: (invariant load (s128) from %ir.221, addrspace 4)
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc
; CHECK-NEXT: [[COPY16:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0
; CHECK-NEXT: [[COPY16:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_]]
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY16]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_20:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_7]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_20:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[S_ADD_U32_20]], 168, 0 :: (invariant load (s64) from %ir.293, addrspace 4)
; CHECK-NEXT: [[COPY17:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM2]].sub1, 65535, implicit-def dead $scc
; CHECK-NEXT: [[COPY17:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM2]].sub0
; CHECK-NEXT: [[COPY17:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_1]]
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY17]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_16]], 160, 0 :: (invariant load (s128) from %ir.256, addrspace 4)
; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %470:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) poison`, addrspace 4)
; CHECK-NEXT: KILL [[S_ADD_U32_16]].sub0, [[S_ADD_U32_16]].sub1
; CHECK-NEXT: KILL undef %470:sreg_64
; CHECK-NEXT: KILL [[COPY17]].sub0_sub1_sub2, [[COPY17]].sub3
; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY14]], 3, implicit-def dead $scc
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_17]], 160, 0 :: (invariant load (s128) from %ir.265, addrspace 4)
; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_21:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY]].sub0, [[S_LSHL_B32_8]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_21:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %57:sreg_32, [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[S_ADD_U32_21]], 168, 0 :: (invariant load (s32) from %ir.305, align 8, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]]
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]]
; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc
; CHECK-NEXT: [[COPY18:%[0-9]+]]:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]]
; CHECK-NEXT: [[COPY18:%[0-9]+]].sub1:sgpr_128 = COPY [[S_AND_B32_2]]
; CHECK-NEXT: [[COPY18:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM2]]
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY18]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -475, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -491, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -507, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -539, implicit-def dead $scc
; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc
; CHECK-NEXT: undef [[S_ADD_U32_22:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_22:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_22]], 96, 0 :: (invariant load (s128) from %ir.323, addrspace 4)
; CHECK-NEXT: undef [[S_ADD_U32_23:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_1]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_23:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_23]], 96, 0 :: (invariant load (s128) from %ir.329, addrspace 4)
; CHECK-NEXT: undef [[S_ADD_U32_24:%[0-9]+]].sub0:sreg_64 = S_ADD_U32 [[COPY1]], [[S_LSHL_B32_2]], implicit-def $scc
; CHECK-NEXT: [[S_ADD_U32_24:%[0-9]+]].sub1:sreg_64 = S_ADDC_U32 undef %33:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[S_ADD_U32_24]], 96, 0 :: (invariant load (s128) from %ir.335, addrspace 4)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 8)
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]]
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]]
; CHECK-NEXT: KILL [[V_MOV_B32_e32_]]
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]]
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_ADD_U32_e64_]], [[V_ADD_U32_e64_1]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -4, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[V_ADD_U32_e64_2]], implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 27, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_2:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_1]], [[V_ADD_U32_e64_3]], implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 28, [[BUFFER_LOAD_DWORD_OFFSET]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_3:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_2]], [[V_SUBREV_U32_e64_]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_4:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_3]], [[V_SUBREV_U32_e64_1]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_5:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_SUB_I32_]], [[V_OR_B32_e64_4]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_6:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_SUB_I32_1]], [[V_OR_B32_e64_5]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_7:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_SUB_I32_2]], [[V_OR_B32_e64_6]], implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 32, [[BUFFER_LOAD_FORMAT_X_IDXEN2]], 0, implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_3:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 33, [[BUFFER_LOAD_FORMAT_X_IDXEN3]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_8:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_7]], [[V_SUBREV_U32_e64_2]], implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_4:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 34, [[BUFFER_LOAD_FORMAT_X_IDXEN4]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_9:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_8]], [[V_SUBREV_U32_e64_3]], implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_5:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 36, [[BUFFER_LOAD_FORMAT_X_IDXEN5]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_10:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_9]], [[V_SUBREV_U32_e64_4]], implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_6:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 37, [[BUFFER_LOAD_FORMAT_X_IDXEN6]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_11:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_10]], [[V_SUBREV_U32_e64_5]], implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_7:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 38, [[BUFFER_LOAD_FORMAT_X_IDXEN7]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_12:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_11]], [[V_SUBREV_U32_e64_6]], implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_8:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 39, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_13:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_12]], [[V_SUBREV_U32_e64_7]], implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_9:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 50, [[BUFFER_LOAD_FORMAT_X_IDXEN8]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_14:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_13]], [[V_SUBREV_U32_e64_8]], implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_10:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 51, [[BUFFER_LOAD_FORMAT_X_IDXEN9]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_15:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_14]], [[V_SUBREV_U32_e64_9]], implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_11:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 52, [[BUFFER_LOAD_FORMAT_X_IDXEN10]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_16:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_15]], [[V_SUBREV_U32_e64_10]], implicit $exec
; CHECK-NEXT: [[V_SUBREV_U32_e64_12:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 53, [[BUFFER_LOAD_FORMAT_X_IDXEN11]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_17:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_16]], [[V_SUBREV_U32_e64_11]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -72, [[BUFFER_LOAD_FORMAT_X_IDXEN12]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_18:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_17]], [[V_SUBREV_U32_e64_12]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -73, [[BUFFER_LOAD_FORMAT_X_IDXEN13]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_19:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_18]], [[V_ADD_U32_e64_4]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -74, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_20:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_19]], [[V_ADD_U32_e64_5]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -75, [[BUFFER_LOAD_FORMAT_X_IDXEN14]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_21:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_20]], [[V_ADD_U32_e64_6]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -77, [[BUFFER_LOAD_FORMAT_X_IDXEN15]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_22:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_21]], [[V_ADD_U32_e64_7]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -93, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_23:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_22]], [[V_ADD_U32_e64_8]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -94, [[BUFFER_LOAD_FORMAT_X_IDXEN16]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_24:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_23]], [[V_ADD_U32_e64_9]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -95, [[BUFFER_LOAD_DWORD_OFFSET1]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_25:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_24]], [[V_ADD_U32_e64_10]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -96, [[BUFFER_LOAD_DWORD_OFFSET2]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_26:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_25]], [[V_ADD_U32_e64_11]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -97, [[BUFFER_LOAD_DWORD_OFFSET3]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_27:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_26]], [[V_ADD_U32_e64_12]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_28:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_27]], [[V_ADD_U32_e64_13]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_29:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_2]], [[V_OR_B32_e64_28]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_30:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_3]], [[V_OR_B32_e64_29]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_31:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_4]], [[V_OR_B32_e64_30]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -194, [[BUFFER_LOAD_FORMAT_X_IDXEN17]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_32:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_5]], [[V_OR_B32_e64_31]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -195, [[BUFFER_LOAD_FORMAT_X_IDXEN18]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_33:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_32]], [[V_ADD_U32_e64_14]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -196, [[BUFFER_LOAD_FORMAT_X_IDXEN19]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_34:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_33]], [[V_ADD_U32_e64_15]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -197, [[BUFFER_LOAD_FORMAT_X_IDXEN20]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_35:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_34]], [[V_ADD_U32_e64_16]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -216, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_36:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_35]], [[V_ADD_U32_e64_17]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_37:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_36]], [[V_ADD_U32_e64_18]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_38:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_7]], [[V_OR_B32_e64_37]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_39:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_8]], [[V_OR_B32_e64_38]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_40:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_9]], [[V_OR_B32_e64_39]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_41:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_10]], [[V_OR_B32_e64_40]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_42:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_11]], [[V_OR_B32_e64_41]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_43:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_12]], [[V_OR_B32_e64_42]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_44:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_13]], [[V_OR_B32_e64_43]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -457, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_45:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_14]], [[V_OR_B32_e64_44]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -458, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_46:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_45]], [[V_ADD_U32_e64_19]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -459, [[BUFFER_LOAD_FORMAT_X_IDXEN21]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_47:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_46]], [[V_ADD_U32_e64_20]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -466, [[BUFFER_LOAD_FORMAT_X_IDXEN22]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_48:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_47]], [[V_ADD_U32_e64_21]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_49:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_48]], [[V_ADD_U32_e64_22]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_50:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_15]], [[V_OR_B32_e64_49]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_51:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_16]], [[V_OR_B32_e64_50]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_52:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_17]], [[V_OR_B32_e64_51]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_53:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_23]], [[V_OR_B32_e64_52]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_54:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_18]], [[V_OR_B32_e64_53]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_55:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_19]], [[V_OR_B32_e64_54]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_56:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_20]], [[V_OR_B32_e64_55]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_57:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_21]], [[V_OR_B32_e64_56]], implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_58:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_22]], [[V_OR_B32_e64_57]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -555, [[BUFFER_LOAD_FORMAT_X_IDXEN23]], 0, implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -556, [[BUFFER_LOAD_FORMAT_X_IDXEN24]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_59:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_58]], [[V_ADD_U32_e64_23]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -557, [[BUFFER_LOAD_FORMAT_X_IDXEN25]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_60:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_59]], [[V_ADD_U32_e64_24]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -574, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_61:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_60]], [[V_ADD_U32_e64_25]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -575, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_62:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_61]], [[V_ADD_U32_e64_26]], implicit $exec
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM8:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX2_IMM]], 0, 0 :: (dereferenceable invariant load (s32))
; CHECK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -576, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_63:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_62]], [[V_ADD_U32_e64_27]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -577, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec
; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec
; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %543:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) poison`, addrspace 4)
; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec
; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc
; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec
; CHECK-NEXT: undef [[V_CNDMASK_B32_e64_:%[0-9]+]].sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
; CHECK-NEXT: IMAGE_STORE_V4_V2_nsa_gfx10 [[V_CNDMASK_B32_e64_]], undef %557:vgpr_32, undef %559:vgpr_32, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 8)
; CHECK-NEXT: S_ENDPGM 0
.expVert:
%0 = extractelement <31 x i32> %userData, i64 2
%1 = extractelement <31 x i32> %userData, i64 3
%2 = extractelement <31 x i32> %userData, i64 4
%3 = extractelement <31 x i32> %userData, i64 7
%4 = extractelement <31 x i32> %userData, i64 8
%5 = extractelement <31 x i32> %userData, i64 9
%6 = extractelement <31 x i32> %userData, i64 17
%7 = extractelement <31 x i32> %userData, i64 18
%8 = extractelement <31 x i32> %userData, i64 19
%9 = extractelement <31 x i32> %userData, i64 20
%10 = extractelement <31 x i32> %userData, i64 21
%11 = extractelement <31 x i32> %userData, i64 22
%12 = extractelement <31 x i32> %userData, i64 24
%13 = extractelement <31 x i32> %userData, i64 26
%14 = extractelement <31 x i32> %userData, i64 30
%15 = insertelement <2 x i32> poison, i32 %13, i32 0
%16 = bitcast <2 x i32> %15 to i64
%17 = inttoptr i64 %16 to ptr addrspace(4)
%18 = insertelement <2 x i32> poison, i32 %12, i32 0
%19 = bitcast <2 x i32> %18 to i64
%20 = inttoptr i64 %19 to ptr addrspace(4)
%21 = insertelement <2 x i32> poison, i32 %11, i32 0
%22 = bitcast <2 x i32> %21 to i64
%23 = insertelement <2 x i32> poison, i32 %10, i32 0
%24 = bitcast <2 x i32> %23 to i64
%25 = insertelement <2 x i32> poison, i32 %9, i32 0
%26 = bitcast <2 x i32> %25 to i64
%27 = inttoptr i64 %26 to ptr addrspace(4)
%28 = insertelement <2 x i32> poison, i32 %8, i32 0
%29 = bitcast <2 x i32> %28 to i64
%30 = insertelement <2 x i32> poison, i32 %7, i32 0
%31 = bitcast <2 x i32> %30 to i64
%32 = inttoptr i64 %31 to ptr addrspace(4)
%33 = insertelement <2 x i32> poison, i32 %6, i32 0
%34 = bitcast <2 x i32> %33 to i64
%35 = inttoptr i64 %34 to ptr addrspace(4)
%36 = insertelement <2 x i32> poison, i32 %14, i32 0
%37 = bitcast <2 x i32> %36 to i64
%38 = inttoptr i64 %37 to ptr addrspace(4)
%39 = getelementptr i8, ptr addrspace(4) %38, i64 232
%rootDesc58.ii0.i = load i32, ptr addrspace(4) %39, align 8
%.i184.i = getelementptr i8, ptr addrspace(4) %38, i64 236
%rootDesc58.ii1.i = load i32, ptr addrspace(4) %.i184.i, align 4
%40 = and i32 %rootDesc58.ii1.i, 65535
%41 = insertelement <4 x i32> <i32 poison, i32 poison, i32 -1, i32 553734060>, i32 %rootDesc58.ii0.i, i32 0
%42 = insertelement <4 x i32> %41, i32 %40, i32 1
%43 = and i32 0, 65535
%44 = insertelement <4 x i32> poison, i32 %43, i32 1
%45 = load <4 x i32>, ptr addrspace(4) poison, align 16
%46 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %45, i32 0, i32 0, i32 0, i32 0)
%47 = add i32 %46, -1
%48 = shl i32 %0, 4
%49 = call i32 @llvm.amdgcn.readfirstlane(i32 %48)
%50 = sext i32 %49 to i64
%51 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%52 = add i32 %51, -2
%53 = or i32 %52, %47
%54 = shl i32 %1, 4
%55 = call i32 @llvm.amdgcn.readfirstlane(i32 %54)
%56 = sext i32 %55 to i64
%57 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%58 = add i32 %57, -3
%59 = or i32 %53, %58
%60 = shl i32 %2, 4
%61 = call i32 @llvm.amdgcn.readfirstlane(i32 %60)
%62 = sext i32 %61 to i64
%63 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%64 = add i32 %63, -4
%65 = or i32 %59, %64
%66 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%67 = add i32 %66, -27
%68 = or i32 %65, %67
%69 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> poison, i32 0, i32 0, i32 0)
%70 = add i32 %69, -28
%71 = or i32 %68, %70
%72 = call i32 @llvm.amdgcn.readfirstlane(i32 %0)
%73 = getelementptr i8, ptr addrspace(4) %35, i64 16
%74 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 0, i32 0)
%75 = add i32 %74, -29
%76 = or i32 %71, %75
%77 = call i32 @llvm.amdgcn.readfirstlane(i32 %1)
%78 = shl i32 %77, 4
%79 = sext i32 %78 to i64
%80 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 0, i32 0)
%81 = add i32 %80, -30
%82 = or i32 %76, %81
%83 = call i32 @llvm.amdgcn.readfirstlane(i32 %2)
%84 = shl i32 %83, 4
%85 = sext i32 %84 to i64
%86 = getelementptr i8, ptr addrspace(4) %73, i64 %85
%87 = load <4 x i32>, ptr addrspace(4) %86, align 16
%88 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %87, i32 0, i32 0)
%89 = add i32 %88, -31
%90 = or i32 %82, %89
%91 = getelementptr i8, ptr addrspace(4) %35, i64 64
%92 = getelementptr i8, ptr addrspace(4) %91, i64 %50
%93 = load <4 x i32>, ptr addrspace(4) %92, align 16
%94 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %93, i32 0, i32 0, i32 0, i32 0)
%95 = add i32 %94, -32
%96 = or i32 %90, %95
%97 = getelementptr i8, ptr addrspace(4) %91, i64 %56
%98 = load <4 x i32>, ptr addrspace(4) %97, align 16
%99 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %98, i32 0, i32 0, i32 0, i32 0)
%100 = add i32 %99, -33
%101 = or i32 %96, %100
%102 = getelementptr i8, ptr addrspace(4) %91, i64 %62
%103 = load <4 x i32>, ptr addrspace(4) %102, align 16
%104 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %103, i32 0, i32 0, i32 0, i32 0)
%105 = add i32 %104, -34
%106 = or i32 %101, %105
%undef = freeze i32 poison
%107 = call i32 @llvm.amdgcn.readfirstlane(i32 %undef)
%108 = sext i32 %107 to i64
%109 = getelementptr i8, ptr addrspace(4) %91, i64 %108
%110 = load <4 x i32>, ptr addrspace(4) %109, align 16
%111 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %110, i32 0, i32 0, i32 0, i32 0)
%112 = add i32 %111, -36
%113 = or i32 %106, %112
%114 = getelementptr i8, ptr addrspace(4) %32, i64 %50
%115 = load <4 x i32>, ptr addrspace(4) %114, align 16
%116 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %115, i32 0, i32 0, i32 0, i32 0)
%117 = add i32 %116, -37
%118 = or i32 %113, %117
%119 = getelementptr i8, ptr addrspace(4) %32, i64 %56
%120 = load <4 x i32>, ptr addrspace(4) %119, align 16
%121 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %120, i32 0, i32 0, i32 0, i32 0)
%122 = add i32 %121, -38
%123 = or i32 %118, %122
%124 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%125 = add i32 %124, -39
%126 = or i32 %123, %125
%undef1 = freeze i32 poison
%127 = call i32 @llvm.amdgcn.readfirstlane(i32 %undef1)
%128 = sext i32 %127 to i64
%129 = getelementptr i8, ptr addrspace(4) %32, i64 %128
%130 = load <4 x i32>, ptr addrspace(4) %129, align 16
%131 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %130, i32 0, i32 0, i32 0, i32 0)
%132 = add i32 %131, -50
%133 = or i32 %126, %132
%134 = getelementptr i8, ptr addrspace(4) %32, i64 224
%135 = getelementptr i8, ptr addrspace(4) %134, i64 %50
%136 = load <4 x i32>, ptr addrspace(4) %135, align 16
%137 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %136, i32 0, i32 0, i32 0, i32 0)
%138 = add i32 %137, -51
%139 = or i32 %133, %138
%140 = getelementptr i8, ptr addrspace(4) %134, i64 %56
%141 = load <4 x i32>, ptr addrspace(4) %140, align 16
%142 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %141, i32 0, i32 0, i32 0, i32 0)
%143 = add i32 %142, -52
%144 = or i32 %139, %143
%145 = getelementptr i8, ptr addrspace(4) %134, i64 %62
%146 = load <4 x i32>, ptr addrspace(4) %145, align 16
%147 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %146, i32 0, i32 0, i32 0, i32 0)
%148 = add i32 %147, -53
%149 = or i32 %144, %148
%150 = sext i32 0 to i64
%151 = getelementptr i8, ptr addrspace(4) %134, i64 %150
%152 = load <4 x i32>, ptr addrspace(4) %151, align 16
%153 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %152, i32 0, i32 0, i32 0, i32 0)
%154 = add i32 %153, -72
%155 = or i32 %149, %154
%156 = getelementptr i8, ptr addrspace(4) %32, i64 576
%157 = getelementptr i8, ptr addrspace(4) %156, i64 %50
%158 = load <4 x i32>, ptr addrspace(4) %157, align 16
%159 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %158, i32 0, i32 0, i32 0, i32 0)
%160 = add i32 %159, -73
%161 = or i32 %155, %160
%162 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%163 = add i32 %162, -74
%164 = or i32 %161, %163
%165 = getelementptr i8, ptr addrspace(4) %156, i64 %62
%166 = load <4 x i32>, ptr addrspace(4) %165, align 16
%167 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %166, i32 0, i32 0, i32 0, i32 0)
%168 = add i32 %167, -75
%169 = or i32 %164, %168
%170 = getelementptr i8, ptr addrspace(4) %156, i64 %108
%171 = load <4 x i32>, ptr addrspace(4) %170, align 16
%172 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %171, i32 0, i32 0, i32 0, i32 0)
%173 = add i32 %172, -77
%174 = or i32 %169, %173
%175 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%176 = add i32 %175, -93
%177 = or i32 %174, %176
%178 = inttoptr i64 %29 to ptr addrspace(4)
%179 = getelementptr i8, ptr addrspace(4) %178, i64 %50
%180 = load <4 x i32>, ptr addrspace(4) %179, align 16
%181 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %180, i32 0, i32 0, i32 0, i32 0)
%182 = add i32 %181, -94
%183 = or i32 %177, %182
%184 = load <4 x i32>, ptr addrspace(4) poison, align 16
%185 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %184, i32 0, i32 0, i32 0)
%186 = add i32 %185, -95
%187 = or i32 %183, %186
%188 = getelementptr i8, ptr addrspace(4) %27, i64 %79
%189 = load <4 x i32>, ptr addrspace(4) %188, align 16
%190 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %189, i32 0, i32 0, i32 0)
%191 = add i32 %190, -96
%192 = or i32 %187, %191
%193 = getelementptr i8, ptr addrspace(4) %27, i64 %85
%194 = load <4 x i32>, ptr addrspace(4) %193, align 16
%195 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %194, i32 0, i32 0, i32 0)
|
|
%196 = add i32 %195, -97
|
|
%197 = or i32 %192, %196
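; The next group feeds s.buffer.load with offsets computed through
; addrspace(6) pointers, AMDGPU's 32-bit constant address space, indexing
; into the %llpc.array.element descriptor arrays.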
%198 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %0, i32 0
%199 = ptrtoint ptr addrspace(6) %198 to i32
%200 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %199, i32 0)
%201 = add i32 %200, -98
%202 = or i32 %197, %201
%203 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 poison, i32 0)
%204 = add i32 %203, -114
%205 = or i32 %202, %204
%206 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %2, i32 0
%207 = ptrtoint ptr addrspace(6) %206 to i32
%208 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %207, i32 0)
%209 = add i32 %208, -130
%210 = or i32 %205, %209
%211 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 0, i32 0
%212 = ptrtoint ptr addrspace(6) %211 to i32
%213 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %212, i32 0)
%214 = add i32 %213, -178
%215 = or i32 %210, %214
%216 = inttoptr i64 %24 to ptr addrspace(4)
%217 = getelementptr i8, ptr addrspace(4) %216, i64 %50
%218 = load <4 x i32>, ptr addrspace(4) %217, align 16
%219 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %218, i32 0, i32 0, i32 0, i32 0)
%220 = add i32 %219, -194
%221 = or i32 %215, %220
%222 = inttoptr i64 %22 to ptr addrspace(4)
%223 = getelementptr i8, ptr addrspace(4) %222, i64 %50
%224 = load <4 x i32>, ptr addrspace(4) %223, align 16
%225 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %224, i32 0, i32 0, i32 0, i32 0)
%226 = add i32 %225, -195
%227 = or i32 %221, %226
%228 = getelementptr i8, ptr addrspace(4) %222, i64 %56
%229 = load <4 x i32>, ptr addrspace(4) %228, align 16
%230 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %229, i32 0, i32 0, i32 0, i32 0)
%231 = add i32 %230, -196
%232 = or i32 %227, %231
%233 = getelementptr i8, ptr addrspace(4) %222, i64 %62
%234 = load <4 x i32>, ptr addrspace(4) %233, align 16
%235 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %234, i32 0, i32 0, i32 0, i32 0)
%236 = add i32 %235, -197
%237 = or i32 %232, %236
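; Several descriptor and offset operands from here on are plain poison; they
; appear to be leftovers of test reduction, kept so the overall register
; pressure of the original case is preserved.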
%238 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%239 = add i32 %238, -216
%240 = or i32 %237, %239
%241 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %0, i32 0
%242 = ptrtoint ptr addrspace(6) %241 to i32
%243 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 %242, i32 0)
%244 = add i32 %243, -217
%245 = or i32 %240, %244
%246 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
%247 = add i32 %246, -233
%248 = or i32 %245, %247
%249 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %2, i32 0
%250 = ptrtoint ptr addrspace(6) %249 to i32
%251 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 %250, i32 0)
%252 = add i32 %251, -249
%253 = or i32 %248, %252
%254 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 0, i32 0
%255 = ptrtoint ptr addrspace(6) %254 to i32
%256 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 %255, i32 0)
%257 = add i32 %256, -297
%258 = or i32 %253, %257
%259 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
%260 = add i32 %259, -313
%261 = or i32 %258, %260
%262 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
%263 = add i32 %262, -329
%264 = or i32 %261, %263
%265 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
%266 = add i32 %265, -345
%267 = or i32 %264, %266
%268 = getelementptr <{ [4 x i32], [9 x %llpc.array.element.5] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %4, i32 0
%269 = ptrtoint ptr addrspace(6) %268 to i32
%270 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 %269, i32 0)
%271 = add i32 %270, -441
%272 = or i32 %267, %271
%273 = getelementptr i8, ptr addrspace(4) %20, i64 160
%274 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%275 = add i32 %274, -457
%276 = or i32 %272, %275
%277 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%278 = add i32 %277, -458
%279 = or i32 %276, %278
%280 = getelementptr i8, ptr addrspace(4) %273, i64 %62
%281 = load <4 x i32>, ptr addrspace(4) %280, align 16
%282 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %281, i32 0, i32 0, i32 0, i32 0)
%283 = add i32 %282, -459
%284 = or i32 %279, %283
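; Make the descriptor index uniform: scale the input, then readfirstlane it
; into a scalar value before using it in the address computation.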
%285 = shl i32 %5, 4
%286 = call i32 @llvm.amdgcn.readfirstlane(i32 %285)
%287 = sext i32 %286 to i64
%288 = getelementptr i8, ptr addrspace(4) %273, i64 %287
%289 = load <4 x i32>, ptr addrspace(4) %288, align 16
%290 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %289, i32 0, i32 0, i32 0, i32 0)
%291 = add i32 %290, -466
%292 = or i32 %284, %291
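; The next descriptors are assembled in registers rather than loaded whole:
; word 0 and a masked word 1 come from memory, while words 2 and 3 are the
; fixed constants -1 and 553734060 (0x21014FAC).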
%293 = getelementptr i8, ptr addrspace(4) %38, i64 168
%294 = shl i32 %72, 3
%295 = sext i32 %294 to i64
%296 = getelementptr i8, ptr addrspace(4) %293, i64 %295
%.ii0.i = load i32, ptr addrspace(4) %296, align 8
%297 = and i32 0, 65535
%298 = insertelement <4 x i32> <i32 poison, i32 poison, i32 -1, i32 553734060>, i32 %.ii0.i, i32 0
%299 = insertelement <4 x i32> %298, i32 %297, i32 1
%300 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %299, i32 0, i32 0)
%301 = add i32 %300, -467
%302 = or i32 %292, %301
%303 = shl i32 %77, 3
%304 = sext i32 %303 to i64
%305 = getelementptr i8, ptr addrspace(4) %293, i64 %304
%.ii090.i = load i32, ptr addrspace(4) %305, align 8
%.i191.i = getelementptr i8, ptr addrspace(4) %305, i64 4
%.ii192.i = load i32, ptr addrspace(4) %.i191.i, align 4
%306 = and i32 %.ii192.i, 65535
%307 = insertelement <4 x i32> <i32 poison, i32 poison, i32 -1, i32 553734060>, i32 %.ii090.i, i32 0
%308 = insertelement <4 x i32> %307, i32 %306, i32 1
%309 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %308, i32 0, i32 0)
%310 = add i32 %309, -468
%311 = or i32 %302, %310
%312 = shl i32 %83, 3
%313 = sext i32 %312 to i64
%314 = getelementptr i8, ptr addrspace(4) %293, i64 %313
%.ii096.i = load i32, ptr addrspace(4) %314, align 8
%.i197.i = getelementptr i8, ptr addrspace(4) %314, i64 4
%.ii198.i = load i32, ptr addrspace(4) %.i197.i, align 4
%315 = and i32 %.ii198.i, 65535
%316 = insertelement <4 x i32> <i32 poison, i32 poison, i32 -1, i32 553734060>, i32 %.ii096.i, i32 0
%317 = insertelement <4 x i32> %316, i32 %315, i32 1
%318 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %317, i32 0, i32 0)
%319 = add i32 %318, -469
%320 = or i32 %311, %319
%321 = call i32 @llvm.amdgcn.readfirstlane(i32 %3)
%322 = shl i32 %321, 3
%323 = sext i32 %322 to i64
%324 = getelementptr i8, ptr addrspace(4) %293, i64 %323
%.ii0102.i = load i32, ptr addrspace(4) %324, align 8
%.ii1104.i = load i32, ptr addrspace(4) poison, align 4
%325 = and i32 %.ii1104.i, 65535
%326 = insertelement <4 x i32> <i32 poison, i32 poison, i32 -1, i32 553734060>, i32 %.ii0102.i, i32 0
%327 = insertelement <4 x i32> %326, i32 %325, i32 1
%328 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %327, i32 0, i32 0)
%329 = add i32 %328, -473
%330 = or i32 %320, %329
%331 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 0, i32 0)
%332 = add i32 %331, -474
%333 = or i32 %330, %332
%334 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
%335 = add i32 %334, -475
%336 = or i32 %333, %335
%337 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
%338 = add i32 %337, -491
%339 = or i32 %336, %338
%340 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
%341 = add i32 %340, -507
%342 = or i32 %339, %341
%343 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> poison, i32 poison, i32 0)
%344 = add i32 %343, -539
%345 = or i32 %342, %344
%346 = getelementptr i8, ptr addrspace(4) %17, i64 96
%347 = getelementptr i8, ptr addrspace(4) %346, i64 %50
%348 = load <4 x i32>, ptr addrspace(4) %347, align 16
%349 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %348, i32 0, i32 0, i32 0, i32 0)
%350 = add i32 %349, -555
%351 = or i32 %345, %350
%352 = getelementptr i8, ptr addrspace(4) %346, i64 %56
%353 = load <4 x i32>, ptr addrspace(4) %352, align 16
%354 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %353, i32 0, i32 0, i32 0, i32 0)
%355 = add i32 %354, -556
%356 = or i32 %351, %355
%357 = getelementptr i8, ptr addrspace(4) %346, i64 %62
%358 = load <4 x i32>, ptr addrspace(4) %357, align 16
%359 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %358, i32 0, i32 0, i32 0, i32 0)
%360 = add i32 %359, -557
%361 = or i32 %356, %360
%362 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%363 = add i32 %362, -574
%364 = or i32 %361, %363
%365 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%366 = add i32 %365, -575
%367 = or i32 %364, %366
%368 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%369 = add i32 %368, -576
%370 = or i32 %367, %369
%371 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%372 = add i32 %371, -577
%373 = or i32 %370, %372
%374 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> poison, i32 0, i32 0, i32 0, i32 0)
%375 = add i32 %374, -593
%376 = or i32 %373, %375
%377 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %42, i32 0, i32 0)
%378 = add i32 %377, -594
%379 = or i32 %376, %378
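; Finally the whole OR chain is collapsed into a single condition and
; consumed by an image store, so no part of the chain is trivially dead.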
%.not.i = icmp eq i32 %379, 0
%380 = load <8 x i32>, ptr addrspace(4) poison, align 32
%.i010.i = select i1 %.not.i, float 0x36A0000000000000, float 0.000000e+00
%381 = insertelement <4 x float> poison, float %.i010.i, i32 3
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %381, i32 15, i32 poison, i32 poison, <8 x i32> %380, i32 0, i32 0)
ret void
}

declare i32 @llvm.amdgcn.readfirstlane(i32)
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)
declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)