On GFX9+, SMEM instructions have an _SGPR_IMM form which is strictly more powerful than the _SGPR form. It simplifies codegen if we always select the _SGPR_IMM form with an immediate offset of 0 instead of the _SGPR form. Note that this patch just makes minimal changes to the selection patterns to prove the concept. Further simplifications are possible to reduced the number of selection patterns. On GFX9 the _SGPR form of the Real instruction is still required for assembly/disassembly but on GFX10+ it can be removed completely. Differential Revision: https://reviews.llvm.org/D147334
789 lines
66 KiB
LLVM
789 lines
66 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
|
; RUN: llc -march=amdgcn -mcpu=gfx1010 -stop-after=greedy < %s | FileCheck %s
|
|
|
|
; TODO: This was introduced in D88020 to catch a case that some unreachable
|
|
; assert was hit during liverange split. But after D104509, there is some IR
|
|
; change after register coalescer which make the case not work as before. We
|
|
; need to find some other way to reproduce the bad case fixed by D88020.
|
|
|
|
%llpc.array.element = type <{ i32, [12 x i8] }>
|
|
%llpc.array.element.2 = type <{ i32, [12 x i8] }>
|
|
%llpc.array.element.5 = type <{ i32, [12 x i8] }>
|
|
|
|
define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x i32> inreg %userData) {
|
|
; CHECK-LABEL: name: _amdgpu_gs_main
|
|
; CHECK: bb.0..expVert:
|
|
; CHECK-NEXT: liveins: $sgpr3, $sgpr4, $sgpr5, $sgpr8, $sgpr9, $sgpr10, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr25, $sgpr27, $sgpr31
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: undef %56.sub0:sgpr_64 = COPY $sgpr31
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr27
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr25
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr5
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr4
|
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr3
|
|
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr18
|
|
; CHECK-NEXT: undef %50.sub0:sgpr_64 = COPY $sgpr19
|
|
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr20
|
|
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr21
|
|
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr22
|
|
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr23
|
|
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr9
|
|
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr10
|
|
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr8
|
|
; CHECK-NEXT: undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0 :: (invariant load (s64) from %ir.39, addrspace 4)
|
|
; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, 65535, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %130.sub0:sreg_64 = S_ADD_U32 [[COPY5]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %130.sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0 :: (invariant load (s128) from %ir.81, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (invariant load (s128) from `ptr addrspace(4) undef`, addrspace 4)
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: KILL undef %74:sreg_64
|
|
; CHECK-NEXT: KILL undef %132:sgpr_128
|
|
; CHECK-NEXT: KILL %130.sub0, %130.sub1
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
; CHECK-NEXT: undef %302.sub1:sgpr_128 = S_MOV_B32 0
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: KILL undef %89:sgpr_128
|
|
; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY5]], 64, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %54:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %149.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %149.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %156.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %149, 0, 0 :: (invariant load (s128) from %ir.87, addrspace 4)
|
|
; CHECK-NEXT: %156.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %163.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %163.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %171:sreg_32, 31, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %176.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], undef %171:sreg_32, implicit-def $scc
|
|
; CHECK-NEXT: %176.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %183.sub0:sreg_64 = S_ADD_U32 %50.sub0, [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %183.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %190.sub0:sreg_64 = S_ADD_U32 %50.sub0, [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: %190.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %200.sub0:sreg_64 = S_ADD_U32 %50.sub0, undef %171:sreg_32, implicit-def $scc
|
|
; CHECK-NEXT: %200.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 %50.sub0, 224, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADDC_U32_1:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %210.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %210.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %217.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: %217.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %224.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %224.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]]:sreg_32 = S_ADD_U32 %50.sub0, 576, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADDC_U32_2:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %241.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %241.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %253.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %253.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %261.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], undef %171:sreg_32, implicit-def $scc
|
|
; CHECK-NEXT: %261.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %273.sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %273.sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %286.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: %286.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %293.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %293.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %302, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %302, undef %314:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM %302, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %302, 16, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %369:sgpr_128, undef %370:sreg_32, 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %380:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %156, 0, 0 :: (invariant load (s128) from %ir.92, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %163, 0, 0 :: (invariant load (s128) from %ir.97, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %176, 0, 0 :: (invariant load (s128) from %ir.104, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %183, 0, 0 :: (invariant load (s128) from %ir.109, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %190, 0, 0 :: (invariant load (s128) from %ir.114, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %364:sgpr_128, [[S_ADD_I32_]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %375:sgpr_128, [[S_ADD_I32_1]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM]], -98, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM1]], -114, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM2]], -130, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %327.sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %327.sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %335.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %335.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %343.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0 :: (invariant load (s128) from %ir.121, addrspace 4)
|
|
; CHECK-NEXT: %343.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %351.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %351.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY10]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR_IMM undef %396:sgpr_128, [[S_ADD_I32_6]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %50, 224, 0 :: (invariant load (s128) from %ir.126, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %210, 0, 0 :: (invariant load (s128) from %ir.127, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %217, 0, 0 :: (invariant load (s128) from %ir.132, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %224, 0, 0 :: (invariant load (s128) from %ir.137, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM4]], -217, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -233, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM5]], -249, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_10:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM3]], -297, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_11:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -313, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -329, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -345, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM6]], -441, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], 160, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADDC_U32_3:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %36:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %411.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %411.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY11]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %425.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_4]], implicit-def $scc
|
|
; CHECK-NEXT: %425.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]]:sreg_32 = S_ADD_U32 %56.sub0, 168, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %241, 0, 0 :: (invariant load (s128) from %ir.147, addrspace 4)
|
|
; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %441.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_5]], implicit-def $scc
|
|
; CHECK-NEXT: %441.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %441, 0, 0 :: (invariant load (s32) from %ir.269, align 8, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %253, 0, 0 :: (invariant load (s128) from %ir.154, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %261, 0, 0 :: (invariant load (s128) from %ir.159, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: %71.sub3:sgpr_128 = S_MOV_B32 553734060
|
|
; CHECK-NEXT: %71.sub2:sgpr_128 = S_MOV_B32 -1
|
|
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY %71
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0 :: (invariant load (s128) from %ir.167, addrspace 4)
|
|
; CHECK-NEXT: [[COPY13]].sub1:sgpr_128 = COPY %302.sub1
|
|
; CHECK-NEXT: [[COPY13]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]]
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY13]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0 :: (invariant load (s128) from %ir.175, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0 :: (invariant load (s128) from %ir.180, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %453.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_6]], implicit-def $scc
|
|
; CHECK-NEXT: %453.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %453, 0, 0 :: (invariant load (s64) from %ir.277, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0 :: (invariant load (s128) from %ir.202, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0 :: (invariant load (s128) from %ir.208, addrspace 4)
|
|
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY %71
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0 :: (invariant load (s128) from %ir.213, addrspace 4)
|
|
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY14]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]].sub0
|
|
; CHECK-NEXT: [[COPY14]].sub1:sgpr_128 = COPY [[S_AND_B32_]]
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY14]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0 :: (invariant load (s128) from %ir.218, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 3, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %468.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_7]], implicit-def $scc
|
|
; CHECK-NEXT: %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %468, 0, 0 :: (invariant load (s64) from %ir.287, addrspace 4)
|
|
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY %71
|
|
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY15]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0
|
|
; CHECK-NEXT: [[COPY15]].sub1:sgpr_128 = COPY [[S_AND_B32_1]]
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0 :: (invariant load (s128) from %ir.253, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0 :: (invariant load (s32) from `ptr addrspace(4) undef`, addrspace 4)
|
|
; CHECK-NEXT: KILL %411.sub0, %411.sub1
|
|
; CHECK-NEXT: KILL undef %488:sreg_64
|
|
; CHECK-NEXT: KILL [[COPY15]].sub0_sub1, [[COPY15]].sub2_sub3
|
|
; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 3, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0 :: (invariant load (s128) from %ir.261, addrspace 4)
|
|
; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %485.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_8]], implicit-def $scc
|
|
; CHECK-NEXT: %485.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %485, 0, 0 :: (invariant load (s32) from %ir.298, align 8, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]]
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]]
|
|
; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY %71
|
|
; CHECK-NEXT: [[COPY16]].sub1:sgpr_128 = COPY [[S_AND_B32_2]]
|
|
; CHECK-NEXT: [[COPY16]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM2]]
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY16]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -475, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -491, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -507, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR_IMM3]], -539, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], 96, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0 :: (invariant load (s128) from %ir.316, addrspace 4)
|
|
; CHECK-NEXT: undef %522.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: %522.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0 :: (invariant load (s128) from %ir.321, addrspace 4)
|
|
; CHECK-NEXT: undef %530.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %530.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0 :: (invariant load (s128) from %ir.326, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 7)
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]]
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]]
|
|
; CHECK-NEXT: KILL [[V_MOV_B32_e32_]]
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]]
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_ADD_U32_e64_]], [[V_ADD_U32_e64_1]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -4, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_]], [[V_ADD_U32_e64_2]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 27, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_2:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_1]], [[V_ADD_U32_e64_3]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 28, [[BUFFER_LOAD_DWORD_OFFSET]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_3:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_2]], [[V_SUBREV_U32_e64_]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_4:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_3]], [[V_SUBREV_U32_e64_1]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_5:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_SUB_I32_]], [[V_OR_B32_e64_4]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_6:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_SUB_I32_1]], [[V_OR_B32_e64_5]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_7:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_SUB_I32_2]], [[V_OR_B32_e64_6]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 32, [[BUFFER_LOAD_FORMAT_X_IDXEN2]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_3:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 33, [[BUFFER_LOAD_FORMAT_X_IDXEN3]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_8:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_7]], [[V_SUBREV_U32_e64_2]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_4:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 34, [[BUFFER_LOAD_FORMAT_X_IDXEN4]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_9:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_8]], [[V_SUBREV_U32_e64_3]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_5:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 36, [[BUFFER_LOAD_FORMAT_X_IDXEN5]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_10:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_9]], [[V_SUBREV_U32_e64_4]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_6:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 37, [[BUFFER_LOAD_FORMAT_X_IDXEN6]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_11:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_10]], [[V_SUBREV_U32_e64_5]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_7:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 38, [[BUFFER_LOAD_FORMAT_X_IDXEN7]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_12:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_11]], [[V_SUBREV_U32_e64_6]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_8:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 39, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_13:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_12]], [[V_SUBREV_U32_e64_7]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_9:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 50, [[BUFFER_LOAD_FORMAT_X_IDXEN8]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_14:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_13]], [[V_SUBREV_U32_e64_8]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_10:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 51, [[BUFFER_LOAD_FORMAT_X_IDXEN9]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_15:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_14]], [[V_SUBREV_U32_e64_9]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_11:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 52, [[BUFFER_LOAD_FORMAT_X_IDXEN10]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_16:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_15]], [[V_SUBREV_U32_e64_10]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e64_12:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e64 53, [[BUFFER_LOAD_FORMAT_X_IDXEN11]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_17:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_16]], [[V_SUBREV_U32_e64_11]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -72, [[BUFFER_LOAD_FORMAT_X_IDXEN12]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_18:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_17]], [[V_SUBREV_U32_e64_12]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -73, [[BUFFER_LOAD_FORMAT_X_IDXEN13]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_19:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_18]], [[V_ADD_U32_e64_4]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -74, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_20:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_19]], [[V_ADD_U32_e64_5]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -75, [[BUFFER_LOAD_FORMAT_X_IDXEN14]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_21:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_20]], [[V_ADD_U32_e64_6]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -77, [[BUFFER_LOAD_FORMAT_X_IDXEN15]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_22:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_21]], [[V_ADD_U32_e64_7]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -93, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_23:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_22]], [[V_ADD_U32_e64_8]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -94, [[BUFFER_LOAD_FORMAT_X_IDXEN16]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_24:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_23]], [[V_ADD_U32_e64_9]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -95, [[BUFFER_LOAD_DWORD_OFFSET1]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_25:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_24]], [[V_ADD_U32_e64_10]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -96, [[BUFFER_LOAD_DWORD_OFFSET2]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_26:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_25]], [[V_ADD_U32_e64_11]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -97, [[BUFFER_LOAD_DWORD_OFFSET3]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_27:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_26]], [[V_ADD_U32_e64_12]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_28:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_27]], [[V_ADD_U32_e64_13]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_29:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_2]], [[V_OR_B32_e64_28]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_30:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_3]], [[V_OR_B32_e64_29]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_31:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_4]], [[V_OR_B32_e64_30]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -194, [[BUFFER_LOAD_FORMAT_X_IDXEN17]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_32:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_5]], [[V_OR_B32_e64_31]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -195, [[BUFFER_LOAD_FORMAT_X_IDXEN18]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_33:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_32]], [[V_ADD_U32_e64_14]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -196, [[BUFFER_LOAD_FORMAT_X_IDXEN19]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_34:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_33]], [[V_ADD_U32_e64_15]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -197, [[BUFFER_LOAD_FORMAT_X_IDXEN20]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_35:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_34]], [[V_ADD_U32_e64_16]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -216, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_36:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_35]], [[V_ADD_U32_e64_17]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_37:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_36]], [[V_ADD_U32_e64_18]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_38:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_7]], [[V_OR_B32_e64_37]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_39:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_8]], [[V_OR_B32_e64_38]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_40:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_9]], [[V_OR_B32_e64_39]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_41:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_10]], [[V_OR_B32_e64_40]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_42:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_11]], [[V_OR_B32_e64_41]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_43:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_12]], [[V_OR_B32_e64_42]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_44:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_13]], [[V_OR_B32_e64_43]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -457, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_45:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_14]], [[V_OR_B32_e64_44]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -458, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_46:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_45]], [[V_ADD_U32_e64_19]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -459, [[BUFFER_LOAD_FORMAT_X_IDXEN21]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_47:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_46]], [[V_ADD_U32_e64_20]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -466, [[BUFFER_LOAD_FORMAT_X_IDXEN22]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_48:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_47]], [[V_ADD_U32_e64_21]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_49:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_48]], [[V_ADD_U32_e64_22]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_50:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_15]], [[V_OR_B32_e64_49]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_51:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_16]], [[V_OR_B32_e64_50]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_52:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_17]], [[V_OR_B32_e64_51]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_53:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_23]], [[V_OR_B32_e64_52]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_54:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_18]], [[V_OR_B32_e64_53]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_55:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_19]], [[V_OR_B32_e64_54]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_56:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_20]], [[V_OR_B32_e64_55]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_57:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_21]], [[V_OR_B32_e64_56]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_58:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_22]], [[V_OR_B32_e64_57]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -555, [[BUFFER_LOAD_FORMAT_X_IDXEN23]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -556, [[BUFFER_LOAD_FORMAT_X_IDXEN24]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_59:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_58]], [[V_ADD_U32_e64_23]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -557, [[BUFFER_LOAD_FORMAT_X_IDXEN25]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_60:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_59]], [[V_ADD_U32_e64_24]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -574, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_61:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_60]], [[V_ADD_U32_e64_25]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -575, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_62:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_61]], [[V_ADD_U32_e64_26]], implicit $exec
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM8:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -576, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_63:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_62]], [[V_ADD_U32_e64_27]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -577, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_63]], [[V_ADD_U32_e64_28]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], 0, implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e64_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_64]], [[V_ADD_U32_e64_29]], implicit $exec
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %564:sreg_64, 0, 0 :: (invariant load (s256) from `ptr addrspace(4) undef`, addrspace 4)
|
|
; CHECK-NEXT: [[V_OR_B32_e64_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[V_OR_B32_e64_65]], [[V_ADD_U32_e64_30]], implicit $exec
|
|
; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc
|
|
; CHECK-NEXT: [[V_OR_B32_e64_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e64 [[S_ADD_I32_24]], [[V_OR_B32_e64_66]], implicit $exec
|
|
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e64_67]], implicit $exec
|
|
; CHECK-NEXT: undef %624.sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
|
|
; CHECK-NEXT: IMAGE_STORE_V4_V2_gfx10 %624, undef %578:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), addrspace 7)
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
.expVert:
|
|
%0 = extractelement <31 x i32> %userData, i64 2
|
|
%1 = extractelement <31 x i32> %userData, i64 3
|
|
%2 = extractelement <31 x i32> %userData, i64 4
|
|
%3 = extractelement <31 x i32> %userData, i64 7
|
|
%4 = extractelement <31 x i32> %userData, i64 8
|
|
%5 = extractelement <31 x i32> %userData, i64 9
|
|
%6 = extractelement <31 x i32> %userData, i64 17
|
|
%7 = extractelement <31 x i32> %userData, i64 18
|
|
%8 = extractelement <31 x i32> %userData, i64 19
|
|
%9 = extractelement <31 x i32> %userData, i64 20
|
|
%10 = extractelement <31 x i32> %userData, i64 21
|
|
%11 = extractelement <31 x i32> %userData, i64 22
|
|
%12 = extractelement <31 x i32> %userData, i64 24
|
|
%13 = extractelement <31 x i32> %userData, i64 26
|
|
%14 = extractelement <31 x i32> %userData, i64 30
|
|
%15 = insertelement <2 x i32> undef, i32 %13, i32 0
|
|
%16 = bitcast <2 x i32> %15 to i64
|
|
%17 = inttoptr i64 %16 to ptr addrspace(4)
|
|
%18 = insertelement <2 x i32> undef, i32 %12, i32 0
|
|
%19 = bitcast <2 x i32> %18 to i64
|
|
%20 = inttoptr i64 %19 to ptr addrspace(4)
|
|
%21 = insertelement <2 x i32> undef, i32 %11, i32 0
|
|
%22 = bitcast <2 x i32> %21 to i64
|
|
%23 = insertelement <2 x i32> undef, i32 %10, i32 0
|
|
%24 = bitcast <2 x i32> %23 to i64
|
|
%25 = insertelement <2 x i32> undef, i32 %9, i32 0
|
|
%26 = bitcast <2 x i32> %25 to i64
|
|
%27 = inttoptr i64 %26 to ptr addrspace(4)
|
|
%28 = insertelement <2 x i32> undef, i32 %8, i32 0
|
|
%29 = bitcast <2 x i32> %28 to i64
|
|
%30 = insertelement <2 x i32> undef, i32 %7, i32 0
|
|
%31 = bitcast <2 x i32> %30 to i64
|
|
%32 = inttoptr i64 %31 to ptr addrspace(4)
|
|
%33 = insertelement <2 x i32> undef, i32 %6, i32 0
|
|
%34 = bitcast <2 x i32> %33 to i64
|
|
%35 = inttoptr i64 %34 to ptr addrspace(4)
|
|
%36 = insertelement <2 x i32> undef, i32 %14, i32 0
|
|
%37 = bitcast <2 x i32> %36 to i64
|
|
%38 = inttoptr i64 %37 to ptr addrspace(4)
|
|
%39 = getelementptr i8, ptr addrspace(4) %38, i64 232
|
|
%rootDesc58.ii0.i = load i32, ptr addrspace(4) %39, align 8
|
|
%.i184.i = getelementptr i8, ptr addrspace(4) %38, i64 236
|
|
%rootDesc58.ii1.i = load i32, ptr addrspace(4) %.i184.i, align 4
|
|
%40 = and i32 %rootDesc58.ii1.i, 65535
|
|
%41 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %rootDesc58.ii0.i, i32 0
|
|
%42 = insertelement <4 x i32> %41, i32 %40, i32 1
|
|
%43 = and i32 undef, 65535
|
|
%44 = insertelement <4 x i32> undef, i32 %43, i32 1
|
|
%45 = load <4 x i32>, ptr addrspace(4) undef, align 16
|
|
%46 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %45, i32 0, i32 0, i32 0, i32 0)
|
|
%47 = add i32 %46, -1
|
|
%48 = shl i32 %0, 4
|
|
%49 = call i32 @llvm.amdgcn.readfirstlane(i32 %48)
|
|
%50 = sext i32 %49 to i64
|
|
%51 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%52 = add i32 %51, -2
|
|
%53 = or i32 %52, %47
|
|
%54 = shl i32 %1, 4
|
|
%55 = call i32 @llvm.amdgcn.readfirstlane(i32 %54)
|
|
%56 = sext i32 %55 to i64
|
|
%57 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%58 = add i32 %57, -3
|
|
%59 = or i32 %53, %58
|
|
%60 = shl i32 %2, 4
|
|
%61 = call i32 @llvm.amdgcn.readfirstlane(i32 %60)
|
|
%62 = sext i32 %61 to i64
|
|
%63 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%64 = add i32 %63, -4
|
|
%65 = or i32 %59, %64
|
|
%66 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%67 = add i32 %66, -27
|
|
%68 = or i32 %65, %67
|
|
%69 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 0, i32 0, i32 0)
|
|
%70 = add i32 %69, -28
|
|
%71 = or i32 %68, %70
|
|
%72 = call i32 @llvm.amdgcn.readfirstlane(i32 %0)
|
|
%73 = getelementptr i8, ptr addrspace(4) %35, i64 16
|
|
%74 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
|
|
%75 = add i32 %74, -29
|
|
%76 = or i32 %71, %75
|
|
%77 = call i32 @llvm.amdgcn.readfirstlane(i32 %1)
|
|
%78 = shl i32 %77, 4
|
|
%79 = sext i32 %78 to i64
|
|
%80 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
|
|
%81 = add i32 %80, -30
|
|
%82 = or i32 %76, %81
|
|
%83 = call i32 @llvm.amdgcn.readfirstlane(i32 %2)
|
|
%84 = shl i32 %83, 4
|
|
%85 = sext i32 %84 to i64
|
|
%86 = getelementptr i8, ptr addrspace(4) %73, i64 %85
|
|
%87 = load <4 x i32>, ptr addrspace(4) %86, align 16
|
|
%88 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %87, i32 0, i32 0)
|
|
%89 = add i32 %88, -31
|
|
%90 = or i32 %82, %89
|
|
%91 = getelementptr i8, ptr addrspace(4) %35, i64 64
|
|
%92 = getelementptr i8, ptr addrspace(4) %91, i64 %50
|
|
%93 = load <4 x i32>, ptr addrspace(4) %92, align 16
|
|
%94 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %93, i32 0, i32 0, i32 0, i32 0)
|
|
%95 = add i32 %94, -32
|
|
%96 = or i32 %90, %95
|
|
%97 = getelementptr i8, ptr addrspace(4) %91, i64 %56
|
|
%98 = load <4 x i32>, ptr addrspace(4) %97, align 16
|
|
%99 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %98, i32 0, i32 0, i32 0, i32 0)
|
|
%100 = add i32 %99, -33
|
|
%101 = or i32 %96, %100
|
|
%102 = getelementptr i8, ptr addrspace(4) %91, i64 %62
|
|
%103 = load <4 x i32>, ptr addrspace(4) %102, align 16
|
|
%104 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %103, i32 0, i32 0, i32 0, i32 0)
|
|
%105 = add i32 %104, -34
|
|
%106 = or i32 %101, %105
|
|
%107 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
|
|
%108 = sext i32 %107 to i64
|
|
%109 = getelementptr i8, ptr addrspace(4) %91, i64 %108
|
|
%110 = load <4 x i32>, ptr addrspace(4) %109, align 16
|
|
%111 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %110, i32 0, i32 0, i32 0, i32 0)
|
|
%112 = add i32 %111, -36
|
|
%113 = or i32 %106, %112
|
|
%114 = getelementptr i8, ptr addrspace(4) %32, i64 %50
|
|
%115 = load <4 x i32>, ptr addrspace(4) %114, align 16
|
|
%116 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %115, i32 0, i32 0, i32 0, i32 0)
|
|
%117 = add i32 %116, -37
|
|
%118 = or i32 %113, %117
|
|
%119 = getelementptr i8, ptr addrspace(4) %32, i64 %56
|
|
%120 = load <4 x i32>, ptr addrspace(4) %119, align 16
|
|
%121 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %120, i32 0, i32 0, i32 0, i32 0)
|
|
%122 = add i32 %121, -38
|
|
%123 = or i32 %118, %122
|
|
%124 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%125 = add i32 %124, -39
|
|
%126 = or i32 %123, %125
|
|
%127 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
|
|
%128 = sext i32 %127 to i64
|
|
%129 = getelementptr i8, ptr addrspace(4) %32, i64 %128
|
|
%130 = load <4 x i32>, ptr addrspace(4) %129, align 16
|
|
%131 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %130, i32 0, i32 0, i32 0, i32 0)
|
|
%132 = add i32 %131, -50
|
|
%133 = or i32 %126, %132
|
|
%134 = getelementptr i8, ptr addrspace(4) %32, i64 224
|
|
%135 = getelementptr i8, ptr addrspace(4) %134, i64 %50
|
|
%136 = load <4 x i32>, ptr addrspace(4) %135, align 16
|
|
%137 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %136, i32 0, i32 0, i32 0, i32 0)
|
|
%138 = add i32 %137, -51
|
|
%139 = or i32 %133, %138
|
|
%140 = getelementptr i8, ptr addrspace(4) %134, i64 %56
|
|
%141 = load <4 x i32>, ptr addrspace(4) %140, align 16
|
|
%142 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %141, i32 0, i32 0, i32 0, i32 0)
|
|
%143 = add i32 %142, -52
|
|
%144 = or i32 %139, %143
|
|
%145 = getelementptr i8, ptr addrspace(4) %134, i64 %62
|
|
%146 = load <4 x i32>, ptr addrspace(4) %145, align 16
|
|
%147 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %146, i32 0, i32 0, i32 0, i32 0)
|
|
%148 = add i32 %147, -53
|
|
%149 = or i32 %144, %148
|
|
%150 = sext i32 undef to i64
|
|
%151 = getelementptr i8, ptr addrspace(4) %134, i64 %150
|
|
%152 = load <4 x i32>, ptr addrspace(4) %151, align 16
|
|
%153 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %152, i32 0, i32 0, i32 0, i32 0)
|
|
%154 = add i32 %153, -72
|
|
%155 = or i32 %149, %154
|
|
%156 = getelementptr i8, ptr addrspace(4) %32, i64 576
|
|
%157 = getelementptr i8, ptr addrspace(4) %156, i64 %50
|
|
%158 = load <4 x i32>, ptr addrspace(4) %157, align 16
|
|
%159 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %158, i32 0, i32 0, i32 0, i32 0)
|
|
%160 = add i32 %159, -73
|
|
%161 = or i32 %155, %160
|
|
%162 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%163 = add i32 %162, -74
|
|
%164 = or i32 %161, %163
|
|
%165 = getelementptr i8, ptr addrspace(4) %156, i64 %62
%166 = load <4 x i32>, ptr addrspace(4) %165, align 16
%167 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %166, i32 0, i32 0, i32 0, i32 0)
%168 = add i32 %167, -75
%169 = or i32 %164, %168
%170 = getelementptr i8, ptr addrspace(4) %156, i64 %108
%171 = load <4 x i32>, ptr addrspace(4) %170, align 16
%172 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %171, i32 0, i32 0, i32 0, i32 0)
%173 = add i32 %172, -77
%174 = or i32 %169, %173
%175 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%176 = add i32 %175, -93
%177 = or i32 %174, %176
%178 = inttoptr i64 %29 to ptr addrspace(4)
%179 = getelementptr i8, ptr addrspace(4) %178, i64 %50
%180 = load <4 x i32>, ptr addrspace(4) %179, align 16
%181 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %180, i32 0, i32 0, i32 0, i32 0)
%182 = add i32 %181, -94
%183 = or i32 %177, %182
%184 = load <4 x i32>, ptr addrspace(4) undef, align 16
%185 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %184, i32 0, i32 0, i32 0)
%186 = add i32 %185, -95
%187 = or i32 %183, %186
%188 = getelementptr i8, ptr addrspace(4) %27, i64 %79
%189 = load <4 x i32>, ptr addrspace(4) %188, align 16
%190 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %189, i32 0, i32 0, i32 0)
%191 = add i32 %190, -96
%192 = or i32 %187, %191
%193 = getelementptr i8, ptr addrspace(4) %27, i64 %85
%194 = load <4 x i32>, ptr addrspace(4) %193, align 16
%195 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %194, i32 0, i32 0, i32 0)
%196 = add i32 %195, -97
%197 = or i32 %192, %196
%198 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %0, i32 0
%199 = ptrtoint ptr addrspace(6) %198 to i32
%200 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %199, i32 0)
%201 = add i32 %200, -98
%202 = or i32 %197, %201
%203 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 undef, i32 0)
%204 = add i32 %203, -114
%205 = or i32 %202, %204
%206 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %2, i32 0
%207 = ptrtoint ptr addrspace(6) %206 to i32
%208 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %207, i32 0)
%209 = add i32 %208, -130
%210 = or i32 %205, %209
%211 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, ptr addrspace(6) null, i32 0, i32 1, i32 undef, i32 0
%212 = ptrtoint ptr addrspace(6) %211 to i32
%213 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %44, i32 %212, i32 0)
%214 = add i32 %213, -178
%215 = or i32 %210, %214
%216 = inttoptr i64 %24 to ptr addrspace(4)
%217 = getelementptr i8, ptr addrspace(4) %216, i64 %50
%218 = load <4 x i32>, ptr addrspace(4) %217, align 16
%219 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %218, i32 0, i32 0, i32 0, i32 0)
%220 = add i32 %219, -194
%221 = or i32 %215, %220
%222 = inttoptr i64 %22 to ptr addrspace(4)
%223 = getelementptr i8, ptr addrspace(4) %222, i64 %50
%224 = load <4 x i32>, ptr addrspace(4) %223, align 16
%225 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %224, i32 0, i32 0, i32 0, i32 0)
%226 = add i32 %225, -195
%227 = or i32 %221, %226
%228 = getelementptr i8, ptr addrspace(4) %222, i64 %56
%229 = load <4 x i32>, ptr addrspace(4) %228, align 16
%230 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %229, i32 0, i32 0, i32 0, i32 0)
%231 = add i32 %230, -196
%232 = or i32 %227, %231
%233 = getelementptr i8, ptr addrspace(4) %222, i64 %62
%234 = load <4 x i32>, ptr addrspace(4) %233, align 16
%235 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %234, i32 0, i32 0, i32 0, i32 0)
%236 = add i32 %235, -197
%237 = or i32 %232, %236
%238 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%239 = add i32 %238, -216
%240 = or i32 %237, %239
%241 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %0, i32 0
%242 = ptrtoint ptr addrspace(6) %241 to i32
%243 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %242, i32 0)
%244 = add i32 %243, -217
%245 = or i32 %240, %244
%246 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%247 = add i32 %246, -233
%248 = or i32 %245, %247
%249 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %2, i32 0
%250 = ptrtoint ptr addrspace(6) %249 to i32
%251 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %250, i32 0)
%252 = add i32 %251, -249
%253 = or i32 %248, %252
%254 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, ptr addrspace(6) null, i32 0, i32 1, i32 undef, i32 0
%255 = ptrtoint ptr addrspace(6) %254 to i32
%256 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %255, i32 0)
%257 = add i32 %256, -297
%258 = or i32 %253, %257
%259 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%260 = add i32 %259, -313
%261 = or i32 %258, %260
%262 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%263 = add i32 %262, -329
%264 = or i32 %261, %263
%265 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%266 = add i32 %265, -345
%267 = or i32 %264, %266
%268 = getelementptr <{ [4 x i32], [9 x %llpc.array.element.5] }>, ptr addrspace(6) null, i32 0, i32 1, i32 %4, i32 0
%269 = ptrtoint ptr addrspace(6) %268 to i32
%270 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %269, i32 0)
%271 = add i32 %270, -441
%272 = or i32 %267, %271
%273 = getelementptr i8, ptr addrspace(4) %20, i64 160
%274 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%275 = add i32 %274, -457
%276 = or i32 %272, %275
%277 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%278 = add i32 %277, -458
%279 = or i32 %276, %278
%280 = getelementptr i8, ptr addrspace(4) %273, i64 %62
%281 = load <4 x i32>, ptr addrspace(4) %280, align 16
%282 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %281, i32 0, i32 0, i32 0, i32 0)
%283 = add i32 %282, -459
%284 = or i32 %279, %283
%285 = shl i32 %5, 4
%286 = call i32 @llvm.amdgcn.readfirstlane(i32 %285)
%287 = sext i32 %286 to i64
%288 = getelementptr i8, ptr addrspace(4) %273, i64 %287
%289 = load <4 x i32>, ptr addrspace(4) %288, align 16
%290 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %289, i32 0, i32 0, i32 0, i32 0)
%291 = add i32 %290, -466
%292 = or i32 %284, %291
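; The blocks below build <4 x i32> buffer descriptors from two dwords loaded via
; %293 (the second dword masked to its low 16 bits) plus the constants -1 and
; 553734060, and read through them with llvm.amdgcn.s.buffer.load.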
%293 = getelementptr i8, ptr addrspace(4) %38, i64 168
%294 = shl i32 %72, 3
%295 = sext i32 %294 to i64
%296 = getelementptr i8, ptr addrspace(4) %293, i64 %295
%.ii0.i = load i32, ptr addrspace(4) %296, align 8
%297 = and i32 undef, 65535
%298 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii0.i, i32 0
%299 = insertelement <4 x i32> %298, i32 %297, i32 1
%300 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %299, i32 0, i32 0)
%301 = add i32 %300, -467
%302 = or i32 %292, %301
%303 = shl i32 %77, 3
%304 = sext i32 %303 to i64
%305 = getelementptr i8, ptr addrspace(4) %293, i64 %304
%.ii090.i = load i32, ptr addrspace(4) %305, align 8
%.i191.i = getelementptr i8, ptr addrspace(4) %305, i64 4
%.ii192.i = load i32, ptr addrspace(4) %.i191.i, align 4
%306 = and i32 %.ii192.i, 65535
%307 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii090.i, i32 0
%308 = insertelement <4 x i32> %307, i32 %306, i32 1
%309 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %308, i32 0, i32 0)
%310 = add i32 %309, -468
%311 = or i32 %302, %310
%312 = shl i32 %83, 3
%313 = sext i32 %312 to i64
%314 = getelementptr i8, ptr addrspace(4) %293, i64 %313
%.ii096.i = load i32, ptr addrspace(4) %314, align 8
%.i197.i = getelementptr i8, ptr addrspace(4) %314, i64 4
%.ii198.i = load i32, ptr addrspace(4) %.i197.i, align 4
%315 = and i32 %.ii198.i, 65535
%316 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii096.i, i32 0
%317 = insertelement <4 x i32> %316, i32 %315, i32 1
%318 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %317, i32 0, i32 0)
%319 = add i32 %318, -469
%320 = or i32 %311, %319
%321 = call i32 @llvm.amdgcn.readfirstlane(i32 %3)
%322 = shl i32 %321, 3
%323 = sext i32 %322 to i64
%324 = getelementptr i8, ptr addrspace(4) %293, i64 %323
%.ii0102.i = load i32, ptr addrspace(4) %324, align 8
%.ii1104.i = load i32, ptr addrspace(4) undef, align 4
%325 = and i32 %.ii1104.i, 65535
%326 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii0102.i, i32 0
%327 = insertelement <4 x i32> %326, i32 %325, i32 1
%328 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %327, i32 0, i32 0)
%329 = add i32 %328, -473
%330 = or i32 %320, %329
%331 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
%332 = add i32 %331, -474
%333 = or i32 %330, %332
%334 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%335 = add i32 %334, -475
%336 = or i32 %333, %335
%337 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%338 = add i32 %337, -491
%339 = or i32 %336, %338
%340 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%341 = add i32 %340, -507
%342 = or i32 %339, %341
%343 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
%344 = add i32 %343, -539
%345 = or i32 %342, %344
%346 = getelementptr i8, ptr addrspace(4) %17, i64 96
%347 = getelementptr i8, ptr addrspace(4) %346, i64 %50
%348 = load <4 x i32>, ptr addrspace(4) %347, align 16
%349 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %348, i32 0, i32 0, i32 0, i32 0)
%350 = add i32 %349, -555
%351 = or i32 %345, %350
%352 = getelementptr i8, ptr addrspace(4) %346, i64 %56
%353 = load <4 x i32>, ptr addrspace(4) %352, align 16
%354 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %353, i32 0, i32 0, i32 0, i32 0)
%355 = add i32 %354, -556
%356 = or i32 %351, %355
%357 = getelementptr i8, ptr addrspace(4) %346, i64 %62
%358 = load <4 x i32>, ptr addrspace(4) %357, align 16
%359 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %358, i32 0, i32 0, i32 0, i32 0)
%360 = add i32 %359, -557
%361 = or i32 %356, %360
%362 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%363 = add i32 %362, -574
%364 = or i32 %361, %363
%365 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%366 = add i32 %365, -575
%367 = or i32 %364, %366
%368 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%369 = add i32 %368, -576
%370 = or i32 %367, %369
%371 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%372 = add i32 %371, -577
%373 = or i32 %370, %372
%374 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
%375 = add i32 %374, -593
%376 = or i32 %373, %375
%377 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %42, i32 0, i32 0)
%378 = add i32 %377, -594
%379 = or i32 %376, %378
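; The or-chain above folds every offset-adjusted load result into %379; the image
; store below writes a nonzero component only when that accumulated value is zero.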
%.not.i = icmp eq i32 %379, 0
%380 = load <8 x i32>, ptr addrspace(4) undef, align 32
%.i010.i = select i1 %.not.i, float 0x36A0000000000000, float 0.000000e+00
%381 = insertelement <4 x float> undef, float %.i010.i, i32 3
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %381, i32 15, i32 undef, i32 undef, <8 x i32> %380, i32 0, i32 0)
ret void
}

declare i32 @llvm.amdgcn.readfirstlane(i32)
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)
declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)