In commit 2e29b0138c we introduced a solving algorithm that analyzes
the use chains of VGPR to SGPR copies and either lowers
the copy to v_readfirstlane_b32 or converts the whole chain to VALU forms.
At the same time, we still have code that blindly converts REG_SEQUENCEs and PHIs
to VALU when they produce an SGPR but have VGPR input operands. If such a REG_SEQUENCE or PHI
sits in the use chain of a VGPR to SGPR copy, and that chain was considered long enough to convert
the copy to v_readfirstlane_b32, lowering them to VALU afterwards leads to several kinds of issues.
First, the v_readfirstlane_b32 becomes completely useless because most of its use chain
has been moved to VALU forms. Second, we may encounter subtle bugs related to EXEC-dependent
control flow caused by the odd mix of SALU and VALU instructions.
This change removes the code that moves REG_SEQUENCEs and PHIs to VALU. Instead, we exploit the fact
that both REG_SEQUENCE and PHI have copy semantics: if they define an SGPR but have VGPR inputs,
we insert VGPR to SGPR copies to make them pure SGPR. The new copies are then processed by the common
VGPR to SGPR lowering algorithm.
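To illustrate (a rough sketch with made-up virtual registers; the exact MIR produced
by the pass may differ), an SGPR-defining REG_SEQUENCE with VGPR inputs is no longer
rewritten into a VALU REG_SEQUENCE; it is made pure SGPR instead:

  ; before the pass
  %3:sgpr_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %2:vgpr_32, %subreg.sub1

  ; after this change
  %4:sgpr_32 = COPY %1:vgpr_32
  %5:sgpr_32 = COPY %2:vgpr_32
  %3:sgpr_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1

The inserted COPYs are then analyzed like any other VGPR to SGPR copy, so the whole
chain is either lowered via v_readfirstlane_b32 or moved to VALU consistently.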
This is Part 2 in a series of commits aiming at a major refactoring of the SIFixSGPRCopies pass.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D130367
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx1010 -stop-after=greedy < %s | FileCheck %s

; TODO: This was introduced in D88020 to catch a case where some unreachable
; assert was hit during liverange split. But after D104509, there is some IR
; change after the register coalescer which makes the case not work as before. We
; need to find some other way to reproduce the bad case fixed by D88020.

%llpc.array.element = type <{ i32, [12 x i8] }>
%llpc.array.element.2 = type <{ i32, [12 x i8] }>
%llpc.array.element.5 = type <{ i32, [12 x i8] }>

define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x i32> inreg %userData) {
|
|
; CHECK-LABEL: name: _amdgpu_gs_main
|
|
; CHECK: bb.0..expVert:
|
|
; CHECK-NEXT: liveins: $sgpr3, $sgpr4, $sgpr5, $sgpr8, $sgpr9, $sgpr10, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr25, $sgpr27, $sgpr31
|
|
; CHECK-NEXT: {{ $}}
|
|
; CHECK-NEXT: undef %56.sub0:sgpr_64 = COPY $sgpr31
|
|
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr27
|
|
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr25
|
|
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr5
|
|
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr4
|
|
; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr3
|
|
; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr18
|
|
; CHECK-NEXT: undef %50.sub0:sgpr_64 = COPY $sgpr19
|
|
; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr20
|
|
; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_32 = COPY $sgpr21
|
|
; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_32 = COPY $sgpr22
|
|
; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_32 = COPY $sgpr23
|
|
; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_32 = COPY $sgpr9
|
|
; CHECK-NEXT: [[COPY11:%[0-9]+]]:sgpr_32 = COPY $sgpr10
|
|
; CHECK-NEXT: [[COPY12:%[0-9]+]]:sgpr_32 = COPY $sgpr8
|
|
; CHECK-NEXT: undef %71.sub0_sub1:sgpr_128 = S_LOAD_DWORDX2_IMM %56, 232, 0 :: (dereferenceable invariant load (s64) from %ir.40, addrspace 4)
|
|
; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_1:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_2:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ASHR_I32_:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ASHR_I32_1:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_1]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ASHR_I32_2:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_2]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: %71.sub1:sgpr_128 = S_AND_B32 %71.sub1, 65535, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %130.sub0:sreg_64 = S_ADD_U32 [[COPY5]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %130.sub1:sreg_64 = S_ADDC_U32 undef %54:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %130, 16, 0 :: (dereferenceable invariant load (s128) from %ir.84, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM1:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM undef %74:sreg_64, 0, 0 :: (dereferenceable invariant load (s128) from `<4 x i32> addrspace(4)* undef`, addrspace 4)
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %132:sgpr_128, 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: KILL undef %74:sreg_64
|
|
; CHECK-NEXT: KILL undef %132:sgpr_128
|
|
; CHECK-NEXT: KILL %130.sub0, %130.sub1
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[S_LOAD_DWORDX4_IMM]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
|
; CHECK-NEXT: undef %302.sub1:sgpr_128 = S_MOV_B32 0
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], undef %89:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: KILL undef %89:sgpr_128
|
|
; CHECK-NEXT: [[S_SUB_I32_:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 29, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_SUB_I32_1:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM]], 30, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_SUB_I32_2:%[0-9]+]]:sreg_32 = S_SUB_I32 [[S_BUFFER_LOAD_DWORD_IMM1]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY5]], 64, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %54:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %149.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %149.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %156.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM2:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %149, 0, 0 :: (dereferenceable invariant load (s128) from %ir.91, addrspace 4)
|
|
; CHECK-NEXT: %156.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %163.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %163.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_ASHR_I32_3:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 undef %171:sreg_32, 31, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %176.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_]], undef %171:sreg_32, implicit-def $scc
|
|
; CHECK-NEXT: %176.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %183.sub0:sreg_64 = S_ADD_U32 %50.sub0, [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %183.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %190.sub0:sreg_64 = S_ADD_U32 %50.sub0, [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: %190.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %200.sub0:sreg_64 = S_ADD_U32 %50.sub0, undef %171:sreg_32, implicit-def $scc
|
|
; CHECK-NEXT: %200.sub1:sreg_64 = S_ADDC_U32 undef %51:sreg_32, [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 %50.sub0, 224, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADDC_U32_1:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %210.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %210.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %217.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: %217.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %224.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_1]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %224.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_1]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_2:%[0-9]+]]:sreg_32 = S_ADD_U32 %50.sub0, 576, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADDC_U32_2:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %51:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %241.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %241.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %253.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %253.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %261.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_2]], undef %171:sreg_32, implicit-def $scc
|
|
; CHECK-NEXT: %261.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_2]], [[S_ASHR_I32_3]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %273.sub0:sreg_64 = S_ADD_U32 [[COPY6]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %273.sub1:sreg_64 = S_ADDC_U32 undef %48:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %286.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: %286.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %293.sub0:sreg_64 = S_ADD_U32 [[COPY7]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %293.sub1:sreg_64 = S_ADDC_U32 undef %45:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_]], 16, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_1:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_2]], 16, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %302, [[S_ADD_I32_]], 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR1:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %302, undef %314:sreg_32, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR %302, [[S_ADD_I32_1]], 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %302, 16, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET undef %118:sgpr_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %369:sgpr_128, undef %370:sreg_32, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM3:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %380:sgpr_128, 16, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM3:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %156, 0, 0 :: (dereferenceable invariant load (s128) from %ir.97, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM4:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %163, 0, 0 :: (dereferenceable invariant load (s128) from %ir.103, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM5:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %176, 0, 0 :: (dereferenceable invariant load (s128) from %ir.111, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM6:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %183, 0, 0 :: (dereferenceable invariant load (s128) from %ir.117, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM7:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %190, 0, 0 :: (dereferenceable invariant load (s128) from %ir.123, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM2]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %364:sgpr_128, [[S_ADD_I32_]], 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %375:sgpr_128, [[S_ADD_I32_1]], 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_ADD_I32_2:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR]], -98, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_3:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR1]], -114, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_4:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR2]], -130, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM2]], -178, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %327.sub0:sreg_64 = S_ADD_U32 [[COPY8]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %327.sub1:sreg_64 = S_ADDC_U32 undef %42:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %335.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %335.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %343.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM8:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %200, 0, 0 :: (dereferenceable invariant load (s128) from %ir.131, addrspace 4)
|
|
; CHECK-NEXT: %343.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %351.sub0:sreg_64 = S_ADD_U32 [[COPY9]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %351.sub1:sreg_64 = S_ADDC_U32 undef %39:sreg_32, [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_3:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY10]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM3]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_LSHL_B32_3]], 16, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR undef %396:sgpr_128, [[S_ADD_I32_6]], 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN4:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM4]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM9:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %50, 224, 0 :: (dereferenceable invariant load (s128) from %ir.155, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM10:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %210, 0, 0 :: (dereferenceable invariant load (s128) from %ir.138, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN5:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM5]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM11:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %217, 0, 0 :: (dereferenceable invariant load (s128) from %ir.144, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM12:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %224, 0, 0 :: (dereferenceable invariant load (s128) from %ir.150, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN6:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN7:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR4]], -217, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -233, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_9:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR5]], -249, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_10:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM3]], -297, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_11:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -313, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_12:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -329, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_13:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -345, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_14:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR6]], -441, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_3:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY1]], 160, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADDC_U32_3:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %36:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %411.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %411.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LSHL_B32_4:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY11]], 4, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM10]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_ASHR_I32_4:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_4]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %425.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_3]], [[S_LSHL_B32_4]], implicit-def $scc
|
|
; CHECK-NEXT: %425.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_3]], [[S_ASHR_I32_4]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_4:%[0-9]+]]:sreg_32 = S_ADD_U32 %56.sub0, 168, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %57:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM13:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %241, 0, 0 :: (dereferenceable invariant load (s128) from %ir.162, addrspace 4)
|
|
; CHECK-NEXT: [[S_LSHL_B32_5:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY4]], 3, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM11]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_ASHR_I32_5:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_5]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %441.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_5]], implicit-def $scc
|
|
; CHECK-NEXT: %441.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_5]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %441, 0, 0 :: (dereferenceable invariant load (s32) from %ir..i085.i, align 8, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM14:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %253, 0, 0 :: (dereferenceable invariant load (s128) from %ir.170, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM12]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM15:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %261, 0, 0 :: (dereferenceable invariant load (s128) from %ir.176, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM13]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: %71.sub3:sgpr_128 = S_MOV_B32 553734060
|
|
; CHECK-NEXT: %71.sub2:sgpr_128 = S_MOV_B32 -1
|
|
; CHECK-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY %71
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM16:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %273, 0, 0 :: (dereferenceable invariant load (s128) from %ir.185, addrspace 4)
|
|
; CHECK-NEXT: [[COPY13]].sub1:sgpr_128 = COPY %302.sub1
|
|
; CHECK-NEXT: [[COPY13]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM]]
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM4:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY13]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM14]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM15]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM17:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %286, 0, 0 :: (dereferenceable invariant load (s128) from %ir.194, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM18:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %293, 0, 0 :: (dereferenceable invariant load (s128) from %ir.200, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN16:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM16]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_LSHL_B32_6:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY3]], 3, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET1:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM1]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_ASHR_I32_6:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_6]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_15:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM4]], -467, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %453.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_6]], implicit-def $scc
|
|
; CHECK-NEXT: %453.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_6]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %453, 0, 0 :: (dereferenceable invariant load (s64) from %ir.308, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET2:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM17]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET3:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[S_LOAD_DWORDX4_IMM18]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM19:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %327, 0, 0 :: (dereferenceable invariant load (s128) from %ir.223, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM20:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %335, 0, 0 :: (dereferenceable invariant load (s128) from %ir.230, addrspace 4)
|
|
; CHECK-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY %71
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM21:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %343, 0, 0 :: (dereferenceable invariant load (s128) from %ir.236, addrspace 4)
|
|
; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM]].sub1, 65535, implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY14]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM]].sub0
|
|
; CHECK-NEXT: [[COPY14]].sub1:sgpr_128 = COPY [[S_AND_B32_]]
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM5:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY14]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM22:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %351, 0, 0 :: (dereferenceable invariant load (s128) from %ir.242, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN17:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM19]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN18:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM20]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_LSHL_B32_7:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 3, implicit-def dead $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN19:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM21]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_ASHR_I32_7:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_7]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_16:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM5]], -468, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %468.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_7]], implicit-def $scc
|
|
; CHECK-NEXT: %468.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_7]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN20:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM22]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %468, 0, 0 :: (dereferenceable invariant load (s64) from %ir.320, addrspace 4)
|
|
; CHECK-NEXT: [[COPY15:%[0-9]+]]:sgpr_128 = COPY %71
|
|
; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORDX2_IMM1]].sub1, 65535, implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY15]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0
|
|
; CHECK-NEXT: [[COPY15]].sub1:sgpr_128 = COPY [[S_AND_B32_1]]
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM6:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY15]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM23:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %411, 0, 0 :: (dereferenceable invariant load (s128) from %ir.282, addrspace 4)
|
|
; CHECK-NEXT: [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM undef %488:sreg_64, 0, 0 :: (dereferenceable invariant load (s32) from `i32 addrspace(4)* undef`, addrspace 4)
|
|
; CHECK-NEXT: KILL %411.sub0, %411.sub1
|
|
; CHECK-NEXT: KILL undef %488:sreg_64
|
|
; CHECK-NEXT: KILL [[COPY15]].sub0_sub1, [[COPY15]].sub2_sub3
|
|
; CHECK-NEXT: [[S_LSHL_B32_8:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY12]], 3, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM24:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %425, 0, 0 :: (dereferenceable invariant load (s128) from %ir.291, addrspace 4)
|
|
; CHECK-NEXT: [[S_ASHR_I32_8:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[S_LSHL_B32_8]], 31, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_17:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM6]], -469, implicit-def dead $scc
|
|
; CHECK-NEXT: undef %485.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_4]], [[S_LSHL_B32_8]], implicit-def $scc
|
|
; CHECK-NEXT: %485.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_4]], [[S_ASHR_I32_8]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORD_IMM2:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %485, 0, 0 :: (dereferenceable invariant load (s32) from %ir..i0100.i, align 8, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN21:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM23]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN22:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM24]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM24]]
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM23]]
|
|
; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_LOAD_DWORD_IMM1]], 65535, implicit-def dead $scc
|
|
; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_128 = COPY %71
|
|
; CHECK-NEXT: [[COPY16]].sub1:sgpr_128 = COPY [[S_AND_B32_2]]
|
|
; CHECK-NEXT: [[COPY16]].sub0:sgpr_128 = COPY [[S_LOAD_DWORD_IMM2]]
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM7:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[COPY16]], 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[S_ADD_I32_18:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM]], -474, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_19:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -475, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_20:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -491, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_21:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -507, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_22:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_SGPR3]], -539, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_I32_23:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM7]], -473, implicit-def dead $scc
|
|
; CHECK-NEXT: [[S_ADD_U32_5:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], 96, implicit-def $scc
|
|
; CHECK-NEXT: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 undef %33:sreg_32, 0, implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: undef %514.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_]], implicit-def $scc
|
|
; CHECK-NEXT: %514.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM25:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %514, 0, 0 :: (dereferenceable invariant load (s128) from %ir.351, addrspace 4)
|
|
; CHECK-NEXT: undef %522.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_1]], implicit-def $scc
|
|
; CHECK-NEXT: %522.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_1]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM26:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %522, 0, 0 :: (dereferenceable invariant load (s128) from %ir.357, addrspace 4)
|
|
; CHECK-NEXT: undef %530.sub0:sreg_64 = S_ADD_U32 [[S_ADD_U32_5]], [[S_LSHL_B32_2]], implicit-def $scc
|
|
; CHECK-NEXT: %530.sub1:sreg_64 = S_ADDC_U32 [[S_ADDC_U32_5]], [[S_ASHR_I32_2]], implicit-def dead $scc, implicit $scc
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM27:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM %530, 0, 0 :: (dereferenceable invariant load (s128) from %ir.363, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN23:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM25]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN24:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM26]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN25:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM27]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from custom "BufferResource", align 1, addrspace 4)
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM27]]
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM25]]
|
|
; CHECK-NEXT: KILL [[V_MOV_B32_e32_]]
|
|
; CHECK-NEXT: KILL [[S_LOAD_DWORDX4_IMM26]]
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -2, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -1, [[BUFFER_LOAD_FORMAT_X_IDXEN1]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -3, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -4, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_]], [[V_ADD_U32_e32_2]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 27, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_2:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_1]], [[V_ADD_U32_e32_3]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_1:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 28, [[BUFFER_LOAD_DWORD_OFFSET]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_3:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_2]], [[V_SUBREV_U32_e32_]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_4:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_3]], [[V_SUBREV_U32_e32_1]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_5:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_SUB_I32_]], [[V_OR_B32_e32_4]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_6:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_SUB_I32_1]], [[V_OR_B32_e32_5]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_7:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_SUB_I32_2]], [[V_OR_B32_e32_6]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_2:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 32, [[BUFFER_LOAD_FORMAT_X_IDXEN2]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_3:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 33, [[BUFFER_LOAD_FORMAT_X_IDXEN3]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_8:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_7]], [[V_SUBREV_U32_e32_2]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_4:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 34, [[BUFFER_LOAD_FORMAT_X_IDXEN4]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_9:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_8]], [[V_SUBREV_U32_e32_3]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_5:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 36, [[BUFFER_LOAD_FORMAT_X_IDXEN5]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_10:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_9]], [[V_SUBREV_U32_e32_4]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_6:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 37, [[BUFFER_LOAD_FORMAT_X_IDXEN6]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_11:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_10]], [[V_SUBREV_U32_e32_5]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_7:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 38, [[BUFFER_LOAD_FORMAT_X_IDXEN7]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_12:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_11]], [[V_SUBREV_U32_e32_6]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_8:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 39, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_13:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_12]], [[V_SUBREV_U32_e32_7]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_9:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 50, [[BUFFER_LOAD_FORMAT_X_IDXEN8]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_14:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_13]], [[V_SUBREV_U32_e32_8]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_10:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 51, [[BUFFER_LOAD_FORMAT_X_IDXEN9]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_15:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_14]], [[V_SUBREV_U32_e32_9]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_11:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 52, [[BUFFER_LOAD_FORMAT_X_IDXEN10]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_16:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_15]], [[V_SUBREV_U32_e32_10]], implicit $exec
|
|
; CHECK-NEXT: [[V_SUBREV_U32_e32_12:%[0-9]+]]:vgpr_32 = V_SUBREV_U32_e32 53, [[BUFFER_LOAD_FORMAT_X_IDXEN11]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_17:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_16]], [[V_SUBREV_U32_e32_11]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -72, [[BUFFER_LOAD_FORMAT_X_IDXEN12]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_18:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_17]], [[V_SUBREV_U32_e32_12]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -73, [[BUFFER_LOAD_FORMAT_X_IDXEN13]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_19:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_18]], [[V_ADD_U32_e32_4]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -74, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_20:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_19]], [[V_ADD_U32_e32_5]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -75, [[BUFFER_LOAD_FORMAT_X_IDXEN14]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_21:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_20]], [[V_ADD_U32_e32_6]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -77, [[BUFFER_LOAD_FORMAT_X_IDXEN15]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_22:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_21]], [[V_ADD_U32_e32_7]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -93, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_23:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_22]], [[V_ADD_U32_e32_8]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -94, [[BUFFER_LOAD_FORMAT_X_IDXEN16]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_24:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_23]], [[V_ADD_U32_e32_9]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -95, [[BUFFER_LOAD_DWORD_OFFSET1]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_25:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_24]], [[V_ADD_U32_e32_10]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -96, [[BUFFER_LOAD_DWORD_OFFSET2]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_26:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_25]], [[V_ADD_U32_e32_11]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -97, [[BUFFER_LOAD_DWORD_OFFSET3]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_27:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_26]], [[V_ADD_U32_e32_12]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_28:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_27]], [[V_ADD_U32_e32_13]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_29:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_2]], [[V_OR_B32_e32_28]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_30:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_3]], [[V_OR_B32_e32_29]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_31:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_4]], [[V_OR_B32_e32_30]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -194, [[BUFFER_LOAD_FORMAT_X_IDXEN17]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_32:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_5]], [[V_OR_B32_e32_31]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -195, [[BUFFER_LOAD_FORMAT_X_IDXEN18]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_33:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_32]], [[V_ADD_U32_e32_14]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -196, [[BUFFER_LOAD_FORMAT_X_IDXEN19]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_34:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_33]], [[V_ADD_U32_e32_15]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -197, [[BUFFER_LOAD_FORMAT_X_IDXEN20]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_35:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_34]], [[V_ADD_U32_e32_16]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -216, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_36:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_35]], [[V_ADD_U32_e32_17]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_37:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_36]], [[V_ADD_U32_e32_18]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_38:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_7]], [[V_OR_B32_e32_37]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_39:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_8]], [[V_OR_B32_e32_38]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_40:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_9]], [[V_OR_B32_e32_39]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_41:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_10]], [[V_OR_B32_e32_40]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_42:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_11]], [[V_OR_B32_e32_41]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_43:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_12]], [[V_OR_B32_e32_42]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_44:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_13]], [[V_OR_B32_e32_43]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -457, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_45:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_14]], [[V_OR_B32_e32_44]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -458, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_46:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_45]], [[V_ADD_U32_e32_19]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -459, [[BUFFER_LOAD_FORMAT_X_IDXEN21]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_47:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_46]], [[V_ADD_U32_e32_20]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -466, [[BUFFER_LOAD_FORMAT_X_IDXEN22]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_48:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_47]], [[V_ADD_U32_e32_21]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_49:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_48]], [[V_ADD_U32_e32_22]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_50:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_15]], [[V_OR_B32_e32_49]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_51:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_16]], [[V_OR_B32_e32_50]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_52:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_17]], [[V_OR_B32_e32_51]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_53:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_23]], [[V_OR_B32_e32_52]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_54:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_18]], [[V_OR_B32_e32_53]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_55:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_19]], [[V_OR_B32_e32_54]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_56:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_20]], [[V_OR_B32_e32_55]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_57:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_21]], [[V_OR_B32_e32_56]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_58:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_22]], [[V_OR_B32_e32_57]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -555, [[BUFFER_LOAD_FORMAT_X_IDXEN23]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -556, [[BUFFER_LOAD_FORMAT_X_IDXEN24]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_59:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_58]], [[V_ADD_U32_e32_23]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -557, [[BUFFER_LOAD_FORMAT_X_IDXEN25]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_60:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_59]], [[V_ADD_U32_e32_24]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -574, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_61:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_60]], [[V_ADD_U32_e32_25]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -575, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_62:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_61]], [[V_ADD_U32_e32_26]], implicit $exec
|
|
; CHECK-NEXT: [[S_BUFFER_LOAD_DWORD_IMM8:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %71, 0, 0 :: (dereferenceable invariant load (s32))
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -576, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_63:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_62]], [[V_ADD_U32_e32_27]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -577, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_64:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_63]], [[V_ADD_U32_e32_28]], implicit $exec
|
|
; CHECK-NEXT: [[V_ADD_U32_e32_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -593, [[BUFFER_LOAD_FORMAT_X_IDXEN]], implicit $exec
|
|
; CHECK-NEXT: [[V_OR_B32_e32_65:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_64]], [[V_ADD_U32_e32_29]], implicit $exec
|
|
; CHECK-NEXT: [[S_LOAD_DWORDX8_IMM:%[0-9]+]]:sgpr_256 = S_LOAD_DWORDX8_IMM undef %564:sreg_64, 0, 0 :: (dereferenceable invariant load (s256) from `<8 x i32> addrspace(4)* undef`, addrspace 4)
|
|
; CHECK-NEXT: [[V_OR_B32_e32_66:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[V_OR_B32_e32_65]], [[V_ADD_U32_e32_30]], implicit $exec
|
|
; CHECK-NEXT: [[S_ADD_I32_24:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BUFFER_LOAD_DWORD_IMM8]], -594, implicit-def dead $scc
|
|
; CHECK-NEXT: [[V_OR_B32_e32_67:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[S_ADD_I32_24]], [[V_OR_B32_e32_66]], implicit $exec
|
|
; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 0, [[V_OR_B32_e32_67]], implicit $exec
|
|
; CHECK-NEXT: undef %693.sub3:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[V_CMP_EQ_U32_e64_]], implicit $exec
|
|
; CHECK-NEXT: IMAGE_STORE_V4_V2_gfx10 %693, undef %578:vreg_64, [[S_LOAD_DWORDX8_IMM]], 15, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "ImageResource")
|
|
; CHECK-NEXT: S_ENDPGM 0
|
|
.expVert:
|
|
%0 = extractelement <31 x i32> %userData, i64 2
|
|
%1 = extractelement <31 x i32> %userData, i64 3
|
|
%2 = extractelement <31 x i32> %userData, i64 4
|
|
%3 = extractelement <31 x i32> %userData, i64 7
|
|
%4 = extractelement <31 x i32> %userData, i64 8
|
|
%5 = extractelement <31 x i32> %userData, i64 9
|
|
%6 = extractelement <31 x i32> %userData, i64 17
|
|
%7 = extractelement <31 x i32> %userData, i64 18
|
|
%8 = extractelement <31 x i32> %userData, i64 19
|
|
%9 = extractelement <31 x i32> %userData, i64 20
|
|
%10 = extractelement <31 x i32> %userData, i64 21
|
|
%11 = extractelement <31 x i32> %userData, i64 22
|
|
%12 = extractelement <31 x i32> %userData, i64 24
|
|
%13 = extractelement <31 x i32> %userData, i64 26
|
|
%14 = extractelement <31 x i32> %userData, i64 30
|
|
%15 = insertelement <2 x i32> undef, i32 %13, i32 0
|
|
%16 = bitcast <2 x i32> %15 to i64
|
|
%17 = inttoptr i64 %16 to i8 addrspace(4)*
|
|
%18 = insertelement <2 x i32> undef, i32 %12, i32 0
|
|
%19 = bitcast <2 x i32> %18 to i64
|
|
%20 = inttoptr i64 %19 to i8 addrspace(4)*
|
|
%21 = insertelement <2 x i32> undef, i32 %11, i32 0
|
|
%22 = bitcast <2 x i32> %21 to i64
|
|
%23 = insertelement <2 x i32> undef, i32 %10, i32 0
|
|
%24 = bitcast <2 x i32> %23 to i64
|
|
%25 = insertelement <2 x i32> undef, i32 %9, i32 0
|
|
%26 = bitcast <2 x i32> %25 to i64
|
|
%27 = inttoptr i64 %26 to i8 addrspace(4)*
|
|
%28 = insertelement <2 x i32> undef, i32 %8, i32 0
|
|
%29 = bitcast <2 x i32> %28 to i64
|
|
%30 = insertelement <2 x i32> undef, i32 %7, i32 0
|
|
%31 = bitcast <2 x i32> %30 to i64
|
|
%32 = inttoptr i64 %31 to i8 addrspace(4)*
|
|
%33 = insertelement <2 x i32> undef, i32 %6, i32 0
|
|
%34 = bitcast <2 x i32> %33 to i64
|
|
%35 = inttoptr i64 %34 to i8 addrspace(4)*
|
|
%36 = insertelement <2 x i32> undef, i32 %14, i32 0
|
|
%37 = bitcast <2 x i32> %36 to i64
|
|
%38 = inttoptr i64 %37 to i8 addrspace(4)*
|
|
%39 = getelementptr i8, i8 addrspace(4)* %38, i64 232
|
|
%.i0.i = bitcast i8 addrspace(4)* %39 to i32 addrspace(4)*
|
|
%rootDesc58.ii0.i = load i32, i32 addrspace(4)* %.i0.i, align 8
|
|
%.i184.i = getelementptr i8, i8 addrspace(4)* %38, i64 236
|
|
%40 = bitcast i8 addrspace(4)* %.i184.i to i32 addrspace(4)*
|
|
%rootDesc58.ii1.i = load i32, i32 addrspace(4)* %40, align 4
|
|
%41 = and i32 %rootDesc58.ii1.i, 65535
|
|
%42 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %rootDesc58.ii0.i, i32 0
|
|
%43 = insertelement <4 x i32> %42, i32 %41, i32 1
|
|
%44 = and i32 undef, 65535
|
|
%45 = insertelement <4 x i32> undef, i32 %44, i32 1
|
|
%46 = load <4 x i32>, <4 x i32> addrspace(4)* undef, align 16
|
|
%47 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %46, i32 0, i32 0, i32 0, i32 0)
|
|
%48 = add i32 %47, -1
|
|
%49 = shl i32 %0, 4
|
|
%50 = call i32 @llvm.amdgcn.readfirstlane(i32 %49)
|
|
%51 = sext i32 %50 to i64
|
|
%52 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%53 = add i32 %52, -2
|
|
%54 = or i32 %53, %48
|
|
%55 = shl i32 %1, 4
|
|
%56 = call i32 @llvm.amdgcn.readfirstlane(i32 %55)
|
|
%57 = sext i32 %56 to i64
|
|
%58 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%59 = add i32 %58, -3
|
|
%60 = or i32 %54, %59
|
|
%61 = shl i32 %2, 4
|
|
%62 = call i32 @llvm.amdgcn.readfirstlane(i32 %61)
|
|
%63 = sext i32 %62 to i64
|
|
%64 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%65 = add i32 %64, -4
|
|
%66 = or i32 %60, %65
|
|
%67 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
|
|
%68 = add i32 %67, -27
|
|
%69 = or i32 %66, %68
|
|
%70 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 0, i32 0, i32 0)
|
|
%71 = add i32 %70, -28
|
|
%72 = or i32 %69, %71
|
|
%73 = call i32 @llvm.amdgcn.readfirstlane(i32 %0)
|
|
%74 = getelementptr i8, i8 addrspace(4)* %35, i64 16
|
|
%75 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
|
|
%76 = add i32 %75, -29
|
|
%77 = or i32 %72, %76
|
|
%78 = call i32 @llvm.amdgcn.readfirstlane(i32 %1)
|
|
%79 = shl i32 %78, 4
|
|
%80 = sext i32 %79 to i64
|
|
%81 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
|
|
%82 = add i32 %81, -30
|
|
%83 = or i32 %77, %82
|
|
%84 = call i32 @llvm.amdgcn.readfirstlane(i32 %2)
|
|
%85 = shl i32 %84, 4
|
|
%86 = sext i32 %85 to i64
|
|
%87 = getelementptr i8, i8 addrspace(4)* %74, i64 %86
|
|
%88 = bitcast i8 addrspace(4)* %87 to <4 x i32> addrspace(4)*
|
|
%89 = load <4 x i32>, <4 x i32> addrspace(4)* %88, align 16
|
|
%90 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %89, i32 0, i32 0)
|
|
%91 = add i32 %90, -31
|
|
%92 = or i32 %83, %91
|
|
%93 = getelementptr i8, i8 addrspace(4)* %35, i64 64
|
|
%94 = getelementptr i8, i8 addrspace(4)* %93, i64 %51
|
|
%95 = bitcast i8 addrspace(4)* %94 to <4 x i32> addrspace(4)*
|
|
%96 = load <4 x i32>, <4 x i32> addrspace(4)* %95, align 16
|
|
%97 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %96, i32 0, i32 0, i32 0, i32 0)
|
|
%98 = add i32 %97, -32
|
|
%99 = or i32 %92, %98
|
|
%100 = getelementptr i8, i8 addrspace(4)* %93, i64 %57
|
|
%101 = bitcast i8 addrspace(4)* %100 to <4 x i32> addrspace(4)*
|
|
%102 = load <4 x i32>, <4 x i32> addrspace(4)* %101, align 16
|
|
%103 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %102, i32 0, i32 0, i32 0, i32 0)
|
|
%104 = add i32 %103, -33
|
|
%105 = or i32 %99, %104
|
|
%106 = getelementptr i8, i8 addrspace(4)* %93, i64 %63
|
|
%107 = bitcast i8 addrspace(4)* %106 to <4 x i32> addrspace(4)*
|
|
%108 = load <4 x i32>, <4 x i32> addrspace(4)* %107, align 16
|
|
%109 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %108, i32 0, i32 0, i32 0, i32 0)
|
|
%110 = add i32 %109, -34
|
|
%111 = or i32 %105, %110
|
|
  %112 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
  %113 = sext i32 %112 to i64
  %114 = getelementptr i8, i8 addrspace(4)* %93, i64 %113
  %115 = bitcast i8 addrspace(4)* %114 to <4 x i32> addrspace(4)*
  %116 = load <4 x i32>, <4 x i32> addrspace(4)* %115, align 16
  %117 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %116, i32 0, i32 0, i32 0, i32 0)
  %118 = add i32 %117, -36
  %119 = or i32 %111, %118
  %120 = getelementptr i8, i8 addrspace(4)* %32, i64 %51
  %121 = bitcast i8 addrspace(4)* %120 to <4 x i32> addrspace(4)*
  %122 = load <4 x i32>, <4 x i32> addrspace(4)* %121, align 16
  %123 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %122, i32 0, i32 0, i32 0, i32 0)
  %124 = add i32 %123, -37
  %125 = or i32 %119, %124
  %126 = getelementptr i8, i8 addrspace(4)* %32, i64 %57
  %127 = bitcast i8 addrspace(4)* %126 to <4 x i32> addrspace(4)*
  %128 = load <4 x i32>, <4 x i32> addrspace(4)* %127, align 16
  %129 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %128, i32 0, i32 0, i32 0, i32 0)
  %130 = add i32 %129, -38
  %131 = or i32 %125, %130
  %132 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
  %133 = add i32 %132, -39
  %134 = or i32 %131, %133
  %135 = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
  %136 = sext i32 %135 to i64
  %137 = getelementptr i8, i8 addrspace(4)* %32, i64 %136
  %138 = bitcast i8 addrspace(4)* %137 to <4 x i32> addrspace(4)*
  %139 = load <4 x i32>, <4 x i32> addrspace(4)* %138, align 16
  %140 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %139, i32 0, i32 0, i32 0, i32 0)
  %141 = add i32 %140, -50
  %142 = or i32 %134, %141
  %143 = getelementptr i8, i8 addrspace(4)* %32, i64 224
  %144 = getelementptr i8, i8 addrspace(4)* %143, i64 %51
  %145 = bitcast i8 addrspace(4)* %144 to <4 x i32> addrspace(4)*
  %146 = load <4 x i32>, <4 x i32> addrspace(4)* %145, align 16
  %147 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %146, i32 0, i32 0, i32 0, i32 0)
  %148 = add i32 %147, -51
  %149 = or i32 %142, %148
  %150 = getelementptr i8, i8 addrspace(4)* %143, i64 %57
  %151 = bitcast i8 addrspace(4)* %150 to <4 x i32> addrspace(4)*
  %152 = load <4 x i32>, <4 x i32> addrspace(4)* %151, align 16
  %153 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %152, i32 0, i32 0, i32 0, i32 0)
  %154 = add i32 %153, -52
  %155 = or i32 %149, %154
  %156 = getelementptr i8, i8 addrspace(4)* %143, i64 %63
  %157 = bitcast i8 addrspace(4)* %156 to <4 x i32> addrspace(4)*
  %158 = load <4 x i32>, <4 x i32> addrspace(4)* %157, align 16
  %159 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %158, i32 0, i32 0, i32 0, i32 0)
  %160 = add i32 %159, -53
  %161 = or i32 %155, %160
  %162 = sext i32 undef to i64
  %163 = getelementptr i8, i8 addrspace(4)* %143, i64 %162
  %164 = bitcast i8 addrspace(4)* %163 to <4 x i32> addrspace(4)*
  %165 = load <4 x i32>, <4 x i32> addrspace(4)* %164, align 16
  %166 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %165, i32 0, i32 0, i32 0, i32 0)
  %167 = add i32 %166, -72
  %168 = or i32 %161, %167
  %169 = getelementptr i8, i8 addrspace(4)* %32, i64 576
  %170 = getelementptr i8, i8 addrspace(4)* %169, i64 %51
  %171 = bitcast i8 addrspace(4)* %170 to <4 x i32> addrspace(4)*
  %172 = load <4 x i32>, <4 x i32> addrspace(4)* %171, align 16
  %173 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %172, i32 0, i32 0, i32 0, i32 0)
  %174 = add i32 %173, -73
  %175 = or i32 %168, %174
  %176 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
  %177 = add i32 %176, -74
  %178 = or i32 %175, %177
  %179 = getelementptr i8, i8 addrspace(4)* %169, i64 %63
  %180 = bitcast i8 addrspace(4)* %179 to <4 x i32> addrspace(4)*
  %181 = load <4 x i32>, <4 x i32> addrspace(4)* %180, align 16
  %182 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %181, i32 0, i32 0, i32 0, i32 0)
  %183 = add i32 %182, -75
  %184 = or i32 %178, %183
  %185 = getelementptr i8, i8 addrspace(4)* %169, i64 %113
  %186 = bitcast i8 addrspace(4)* %185 to <4 x i32> addrspace(4)*
  %187 = load <4 x i32>, <4 x i32> addrspace(4)* %186, align 16
  %188 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %187, i32 0, i32 0, i32 0, i32 0)
  %189 = add i32 %188, -77
  %190 = or i32 %184, %189
  %191 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
  %192 = add i32 %191, -93
  %193 = or i32 %190, %192
  %194 = inttoptr i64 %29 to i8 addrspace(4)*
  %195 = getelementptr i8, i8 addrspace(4)* %194, i64 %51
  %196 = bitcast i8 addrspace(4)* %195 to <4 x i32> addrspace(4)*
  %197 = load <4 x i32>, <4 x i32> addrspace(4)* %196, align 16
  %198 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %197, i32 0, i32 0, i32 0, i32 0)
  %199 = add i32 %198, -94
  %200 = or i32 %193, %199
  %201 = load <4 x i32>, <4 x i32> addrspace(4)* undef, align 16
  %202 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %201, i32 0, i32 0, i32 0)
  %203 = add i32 %202, -95
  %204 = or i32 %200, %203
  %205 = getelementptr i8, i8 addrspace(4)* %27, i64 %80
  %206 = bitcast i8 addrspace(4)* %205 to <4 x i32> addrspace(4)*
  %207 = load <4 x i32>, <4 x i32> addrspace(4)* %206, align 16
  %208 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %207, i32 0, i32 0, i32 0)
  %209 = add i32 %208, -96
  %210 = or i32 %204, %209
  %211 = getelementptr i8, i8 addrspace(4)* %27, i64 %86
  %212 = bitcast i8 addrspace(4)* %211 to <4 x i32> addrspace(4)*
  %213 = load <4 x i32>, <4 x i32> addrspace(4)* %212, align 16
  %214 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %213, i32 0, i32 0, i32 0)
  %215 = add i32 %214, -97
  %216 = or i32 %210, %215
  %217 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, <{ [4 x i32], [6 x %llpc.array.element] }> addrspace(6)* null, i32 0, i32 1, i32 %0, i32 0
  %218 = ptrtoint i32 addrspace(6)* %217 to i32
  %219 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %45, i32 %218, i32 0)
  %220 = add i32 %219, -98
  %221 = or i32 %216, %220
  %222 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %45, i32 undef, i32 0)
  %223 = add i32 %222, -114
  %224 = or i32 %221, %223
  %225 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, <{ [4 x i32], [6 x %llpc.array.element] }> addrspace(6)* null, i32 0, i32 1, i32 %2, i32 0
  %226 = ptrtoint i32 addrspace(6)* %225 to i32
  %227 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %45, i32 %226, i32 0)
  %228 = add i32 %227, -130
  %229 = or i32 %224, %228
  %230 = getelementptr <{ [4 x i32], [6 x %llpc.array.element] }>, <{ [4 x i32], [6 x %llpc.array.element] }> addrspace(6)* null, i32 0, i32 1, i32 undef, i32 0
  %231 = ptrtoint i32 addrspace(6)* %230 to i32
  %232 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %45, i32 %231, i32 0)
  %233 = add i32 %232, -178
  %234 = or i32 %229, %233
  %235 = inttoptr i64 %24 to i8 addrspace(4)*
  %236 = getelementptr i8, i8 addrspace(4)* %235, i64 %51
  %237 = bitcast i8 addrspace(4)* %236 to <4 x i32> addrspace(4)*
  %238 = load <4 x i32>, <4 x i32> addrspace(4)* %237, align 16
  %239 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %238, i32 0, i32 0, i32 0, i32 0)
  %240 = add i32 %239, -194
  %241 = or i32 %234, %240
  %242 = inttoptr i64 %22 to i8 addrspace(4)*
  %243 = getelementptr i8, i8 addrspace(4)* %242, i64 %51
  %244 = bitcast i8 addrspace(4)* %243 to <4 x i32> addrspace(4)*
  %245 = load <4 x i32>, <4 x i32> addrspace(4)* %244, align 16
  %246 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %245, i32 0, i32 0, i32 0, i32 0)
  %247 = add i32 %246, -195
  %248 = or i32 %241, %247
  %249 = getelementptr i8, i8 addrspace(4)* %242, i64 %57
  %250 = bitcast i8 addrspace(4)* %249 to <4 x i32> addrspace(4)*
  %251 = load <4 x i32>, <4 x i32> addrspace(4)* %250, align 16
  %252 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %251, i32 0, i32 0, i32 0, i32 0)
  %253 = add i32 %252, -196
  %254 = or i32 %248, %253
  %255 = getelementptr i8, i8 addrspace(4)* %242, i64 %63
  %256 = bitcast i8 addrspace(4)* %255 to <4 x i32> addrspace(4)*
  %257 = load <4 x i32>, <4 x i32> addrspace(4)* %256, align 16
  %258 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %257, i32 0, i32 0, i32 0, i32 0)
  %259 = add i32 %258, -197
  %260 = or i32 %254, %259
  %261 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
  %262 = add i32 %261, -216
  %263 = or i32 %260, %262
  %264 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, <{ [4 x i32], [6 x %llpc.array.element.2] }> addrspace(6)* null, i32 0, i32 1, i32 %0, i32 0
  %265 = ptrtoint i32 addrspace(6)* %264 to i32
  %266 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %265, i32 0)
  %267 = add i32 %266, -217
  %268 = or i32 %263, %267
  %269 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
  %270 = add i32 %269, -233
  %271 = or i32 %268, %270
  %272 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, <{ [4 x i32], [6 x %llpc.array.element.2] }> addrspace(6)* null, i32 0, i32 1, i32 %2, i32 0
  %273 = ptrtoint i32 addrspace(6)* %272 to i32
  %274 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %273, i32 0)
  %275 = add i32 %274, -249
  %276 = or i32 %271, %275
  %277 = getelementptr <{ [4 x i32], [6 x %llpc.array.element.2] }>, <{ [4 x i32], [6 x %llpc.array.element.2] }> addrspace(6)* null, i32 0, i32 1, i32 undef, i32 0
  %278 = ptrtoint i32 addrspace(6)* %277 to i32
  %279 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %278, i32 0)
  %280 = add i32 %279, -297
  %281 = or i32 %276, %280
  %282 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
  %283 = add i32 %282, -313
  %284 = or i32 %281, %283
  %285 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
  %286 = add i32 %285, -329
  %287 = or i32 %284, %286
  %288 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
  %289 = add i32 %288, -345
  %290 = or i32 %287, %289
  %291 = getelementptr <{ [4 x i32], [9 x %llpc.array.element.5] }>, <{ [4 x i32], [9 x %llpc.array.element.5] }> addrspace(6)* null, i32 0, i32 1, i32 %4, i32 0
  %292 = ptrtoint i32 addrspace(6)* %291 to i32
  %293 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %292, i32 0)
  %294 = add i32 %293, -441
  %295 = or i32 %290, %294
  %296 = getelementptr i8, i8 addrspace(4)* %20, i64 160
  %297 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
  %298 = add i32 %297, -457
  %299 = or i32 %295, %298
  %300 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
  %301 = add i32 %300, -458
  %302 = or i32 %299, %301
  %303 = getelementptr i8, i8 addrspace(4)* %296, i64 %63
  %304 = bitcast i8 addrspace(4)* %303 to <4 x i32> addrspace(4)*
  %305 = load <4 x i32>, <4 x i32> addrspace(4)* %304, align 16
  %306 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %305, i32 0, i32 0, i32 0, i32 0)
  %307 = add i32 %306, -459
  %308 = or i32 %302, %307
  %309 = shl i32 %5, 4
  %310 = call i32 @llvm.amdgcn.readfirstlane(i32 %309)
  %311 = sext i32 %310 to i64
  %312 = getelementptr i8, i8 addrspace(4)* %296, i64 %311
  %313 = bitcast i8 addrspace(4)* %312 to <4 x i32> addrspace(4)*
  %314 = load <4 x i32>, <4 x i32> addrspace(4)* %313, align 16
  %315 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %314, i32 0, i32 0, i32 0, i32 0)
  %316 = add i32 %315, -466
  %317 = or i32 %308, %316
  %318 = getelementptr i8, i8 addrspace(4)* %38, i64 168
  %319 = shl i32 %73, 3
  %320 = sext i32 %319 to i64
  %321 = getelementptr i8, i8 addrspace(4)* %318, i64 %320
  %.i085.i = bitcast i8 addrspace(4)* %321 to i32 addrspace(4)*
  %.ii0.i = load i32, i32 addrspace(4)* %.i085.i, align 8
  %322 = and i32 undef, 65535
  %323 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii0.i, i32 0
  %324 = insertelement <4 x i32> %323, i32 %322, i32 1
  %325 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %324, i32 0, i32 0)
  %326 = add i32 %325, -467
  %327 = or i32 %317, %326
  %328 = shl i32 %78, 3
  %329 = sext i32 %328 to i64
  %330 = getelementptr i8, i8 addrspace(4)* %318, i64 %329
  %.i088.i = bitcast i8 addrspace(4)* %330 to i32 addrspace(4)*
  %.ii090.i = load i32, i32 addrspace(4)* %.i088.i, align 8
  %.i191.i = getelementptr i8, i8 addrspace(4)* %330, i64 4
  %331 = bitcast i8 addrspace(4)* %.i191.i to i32 addrspace(4)*
  %.ii192.i = load i32, i32 addrspace(4)* %331, align 4
  %332 = and i32 %.ii192.i, 65535
  %333 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii090.i, i32 0
  %334 = insertelement <4 x i32> %333, i32 %332, i32 1
  %335 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %334, i32 0, i32 0)
  %336 = add i32 %335, -468
  %337 = or i32 %327, %336
  %338 = shl i32 %84, 3
  %339 = sext i32 %338 to i64
  %340 = getelementptr i8, i8 addrspace(4)* %318, i64 %339
  %.i094.i = bitcast i8 addrspace(4)* %340 to i32 addrspace(4)*
  %.ii096.i = load i32, i32 addrspace(4)* %.i094.i, align 8
  %.i197.i = getelementptr i8, i8 addrspace(4)* %340, i64 4
  %341 = bitcast i8 addrspace(4)* %.i197.i to i32 addrspace(4)*
  %.ii198.i = load i32, i32 addrspace(4)* %341, align 4
  %342 = and i32 %.ii198.i, 65535
  %343 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii096.i, i32 0
  %344 = insertelement <4 x i32> %343, i32 %342, i32 1
  %345 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %344, i32 0, i32 0)
  %346 = add i32 %345, -469
  %347 = or i32 %337, %346
  %348 = call i32 @llvm.amdgcn.readfirstlane(i32 %3)
  %349 = shl i32 %348, 3
  %350 = sext i32 %349 to i64
  %351 = getelementptr i8, i8 addrspace(4)* %318, i64 %350
  %.i0100.i = bitcast i8 addrspace(4)* %351 to i32 addrspace(4)*
  %.ii0102.i = load i32, i32 addrspace(4)* %.i0100.i, align 8
  %.ii1104.i = load i32, i32 addrspace(4)* undef, align 4
  %352 = and i32 %.ii1104.i, 65535
  %353 = insertelement <4 x i32> <i32 undef, i32 undef, i32 -1, i32 553734060>, i32 %.ii0102.i, i32 0
  %354 = insertelement <4 x i32> %353, i32 %352, i32 1
  %355 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %354, i32 0, i32 0)
  %356 = add i32 %355, -473
  %357 = or i32 %347, %356
  %358 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 0, i32 0)
  %359 = add i32 %358, -474
  %360 = or i32 %357, %359
  %361 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
  %362 = add i32 %361, -475
  %363 = or i32 %360, %362
  %364 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
  %365 = add i32 %364, -491
  %366 = or i32 %363, %365
  %367 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
  %368 = add i32 %367, -507
  %369 = or i32 %366, %368
  %370 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 undef, i32 0)
  %371 = add i32 %370, -539
  %372 = or i32 %369, %371
  %373 = getelementptr i8, i8 addrspace(4)* %17, i64 96
  %374 = getelementptr i8, i8 addrspace(4)* %373, i64 %51
  %375 = bitcast i8 addrspace(4)* %374 to <4 x i32> addrspace(4)*
  %376 = load <4 x i32>, <4 x i32> addrspace(4)* %375, align 16
  %377 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %376, i32 0, i32 0, i32 0, i32 0)
  %378 = add i32 %377, -555
  %379 = or i32 %372, %378
  %380 = getelementptr i8, i8 addrspace(4)* %373, i64 %57
  %381 = bitcast i8 addrspace(4)* %380 to <4 x i32> addrspace(4)*
  %382 = load <4 x i32>, <4 x i32> addrspace(4)* %381, align 16
  %383 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %382, i32 0, i32 0, i32 0, i32 0)
  %384 = add i32 %383, -556
  %385 = or i32 %379, %384
  %386 = getelementptr i8, i8 addrspace(4)* %373, i64 %63
  %387 = bitcast i8 addrspace(4)* %386 to <4 x i32> addrspace(4)*
  %388 = load <4 x i32>, <4 x i32> addrspace(4)* %387, align 16
  %389 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %388, i32 0, i32 0, i32 0, i32 0)
  %390 = add i32 %389, -557
  %391 = or i32 %385, %390
  %392 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
  %393 = add i32 %392, -574
  %394 = or i32 %391, %393
  %395 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
  %396 = add i32 %395, -575
  %397 = or i32 %394, %396
  %398 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
  %399 = add i32 %398, -576
  %400 = or i32 %397, %399
  %401 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
  %402 = add i32 %401, -577
  %403 = or i32 %400, %402
  %404 = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> undef, i32 0, i32 0, i32 0, i32 0)
  %405 = add i32 %404, -593
  %406 = or i32 %403, %405
  %407 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %43, i32 0, i32 0)
  %408 = add i32 %407, -594
  %409 = or i32 %406, %408
  %.not.i = icmp eq i32 %409, 0
  %410 = load <8 x i32>, <8 x i32> addrspace(4)* undef, align 32
  %.i010.i = select i1 %.not.i, float 0x36A0000000000000, float 0.000000e+00
  %411 = insertelement <4 x float> undef, float %.i010.i, i32 3
  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %411, i32 15, i32 undef, i32 undef, <8 x i32> %410, i32 0, i32 0)
  ret void
}

declare i32 @llvm.amdgcn.readfirstlane(i32)
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)
declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg)
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)